R/rvg4yx.R

Defines functions rvg4yx

Documented in rvg4yx

#' @title Random variates generator for simulation studies
#' 
#' @description \code{rvg4yx} generates random variates of a response variable under 
#' the given conditions of mean and variance-covariance structure of matrices 
#' of predictors and their coefficients.
#'
#' @details
#' The mean parameter of a response variable is a function of linear predictor 
#' by a link function in the context of generalized linear models.
#' 
#' Log and logit links are supported.
#'
#' Default to \code{Xstr} is an independent multivariate normal distribution 
#' with mean \code{Xmean}, correlation \code{R}, and standard deviation 
#' \code{Xsd}. 
#' 
#' @param N a number of random variates
#' @param pars a vector of regression parameters
#' @param ztrunc FALSE by default
#' @param xreg FALSE by default
#' @param seed NULL a random seed
#' @param shape a shape parameter of gamma distribution  
#' @param ... other arguments
#' @param kind types of random variates 
#' @param center.X Are predictors centered at their means?
#' 
#'
#' @return a list with components
#' \item{n}{the number of observations which are actually realized}
#' \item{y}{the \code{n}-by-\code{1} vector of response generated}
#' \item{X}{the \code{n}-by-\code{p-1} matrix (an intercept is excluded)}
#' \item{yX}{the \code{n}-by-\code{p} data frame which includes \code{y} and \code{X}}
#'
#' @note
#' Default to a matrix decomposition \code{method} is \code{eigen}. 
#' Other options are \code{chol} and \code{svd}.
#' 
#' Currently, the link \code{logit} is not supported.
#'
#' @examples
#' \dontrun{
#' tmp <- rvg4yx(N=5e2, pars=c(0.5, -0.5), ztrunc=TRUE, xreg=TRUE, kind="mvnorm", mean=xm, sigma=xsigma, center.X=TRUE)
#' dat <- tmp$yX
#' table(dat$y)
#' }
#' # y <- dat$y
#' # print(c(mean(y), dat$cm1))
#' # print(c(var(y), dat$cm2, dat$cm22))
#' # print(c(skewness(y), dat$g1))
#' 
#' # dat1 <- rvg4yx(N=1e1, xreg=FALSE, pars=2, ztrunc=TRUE, shape=4)
#' # y1 <- dat1$y
#' # print(c(mean(y1), dat1$cm1))
#' # print(c(var(y1), dat1$cm2, dat1$cm22, dat1$cm23))
#' # print(c(skewness(y1), dat1$g1))
#'
#' @seealso \code{\link{rmvnorm}}, \code{\link{rmvbin}}, \code{\link{rsn}}, \code{\link{rmsn}}
#' @author Chel Hee Lee <\email{gnustats@@gmail.com}>
#'
#' @export
rvg4yx <- function(N, pars, xreg=FALSE, ztrunc=FALSE, kind=c("mvnorm", "mvbin", "norbin", "usrdef", "sknorm", "multinom", "poisson", "nbinom", "norbin", "gambin", "gambeta"), seed=NULL, shape=Inf, center.X=FALSE, ...){

  stopifnot(!missing(N), !missing(pars), !missing(ztrunc), !missing(xreg), !missing(kind))
  stopifnot(is.numeric(N), length(N)==1, is.vector(pars), !is.null(pars), is.numeric(pars), is.logical(ztrunc), is.logical(xreg), is.character(kind))
  
  if(!missing(seed)) set.seed(seed)
  if(missing(shape)) shape <- Inf # NB -> Pois

  kind <- match.arg(kind)
  if(any(kind %in% c("norbin", "gambin", "gambeta"))) stop("Method is not implemented yet.\nPlease contact the author")
  
  if(xreg) stopifnot(any(kind %in% c("mvnorm", "mvbin", "usrdef", "sknorm")))
  else stopifnot(any(kind %in% c("poisson", "nbinom")), !(is.infinite(shape) & kind=="nbinom"))
  
  fn.mvnorm <- function(cf0=NULL, ...){
  
    ## no intercept 'cf0'
    X <- ## mvtnorm::rmvnorm(n, mean:vector, sigma:matrix)
      if(length(pars) >= 2) rmvnorm(n=N, ...)
      else rnorm(n=N, ...)
    X <- as.matrix(X)
    colnames(X) <- paste("x", seq_len(ncol(X)), sep="")
    if(center.X) X <- scale(x=X, center=center.X, scale=FALSE)
    
    ## add intercept 'cf0'
    if(!is.null(cf0)){ 
      stopifnot(is.numeric(cf0), length(cf0)==1, is.vector(cf0))
      X <- cbind(1,X)
      colnames(X)[1] <- c("x0")
    }
    cfs <- c(cf0, pars)
    rvals <- list(X=X, cfs=cfs)
    return(rvals)
  }
  
  fn.mvbin <- function(cf0=NULL, ...){
    ## package:bindata
    ## args: margprob (vector), bincorr (matrix)
    X <- 
      if(length(pars)!=1) rmvbin(n=N, ...)
      else rbinom(n=N, ...)
    X <- as.matrix(X)
    colnames(X) <- paste("x", seq_len(ncol(X)), sep="")
    if(center.X) X <- scale(x=X, center=center.X, scale=FALSE)
    
    if(!is.null(cf0)){
      X <- cbind(1, X)
      colnames(X) <- paste("x", seq_len(ncol(X))-1, sep="")
    }
    pars <- c(cf0, pars)
    rvals <- list(X=X, pars=pars)
    return(rvals)
  }
  
  fn.norbin <- function(...){
    # NORTA
    return(NULL)
  }
  
  fn.gambin <- function(...){
    return(NULL)
  }
  
  fn.gambeta <- function(...){
    # copula (two continuous)
    return(NULL)
  }
  
  fn.sknorm <- function(xi=NULL, omega=NULL, alpha=NULL, ...){
    ## package:sn
    ## args: location (xi), scale (omega), shape (alpha)
    
    stopifnot(!missing(omega), omega>0)
    
    if(length(pars)==1){
      X <- rsn(n=N, location=xi, scale=omega, shape=alpha)
      delta <- alpha/(alpha+1)
      cm1 <- xi + omega*delta*sqrt(2/pi) # mean
      cm2 <- omega^2*(1-2*delta^2/pi) # variance
      cm3 <- (4-pi)/2 * (delta*sqrt(2/pi))^3 / (1-2*delta^2/pi)^(3/2) # skewness
    } else {
      stopifnot(is.vector(alpha), is.matrix(omega), is.vector(xi), length(alpha)==length(xi))
      X <- rmsn(n=N, xi=xi, Omega=omega, alpha=alpha)
      delta <- (omega%*%alpha)/sqrt(1+t(alpha)%*%omega%*%alpha) 
      cm1 <- sqrt(2/pi)*delta
      cm2 <- omega - cm1%*%t(cm1)
      cm3 <- message("Method is not implemented yet.\nPlease contact the author\n") 
    }
    
    if(center.X) X <- scale(X, center=center.X) 
    rvals <- list(X=X, pars=pars, p=length(pars), cm1=cm1, cm2=cm2, cm3=cm3)
    return(rvals)
  }
  
  fn.usrdef <- function(Xmat, ...){
    ## user-defined matrix X
    X <- Xmat
    # if(center.X) X <- sacle(X, center=center.X)
    rvals <- list(X=X, pars=pars, p=length(pars))
    return(rvals)
  }
  
  fn.poisson <- function(...){
    stopifnot(length(pars)==1, is.infinite(shape))
    mu <- pars
    y <- rpois(n=N, lambda=mu)
    
    if(!ztrunc){
      cm1 <- cm2 <- mu
      g1 <- mu^{-0.5}
    } else {
      y <- y[y>0]
      sf <- ppois(q=0, lambda=mu, lower.tail=FALSE, log.p=FALSE)
      p0 <- dpois(x=0, lambda=mu, log=FALSE)
      cm1 <- mu/sf
      cm2 <- mu*(sf-mu*p0)/(sf^2)
      g1 <- ((mu^2+3*mu+1)*cm1 - 3*(mu+1)*cm1^2 + 2*cm1^3) / (cm2^{3/2})
    }
    
    rvals <- list(y=y, mu=mu, cm1=cm1, cm2=cm2, g1=g1, n=length(y), ztrunc=ztrunc)
    return(rvals)
  }

  fn.nbinom <- function(...){
    stopifnot(length(pars)==1, !is.infinite(shape))
    mu <- pars
    y <- rnbinom(n=N, mu=mu, size=shape)
    
    if(!ztrunc){
      cm1 <- mu
      cm2 <- mu + (mu^2)/shape
      g1 <- NULL
    } else {
      sf <- pnbinom(q=0, mu=mu, size=shape, lower.tail=FALSE, log.p=FALSE)
      p0 <- pnbinom(q=0, mu=mu, size=shape, lower.tail=TRUE, log.p=FALSE)
      
      cm1 <- mu/sf
      cm2 <- cm1+cm1*(shape+1)*(mu/shape) - cm1^2
      g1 <- shape/sf*( (mu/shape)^3*(shape+1)*(shape+2) + 3*(mu/shape)^2*(shape+1) + mu/shape ) - 3/sf*shape*((mu/shape)+(mu/shape)^2*(shape+1)) * cm1 + 2*(cm1)^3
      g1 <- g1/(cm2^{3/2})    
    }
    
    rvals <- list(y=y, mu=mu, cm1=cm1, cm2=cm2, g1=g1, n=length(y), ztrunc=ztrunc, shape=shape)
    return(rvals)
  }

  fn.rvg <- # choose kind of random variates generator
    if(xreg) switch(kind, 
      "mvnorm"=fn.mvnorm,
      "mvbin"=fn.mvbin,
      "norbin"=fn.norbin,
      "sknorm"=fn.sknorm,
      "usrdef"=fn.usrdef)
    else switch(kind,
      "poisson"=fn.poisson,
      "nbinom"=fn.nbinom)

  if(!xreg) rvals <- fn.rvg(...)
  else {
    xtmp <- fn.rvg(...)

    X <- xtmp$X
    cfs <- xtmp$cfs
    p <- length(cfs)
    if(ncol(X)!=p) stop("'ncol(X)' is not equal to 'length(cfs)'")  
    mu <- as.vector(exp(X%*%cfs))
    
    y <- # poisson or nbinom random variates?
      if(is.infinite(shape)) rpois(n=nrow(X), lambda=mu)
      else rnbinom(n=nrow(X), mu=mu, size=shape)
    
    X <- as.data.frame(X)
    if(length(pars)+1 == p) X$x0 <- NULL
    # if(center.X) X <- scale(x=X, center=TRUE, scale=FALSE)
    yX <- data.frame(y,X)
    
    if(ztrunc){
      idx <- which(yX$y>0)
      yX <- yX[idx,]
      mu <- mu[idx]
    }
    rvals <- list(N=N, yX=yX, y=y, X=X, ztrunc=ztrunc, xreg=xreg, seed=seed, n=nrow(yX), mu=mu, pars=pars, xtmp=xtmp)
  }
  return(rvals)
}
NULL

Try the ipeglim package in your browser

Any scripts or data that you put into this service are public.

ipeglim documentation built on May 2, 2019, 4:31 p.m.