R/error_vars.R

Defines functions error_vars

Documented in error_vars

#' @title Generate Variables for Error Loop
#'
#' @description This function simulates the continuous, ordinal (r >= 2 categories), Poisson, or Negative Binomial variables
#'     used in \code{\link[SimMultiCorrData]{error_loop}}.  It is called in each iteration, regenerates all variables, and calculates the
#'     resulting correlation matrix.
#'     This function would not ordinarily be called directly by the user.
#'
#' @param marginal a list of length equal \code{k_cat}; the i-th element is a vector of the cumulative
#'     probabilities defining the marginal distribution of the i-th variable;
#'     if the variable can take r values, the vector will contain r - 1 probabilities (the r-th is assumed to be 1)
#' @param support a list of length equal \code{k_cat}; the i-th element is a vector of containing the r
#'     ordered support values; if not provided, the default is for the i-th element to be the vector 1, ..., r
#' @param method the method used to generate the continuous variables.  "Fleishman" uses a third-order polynomial transformation
#'     and "Polynomial" uses Headrick's fifth-order transformation.
#' @param means a vector of means for the continuous variables
#' @param vars a vector of variances
#' @param constants a matrix with \code{k_cont} rows, each a vector of constants c0, c1, c2, c3 (if \code{method} = "Fleishman") or
#'     c0, c1, c2, c3, c4, c5 (if \code{method} = "Polynomial"), like that returned by
#'     \code{\link[SimMultiCorrData]{find_constants}}
#' @param lam a vector of lambda (> 0) constants for the Poisson variables (see \code{\link[stats]{Poisson}})
#' @param size a vector of size parameters for the Negative Binomial variables (see \code{\link[stats]{NegBinomial}})
#' @param prob a vector of success probability parameters
#' @param mu a vector of mean parameters (*Note: either \code{prob} or \code{mu} should be supplied for all Negative Binomial variables,
#'     not a mixture)
#' @param Sigma the 2 x 2 intermediate correlation matrix generated by \code{\link[SimMultiCorrData]{error_loop}}
#' @param rho_calc the 2 x 2 final correlation matrix calculated in \code{\link[SimMultiCorrData]{error_loop}}
#' @param q the row index of the 1st variable
#' @param r the column index of the 2nd variable
#' @param k_cat the number of ordinal (r >= 2 categories) variables
#' @param k_cont the number of continuous variables
#' @param k_pois the number of Poisson variables
#' @param k_nb the number of Negative Binomial variables
#' @param Y_cat the ordinal variables generated from \code{\link[SimMultiCorrData]{error_loop}}
#' @param Y the continuous (mean 0, variance 1) variables
#' @param Yb the continuous variables with desired mean and variance
#' @param Y_pois the Poisson variables
#' @param Y_nb the Negative Binomial variables
#' @param n the sample size
#' @param seed the seed value for random number generation
#' @import stats
#' @import utils
#' @export
#' @keywords error, correlation
#' @seealso \code{\link[GenOrd]{ordcont}}, \code{\link[SimMultiCorrData]{rcorrvar}}, \code{\link[SimMultiCorrData]{rcorrvar2}},
#'     \code{\link[SimMultiCorrData]{error_loop}}
#'
#' @return A list with the following components:
#' @return \code{Sigma} the intermediate MVN correlation matrix
#' @return \code{rho_calc} the calculated final correlation matrix generated from Sigma
#' @return \code{Y_cat} the ordinal variables
#' @return \code{Y} the continuous (mean 0, variance 1) variables
#' @return \code{Yb} the continuous variables with desired mean and variance
#' @return \code{Y_pois} the Poisson variables
#' @return \code{Y_nb} the Negative Binomial variables
#' @references Please see references for \code{\link[SimMultiCorrData]{error_loop}}.
#'
error_vars <- function(marginal, support, method, means, vars, constants, lam,
                       size, prob, mu, Sigma, rho_calc, q, r, k_cat, k_cont,
                       k_pois, k_nb, Y_cat, Y, Yb, Y_pois, Y_nb, n, seed) {
  eig <- eigen(Sigma, symmetric = TRUE)
  sqrteigval <- diag(sqrt(pmax(eig$values, 0)))
  eigvec <- eig$vectors
  fry <- eigvec %*% sqrteigval
  set.seed(seed)
  X <- matrix(rnorm(ncol(Sigma) * n), n)
  X <- scale(X, TRUE, FALSE)
  X <- X %*% svd(X, nu = 0)$v
  X <- scale(X, FALSE, TRUE)
  X <- fry %*% t(X)
  X <- t(X)
  Y_cont <- Yb
  if (k_cat > 0) {
    X_cat <- X[, 1:k_cat, drop = FALSE]
    Y_cat <- matrix(1, nrow = n, ncol = ncol(X_cat))
    for (i in 1:ncol(X_cat)) {
      Y_cat[, i] <- as.integer(cut(X_cat[, i],
                                   breaks = c(min(X_cat[, i]) - 1,
                                              qnorm(marginal[[i]]),
                                              max(X_cat[, i])  +  1)))
      Y_cat[, i] <- support[[i]][Y_cat[, i]]
    }
  }
  if (k_cont > 0) {
    X_cont <- X[, (k_cat + 1):(k_cat + k_cont), drop = FALSE]
    Y_cont <- matrix(1, nrow = n, ncol = ncol(X_cont))
    Yb <- matrix(1, nrow = n, ncol = ncol(X_cont))
    for (i in 1:ncol(X_cont)) {
      if (method == "Fleishman") {
        Yb[, i] <- constants[i, 1] +
          constants[i, 2] * X_cont[, i] +
          constants[i, 3] * X_cont[, i]^2 +
          constants[i, 4] * X_cont[, i]^3
      }
      if (method == "Polynomial") {
        Yb[, i] <- constants[i, 1] +
          constants[i, 2] * X_cont[, i] +
          constants[i, 3] * X_cont[, i]^2 +
          constants[i, 4] * X_cont[, i]^3 +
          constants[i, 5] * X_cont[, i]^4 +
          constants[i, 6] * X_cont[, i]^5
      }
      Y_cont[, i] <- means[i] + sqrt(vars[i]) * Yb[, i]
    }
  }
  if (k_pois > 0) {
    X_pois <- X[, (k_cat + k_cont + 1):(k_cat + k_cont + k_pois),
                drop = FALSE]
    Y_pois <- matrix(1, nrow = n, ncol = ncol(X_pois))
    for (i in 1:ncol(X_pois)) {
      Y_pois[, i] <- qpois(pnorm(X_pois[, i]), lam[i])
    }
  }
  if (k_nb > 0) {
    X_nb <- X[, (k_cat + k_cont + k_pois + 1):ncol(X), drop = FALSE]
    Y_nb <- matrix(1, nrow = n, ncol = ncol(X_nb))
    if (length(prob) > 0) {
      for (i in 1:ncol(X_nb)) {
        Y_nb[, i] <- qnbinom(pnorm(X_nb[, i]), size[i], prob[i])
      }
    }
    if (length(mu) > 0) {
      for (i in 1:ncol(X_nb)) {
        Y_nb[, i] <- qnbinom(pnorm(X_nb[, i]), size[i], mu = mu[i])
      }
    }
  }
  rho_calc <- cor(cbind(Y_cat, Y_cont, Y_pois, Y_nb))
  return(list(Sigma = Sigma, rho_calc = rho_calc, Y_cat = Y_cat, Y = Yb,
              Yb = Y_cont, Y_pois = Y_pois, Y_nb = Y_nb))
}

Try the SimMultiCorrData package in your browser

Any scripts or data that you put into this service are public.

SimMultiCorrData documentation built on May 2, 2019, 9:50 a.m.