R/gen_zip.R

Defines functions gen_zip

#' Generates count data from a zero-inflated Poisson model.
#'
#' @param n  size of data set to be generated
#' @param beta  coefficients for count part of the model
#' @param xi   coefficients for the zero-inflation model
#' @param type  type of design matrix, options include "uniform", "stdnormal"
#' @param intercept  should output design matrices contain an intercept
#' @param designmat_basis  a list of two data.frame from which
#' simulated design matrices can be generated.  If non-null,
#' the first data.frame will be the basis of the count design matrix; the
#' second will be for the zero-inflation model design matrix.
#' Design matrices are generated by taking a random sample of rows with
#' replacement.
#' @param add_colnames  a list of length 2 to be used if design matrices are
#'                      supplied.  the first will contain names for X, and the
#'                      second will contain names for Z.
#' @param xz_equal should X and Z be equal to one another
#' @return A list
#' @export
gen_zip <- function(n, beta, xi, type="uniform", intercept=F,
                    designmat_basis=NULL, add_colnames=NULL,
                    xz_equal=F) {
  p <- length(beta) - 1
  q <- length(xi) - 1
  designmat <- gen_designmat(n, p, q, type, intercept=T, designmat_basis,
                             add_colnames)
  x <- designmat$x
  z <- designmat$z
  if(xz_equal){
    if(length(beta) == length(xi)){
      z <- x
    } else {
      stop("beta and xi must be the same length")
    }
  }
  count_eta <- x%*%beta
  zero_eta <- z%*%xi
  trueMeans <- exp(count_eta)
  probability <- exp(zero_eta)/(1+exp(zero_eta))
  count_means <- trueMeans*(1 - probability)
  u <- stats::runif(n, 0, 1)
  y <- stats::rpois(n, trueMeans)
  y[u < probability] <- 0
  structural_zeroes <- u < probability
  if(intercept == F){
    x <- x[, -1, drop = F]
    z <- z[, -1, drop = F]
  }
  theoretical_mse <- mean((y - count_means)^2)
  theoretical_rmse <- sqrt(theoretical_mse)
  #calculate true conditional means of T|U,Z,Y
  cond_probi <- rep(NA, n)
  cond_probi[y != 0] <- 0
  y0_ind <- y == 0
  y0_probi <- exp(-zero_eta[y0_ind] - trueMeans[y0_ind])
  y0_probi <- (1 + y0_probi)^(-1)
  cond_probi[y == 0] <- y0_probi
  out <- list(x=x, z=z, y=y, theoretical_mse=theoretical_mse,
              count_means=count_means,
              conditional_struczero_prob=cond_probi,
              unconditional_struczero_prob=probability,
              structural_zeroes = structural_zeroes)
}
daniel-conn17/scRNAzirf documentation built on Dec. 19, 2021, 8:05 p.m.