R/BASiCS_Sim.R

Defines functions BASiCS_Sim

Documented in BASiCS_Sim

#' @title Generates synthetic data according to the model implemented in BASiCS
#'
#' @description \code{BASiCS_Sim} creates a simulated dataset from the model
#' implemented in BASiCS.
#'
#' @param Mu Gene-specific mean expression parameters \eqn{\mu_i} for all
#' biological genes (vector of length \code{q.bio},
#' all elements must be positive numbers)
#' @param Mu_spikes \eqn{\mu_i} for all technical genes defined as true
#' input molecules (vector of length \code{q-q.bio},
#' all elements must be positive numbers). If \code{Mu_spikes = NULL}, the
#' generated data will not contain spike-ins. If \code{Phi = NULL}, 
#' \code{Mu_spikes} will be ignored. Default: \code{Mu_spikes = NULL}. 
#' @param Delta Gene-specific biological over-dispersion parameters
#' \eqn{\delta_i}, biological genes only
#' (vector of length \code{q.bio}, all elements must be positive numbers)
#' @param Phi Cell-specific mRNA content normalising parameters \eqn{\phi_j}
#' (vector of length \code{n}, all elements must be positive numbers and
#' the sum of its elements must be equal to \code{n}). \code{Phi} must be set
#' equal to \code{NULL} when generating data without spike-ins. If 
#' \code{Mu_spikes = NULL}, \code{Phi} will be ignored. 
#' Default: \code{Phi = NULL}
#' @param S Cell-specific technical normalising parameters \eqn{s_j}
#' (vector of length \code{n}, all elements must be positive numbers)
#' @param Theta Technical variability parameter \eqn{\theta} (must be positive).
#' \code{Theta} can be a scalar (single batch of samples), or a vector (multiple
#' batches of samples). If a value for \code{BatchInfo} is provided, the length
#' of \code{Theta} must match the number of unique values in \code{BatchInfo}.
#' @param BatchInfo Vector detailing which batch each cell should be simulated
#' from. If spike-ins are not in use, the number of unique values contained in 
#' \code{BatchInfo} must be larger than 1 (i.e. multiple batches are present).
#'
#' @return An object of class \code{\linkS4class{SingleCellExperiment}},
#' including synthetic data generated by the model implemented in BASiCS.
#'
#' @examples
#'
#' # Simulated parameter values for 10 genes
#' # (7 biogical and 3 spike-in) measured in 5 cells
#' Mu <- c(8.36, 10.65, 4.88, 6.29, 21.72, 12.93, 30.19)
#' Mu_spikes <-  c(1010.72, 7.90, 31.59)
#' Delta <- c(1.29, 0.88, 1.51, 1.49, 0.54, 0.40, 0.85)
#' Phi <- c(1.00, 1.06, 1.09, 1.05, 0.80)
#' S <- c(0.38, 0.40, 0.38, 0.39, 0.34)
#' Theta <- 0.39
#' 
#' # Data with spike-ins, single batch
#' Data <- BASiCS_Sim(Mu, Mu_spikes, Delta, Phi, S, Theta)
#' head(SingleCellExperiment::counts(Data))
#' dim(SingleCellExperiment::counts(Data))
#' altExp(Data)
#' rowData(altExp(Data))
#' 
#' # Data with spike-ins, multiple batches
#' BatchInfo <- c(1,1,1,2,2)
#' Theta2 <- rep(Theta, times = 2)
#' Data <- BASiCS_Sim(Mu, Mu_spikes, Delta, Phi, S, Theta2, BatchInfo)
#' 
#' # Data without spike-ins, multiple batches
#' Data <- BASiCS_Sim(Mu, Mu_spikes = NULL, Delta, 
#'                    Phi = NULL, S, Theta2, BatchInfo)
#'
#' @author Catalina A. Vallejos \email{cnvallej@@uc.cl}, Nils Eling
#'
#' @references
#'
#' Vallejos, Marioni and Richardson (2015). PLoS Computational Biology.
#'
#' @export
BASiCS_Sim <- function(
    Mu,
    Mu_spikes = NULL,
    Delta,
    Phi = NULL,
    S,
    Theta,
    BatchInfo = NULL) {
  
  # Checks validity of input parameters
  HiddenHeaderBASiCS_Sim(Mu, Mu_spikes, Delta, Phi, S, Theta, BatchInfo)
  
  # With and without batch structure
  if (length(Theta) == 1) {
    DataAux <- HiddenBASiCS_Sim(Mu, Mu_spikes, Delta, Phi, S, Theta)
  } else {
    # Setup of a design matrix
    if (!is.factor(BatchInfo)) {
      BatchInfoAux <- as.factor(BatchInfo)  
      message(
        "'BatchInfo' was treated as a factor with ",
        length(levels(BatchInfoAux)), " levels: \n",
        paste(levels(BatchInfoAux), colapse = " ")
      )
    }
    BatchDesign <- model.matrix(~BatchInfoAux - 1) 
    ThetaAux <- as.vector(BatchDesign %*% Theta)
    DataAux <- HiddenBASiCS_Sim(Mu, Mu_spikes, Delta, Phi, S, ThetaAux)
  }
  
  if (!is.null(Mu_spikes)) {
    Spikes <- SingleCellExperiment::SingleCellExperiment(
      assays = list(counts = DataAux$Counts[DataAux$Tech, ]),
      rowData = DataAux$SpikeInfo
    )
    
    out <- SingleCellExperiment::SingleCellExperiment(
      assays = list(counts = DataAux$Counts[!DataAux$Tech, ]),
      altExps = list("spike-ins" = Spikes)
    )
  } else {
    out <- SingleCellExperiment::SingleCellExperiment(
      assays = list(counts = DataAux$Counts)
    )
  }
  if (!is.null(BatchInfo)) {
    colData(out)$BatchInfo <- data.frame(BatchInfo)
  }
  out
}
catavallejos/BASiCS documentation built on March 27, 2024, 12:49 a.m.