R/reMixMisc.R

#' Preparing a Dataset for a Stimulation Respone Analysis with flowReMix
#'
#' Given a dataset and a list of stimulation groups, the function pairs each
#' stimulation group with control stimulations and defines a new cell_type variable
#' which is the interaction of the previous cell type with stimulation group.
#'
#' @param data the data set to be transformed.
#'
#' @param cell_type a factor vector identifying which cell type each row in the data
#' refers to.
#'
#' @param stim_var a vector identifying the which stimulation each row in the data refers to.
#'
#' @param controls a vector identifying which of the stimulations are controls.
#'
#' @param stim_groups a list detailing which stimulations are to be grouped together.
#'
#' @export
stimulationModel <- function(data, cell_type, stim_var, controls, stim_groups) {
  mc <- match.call()

  err0 <- length(stim_groups) == 1
  err1 <- !is.list(stim_groups)

  if(err0 | err1) {
    stop("stim_groups must be a list, each component of which is a vector of related stimulations.
         If there is only one group of stimulation, then there is no need to use this function.")
  }

  for(j in 1:length(stim_groups)) {
    if(any(controls %in% stim_groups[[j]])) {
      stop("The control stimulations must not be a part of any of the stimulation groups")
    }
  }

  stimname <- as.character(mc$stim_var)
  stimvec <- data[[stimname]]
  stimvec <- as.character(stimvec)
  if(!any(controls %in% unique(stimvec))) {
    stop("Can't find control stimulation in the dataset!")
  }

  notInGroups <- subset(unique(stimvec), !(unique(stimvec) %in% unlist(stim_groups)))
  notInGroups <- subset(notInGroups, !(notInGroups %in% controls))
  if(length(notInGroups) > 0) {
    warning(paste("The following stimulations are not in stim_groups:", notInGroups, collapse = ", "))
  }

  stimvec[stimvec %in% controls] <- "ctrl"
  stimlevels <- unique(stimvec) %>% subset(., . != "ctrl") %>% c("ctrl", .)
  stimvec <- factor(stimvec, levels = stimlevels)
  data[[stimname]] <- stimvec
  ctrldat <- subset(data, stimvec == "ctrl")
  if(is.null(names(stim_groups)) | any(is.na(names(stim_groups)))) {
    sNames <- sapply(stim_groups, function(x) x[1])
  } else {
    sNames <- names(stim_groups)
  }
  subdat <- vector(length(stim_groups), mode = "list")
  for(i in 1:length(subdat)) {
    subdat[[i]] <- subset(data, stimvec %in% stim_groups[[i]]) %>% rbind(ctrldat)
    subdat[[i]]$stimGroup <- sNames[i]
  }

  subdat <- do.call("rbind", subdat)
  subdat$stimCellType <- interaction(subdat$stimGroup, subdat[[as.character(mc$cell_type)]], sep = "/")

  message("To fit the stimulation response model, run flowReMix with `cell_type = stimCellType'.")
  return(subdat)
}



#' Auxiliary for Controlling flowReMix Fitting.
#'
#' @description Auxiliary function for \code{\link{flowReMix}} fitting. Can be
#'   used to generate an appropriate object for modifying the fitting process of
#'   the stochastic EM algorithm used by \code{flowReMix}.
#'
#' @param updateLag number of iterations before the algorithm is assumed to
#'   converge, at which time the parameter estimates will be aggregated.
#'
#' @param randomAssignProb an optional parameter for adding noise to the cluster
#'   assignments generated by the Gibbs sampler. Should only be changed if ising
#'   model estimates are unstable.
#'
#' @param nsamp number of Gibbs/componentwise MH cycles to perform for each
#'   subject. Must be larger than keepEach.
#'
#' @param lastSample how many samples to keep from the final iteration.
#'
#' @param initMHcoef the initial value for the shrinkage/inflation to perform on
#'   the estimated covariance in the componentwise MH sampler. The initial value
#'   does not matter much as this parameter self-tunes as the algorithm runs and
#'   usually converges to a good value within a few iterations.
#'
#' @param nPosteriors number of posterior samples to take per subject. If left
#'   as \code{NULL} then a value will be determined within flowReMix according
#'   to a preset formula.
#'
#' @param minDispersion the minimum overdispersion allowed. The larger the value of the variable the less overdispersion is allowed.
#'
#'
#' @param isingInit initialize the Ising model with this value.
#'
#' @param maxDispersion the maximum overdispersion level allowed. The lower the
#'   value of the variable the more overdispersion is allowed. Must be larger
#'   than 0.
#'
#' @param keepEach one out of how many Gibbs/MH samples to keep. This is used to
#'   reduce the dependence between posterior samples.
#'
#' @param centerCovariance whether to center random effect estimates before
#'   computing the covariance estimate or not.
#'
#' @param intSampSize number of importance samples to take when performing
#'   univariate numerical integration in the Gibbs sampler.
#'
#' @param initMethod the method used to initialize the regression coefficients.
#'   Options are either "sparse" for \code{\link[glmnet]{cv.glmnet}} or "binom"
#'   for \code{\link[stats]{glm}}. If left as \code{NULL} then it will be determined
#'   according to the regression_method specified for the \code{flowReMix} call.
#'
#' @param ncores The number of cpu cores to use to fit the model in parallel.
#'
#' @param preAssignCoefs coefficients to multiply the posterior probabilities. 0 is a hard assignment and observations that are
#' designated non-responders based on pu > ps will have posterior probabilities of 0. > 0 is a soft assignment, and a prior will be
#' placed on the prior probability of non-response in the Ising model.
#'
#' @param markovChainEM \code{logical} use the mcEM algorithm to fit the model. Default TRUE.
#'
#' @param zeroPosteriorProbs boolean. \code{TRUE} will zero out posterior response
#' probabilities where pu>ps, equivalent to a one-sided test. The full set of responses
#' will still be used to estimate the Ising model. Default \code{FALSE}. Can be used together
#' with the prior argument on the Ising model.
#'
#' @param seed \code{numeric} a random seed for reproducible initialization. Default 100.
#'
#' @param prior \code{numeric} value, a prior for response and non-response in the Ising model
#' used constrain non-responders (e.g. when pu>ps).
#'
#' @param isingWprior \code{logical} fit the Ising model with a prior on the baseline response using the parameter in \code{prior}. Default TRUE.
#'
#' @param clusterType \code{character} type of cluster. AUTO, FORK, SOCK. Default AUTO. Can be changed if the default doesn't work.
#'
#' @param sampleNew \code{logical} should the stability selection draw new samples. Default \code{FALSE}.
#'
#' @return An object of type \code{flowReMix_control}.
#'
#' @export
flowReMix_control <- function(updateLag = 10, randomAssignProb = 1e-8, nsamp = 50,
                              lastSample = NULL, initMHcoef = 2.5, nPosteriors = 3,
                              maxDispersion = 10^3, minDispersion = 10^7, isingInit = -4.59512,
                              keepEach = 5, centerCovariance = FALSE, intSampSize = 100,
                              initMethod = "robust", ncores = NULL, preAssignCoefs = 1,
                              markovChainEM = TRUE, seed=100, prior = 0,
                              isingWprior = TRUE, zeroPosteriorProbs = FALSE,
                              clusterType = c("AUTO","FORK","SOCK"),
                              isingStabilityReps = 200, randStabilityReps = 0,
                              stabilityGamma = 0.9, stabilityAND = TRUE,
                              learningRate = 0.6, keepWeightPercent = 0.9, sampleNew = FALSE,
                              subsetDiscardThreshold = 0, threads = NULL) {
  object <- list(updateLag = updateLag,
                 randomAssignProb = randomAssignProb,
                 nsamp = nsamp,
                 lastSample = lastSample,
                 initMHcoef = initMHcoef,
                 nPosteriors = nPosteriors,
                 maxDispersion = maxDispersion,
                 minDispersion = minDispersion,
                 isingInit = isingInit,
                 keepEach = keepEach,
                 centerCovariance = centerCovariance,
                 intSampSize = intSampSize,
                 initMethod = initMethod,
                 ncores = ncores,
                 preAssignCoefs = preAssignCoefs,
                 markovChainEM = markovChainEM,
                 seed=seed,
                 prior = abs(prior),
                 isingWprior = isingWprior,
                 zeroPosteriorProbs = zeroPosteriorProbs,
                 clusterType=c("AUTO","SOCK","FORK"),
                 isingStabilityReps = isingStabilityReps,
                 randStabilityReps = randStabilityReps,
                 stabilityGamma = stabilityGamma,
                 stabilityAND = stabilityAND,
                 learningRate = learningRate,
                 keepWeightPercent = keepWeightPercent,
                 sampleNew = sampleNew,
                 subsetDiscardThreshold = subsetDiscardThreshold, threads = threads)
  class(object) <- "flowReMix_control"
  return(object)
}
RGLab/flowReMix documentation built on May 8, 2019, 5:55 a.m.