R/nbresidual.R

Defines functions nbresidual

Documented in nbresidual

#' Extract Pearson residuals from the results of NEBULA
#'
#' @param nebula An object of the result obtained from running the function nebula.
#' @param count A raw count matrix of the single-cell data. The rows are the genes, and the columns are the cells. The matrix can be a matrix object or a sparse dgCMatrix object.
#' @param id A vector of subject IDs. The length should be the same as the number of columns of the count matrix.
#' @param pred A design matrix of the predictors. The rows are the cells and the columns are the predictors. If not specified, an intercept column will be generated by default.
#' @param offset A vector of the scaling factor. The values must be strictly positive. If not specified, a vector of all ones will be generated by default. 
#' @param conditional A logical value. By default (FALSE), the function returns marginal Pearson residuals. If TRUE, the function will return conditional Pearson residuals.
#' @return residuals: A matrix of Pearson residuals. The number of columns is the number of cells in the count matrix. The rows correspond to gene IDs reported in the result from nebula.
#' @return gene: Gene names corresponding to the row names of the count matrix.
#' @export
#' @examples
#' library(nebula)
#' data(sample_data)
#' pred = model.matrix(~X1+X2+cc,data=sample_data$pred)
#' re = nebula(count=sample_data$count,id=sample_data$sid,pred=pred)
#' resid = nbresidual(re,count=sample_data$count,id=sample_data$sid,pred=pred)
#' 


nbresidual = function(nebula, count, id, pred = NULL, offset = NULL, conditional = FALSE)
{
  if((conditional==TRUE) & (nebula$algorithm[1]=='PGMM'))
  {
    warning("The option conditional=TRUE does not support PGMM yet. Marginal Pearson residuals will be returned. Or try using NBLMM or NBGMM.")
    conditional <- FALSE
  }
  
  if((conditional==TRUE) & (is.null(nebula$random_effect)))
  {stop("To obtain conditional Pearson residuals, please first run nebula with output_re=TRUE.")}
  
  ncell = ncol(count)
  ngene = nrow(nebula$summary)
  
  if (is.null(pred)) {
    pred = matrix(1, ncol = 1, nrow = ncell)
    nb = 1
  }else{
    pred = as.matrix(pred)
    nb = ncol(pred)
  }
  
  sds = apply(pred,2,sd)
  intercept = which(sds==0)
  if(nb>1)
  {pred[,-intercept] = scale(pred[,-intercept],scale=FALSE)}
  pred[,intercept] = 1
  
  if (is.null(offset)) {
    offset = rep(1,ncell)
  }
  logoff = log(offset)
  
  if(conditional==TRUE){
    id = as.character(id)
    levels = unique(id)
    id = as.numeric(factor(id,levels=levels))
  }
  
  # nbres = matrix(NA,ngene,ncell)
  
  sigma2ind = as.matrix(nebula$overdispersion)[,1]
  sigma2cell = nebula$overdispersion[,2]
  sigma2cell[which(nebula$algorithm=='PGMM')] = 0
  
  if(conditional==FALSE)
  {
    ey = exp(pred%*%t(nebula$summary[,1:nb]) + logoff + rep(sigma2ind/2,each=ncell))
    vary = ey + t(t(ey*ey)*(exp(sigma2ind)*(1+sigma2cell)-1))
  }else{
    ey = exp(pred%*%t(nebula$summary[,1:nb]) + logoff + as.numeric(t(nebula$random_effect[,id])))
    vary = ey + t(t(ey*ey)*sigma2cell)
  }
  residuals = as.matrix(t(count[nebula$summary[,'gene_id'],]) - ey)/sqrt(vary)

  residuals = t(residuals)
  rownames(residuals) = nebula$summary[,'gene_id']
  colnames(residuals) = colnames(count)
  
  return(list(residuals=residuals,gene=rownames(count)[nebula$summary[,'gene_id']]))
}

Try the nebula package in your browser

Any scripts or data that you put into this service are public.

nebula documentation built on May 29, 2024, 8:56 a.m.