R/sce_cbind.R

Defines functions sce_cbind

Documented in sce_cbind

#' @title Combind several \code{SingleCellExperiment} objects from different batches/experiments
#'
#' @description Combind several \code{SingleCellExperiment} objects from 
#' different batches/experiments. 
#'
#' @param sce_list A list contains the \code{SingleCellExperiment} Object from each batch
#' @param method A string indicates the method of combining the gene expression matrix,
#' either \code{union} or \code{intersect}. Default to \code{intersect}. \code{union} only supports matrix class. 
#' @param cut_off_batch A numeric vector indicating the cut-off for the proportion of 
#' a gene is expressed within each batch
#' @param cut_off_overall A numeric vector  indicating the cut-off for the proportion of 
#' a gene is expressed overall data
#' @param exprs A string vector indicating the expression matrices to be combined. 
#' The first assay named will be used to determine the proportion of zeores.
#' @param colData_names A string vector indicating the \code{colData} that are combined
#' @param batch_names A string vector indicating the batch names for the output sce object
#' @import SingleCellExperiment
#'
#' @return A \code{SingleCellExperiment} object with the list of SCE objects combined.
#' @author Yingxin Lin
#' @export
#' @examples
#' data('example_sce', package = 'scMerge')
#' batch_names<-unique(example_sce$batch)
#' sce_list<-list(example_sce[,example_sce$batch=='batch2'],
#'                example_sce[,example_sce$batch=='batch3'])
#' sce_combine<-sce_cbind(sce_list,batch_names=batch_names)


sce_cbind <- function(sce_list, method = "intersect", cut_off_batch = 0.01, 
    cut_off_overall = 0.01, exprs = c("counts", "logcounts"), 
    colData_names = NULL, batch_names = NULL) {
    
    message("The assay named '", exprs[1], "' will be used to determine the proportion of zeroes for each batch")
    
    if (method == "union" & !is.matrix(SummarizedExperiment::assay(sce_list[[1]], exprs[1]))) {
        stop("The union method only supports matrix class in the sce object \n ")
    }
    
    n_batch <- length(sce_list)
    zero_list <- lapply(sce_list, 
                        function(x) DelayedArray::rowMeans(SummarizedExperiment::assay(x, exprs[1])  == 0))
    
    expressed_list <- lapply(zero_list, function(x) names(which(x <= 
        (1 - cut_off_batch))))
    for (i in seq_len(n_batch)) {
        sce_list[[i]] <- sce_list[[i]][expressed_list[[i]], ]
    }
    
    
    
    if (!method %in% c("union", "intersect")) {
        stop("The method parameter can only be either 'union' or 'intersect'. \n ")
    }
    
    
    
    if (method == "intersect") {
        keep <- Reduce(intersect, expressed_list)
        sce_list <- lapply(sce_list, function(x) x[keep, ])
        assay_list <- list()
        for (i in seq_len(length(exprs))) {
            assay_list[[i]] <- do.call(cbind, lapply(sce_list, 
                function(y) assay(y, exprs[i])))
        }
        names(assay_list) <- exprs
        colData_list <- do.call(DelayedArray::rbind, 
                                lapply(sce_list, function(y) colData(y)[, colData_names, drop = FALSE]))
        sce_combine <- SingleCellExperiment::SingleCellExperiment(assay = assay_list, 
            colData = colData_list)
    }
    
    if (method == "union") {
        keep <- Reduce(union, expressed_list)
        
        assay_list <- list()
        for (i in seq_len(length(exprs))) {
            assay_list[[i]] <- do.call(DelayedArray::cbind, 
                                       lapply(sce_list, function(x) {
                  mat <- matrix(0, nrow = length(keep), ncol = ncol(x), 
                                dimnames = list(keep, colnames(x)))
                  mat[rownames(x), ] <- SummarizedExperiment::assay(x, exprs[i])
                  return(mat)
                }))
        }
        names(assay_list) <- exprs
        colData_list <- do.call(rbind, lapply(sce_list, function(y) colData(y)[, 
            colData_names, drop = FALSE]))
        
        sce_combine <- SingleCellExperiment(assay = assay_list, 
            colData = colData_list)
        
        # zero_cbind <- apply(assay(sce_combine, exprs[1]), 1, 
        #     function(z) mean(z == 0))
        zero_cbind <- DelayedArray::rowMeans(assay(sce_combine, exprs[1]) == 0)
        sce_combine <- sce_combine[names(zero_cbind[zero_cbind <= 
            (1 - cut_off_overall)]), ]
    }
    
    if (is.null(batch_names)) {
        batch_names <- paste("batch", seq_len(n_batch))
    }
    
    sce_combine$batch <- rep(batch_names, unlist(lapply(sce_list, 
        ncol)))
    
    return(sce_combine)
}
SydneyBioX/scMerge documentation built on April 14, 2024, 3:22 a.m.