#' Correct for batch effects
#' @description Correct the merged count data based on rank values to obtain a set of reduced and corrected dimensions.
#' @details Concatenate the expression counts of all cells from different batches into one expression count object.
#' The merging is done on the set of union of DE genes obtained from the clustering of each batch.
#' @param object A list of SingleCellExperiment objects, each representing a SingleCellExperiment object from a single batch.
#' @param method character, one of c("default","fastmnn").
#' \code{default} mode performs the dropClust based correction followed by UMAP embedding.
#' The \code{fastmnn} option performs the mutual neighbourhood based correction which is implemented in the \code{batchelor} package.
#' when \code{FALSE} the batches are merged on the set of common genes across batches.
#' @return A SingleCellExperiment object with two new entry under the
#' \code{reducedDim()} container to store the reduced dimension components
#' with the name \code{"CComponents"} and the rank expression matrix named \code{"RankMat"}.
#' @param close_th for the method = default, specifies the value at which the
#' expression values of two genes will be considered as close pairs.
#' @param cells_th for the method default, specifies the value to
#' determine what proportion of total number of cells have close pairs.
#' @param components number of reduced dimensions to return.
#' @param ... \code{umap} arguments may be passed.
#' @importFrom SingleCellExperiment reducedDim normcounts
#' @export
#' @examples
#' \dontrun{
#' library(SingleCellExperiment)
#' ncells <- 100
#' ngenes <- 1200
#' lambda <-abs(rnorm(ngenes))
#' counts.1 <- matrix(rpois(ncells*ngenes, lambda = lambda), ncol=ncells, nrow=ngenes, byrow=TRUE)
#' rownames(counts.1) <- paste0("Gene", seq_len(ngenes))
#' colnames(counts.1) <- paste0("Cell", seq_len(ncells))
#' sce.1 <- SingleCellExperiment(assays = list(counts = counts.1))
#' rowData(sce.1)$Symbol <- paste0("Gene", seq_len(ngenes))
#'
#' lambda <-abs(rnorm(ngenes))
#' counts.2 <- matrix(rpois(ncells*ngenes, lambda = lambda), ncol=ncells, nrow=ngenes, byrow=TRUE)
#' rownames(counts.2) <- paste0("Gene", seq_len(ngenes))
#' colnames(counts.2) <- paste0("Cell", seq_len(ncells))
#' sce.2 <- SingleCellExperiment(assays = list(counts = counts.2))
#' rowData(sce.2)$Symbol <- paste0("Gene", seq_len(ngenes))
#'
#' mixed_sce <- Merge(list(sce.1, sce.2), use.de.genes =TRUE)
#' mixed_sce <- Correction(mixed_sce, close_th=0.1, cells_th=0.2)
#' }
Correction<-function(object, method="default", close_th=0.1, cells_th=0.1, components=2, ...){
batchids = object$Batch
temp<- CountNormalize(object)
if(method=="fastmnn"){
mat = Log2Normalize(normcounts(temp))
mnn.obj = batchelor::fastMNN(mat,batch = batchids, k=20, pc.input = FALSE,...)
reducedDim(temp,"CComponents") = reducedDim(mnn.obj,"corrected")
SummarizedExperiment::assay(temp, "reconstructed")<-SummarizedExperiment::assay(mnn.obj, "reconstructed")
} else{
if(!is.null(rowData(temp)$CommonDEGenes))
mat = Log2Normalize(normcounts(temp)[rowData(temp)$CommonDEGenes,])
else
mat = Log2Normalize(normcounts(temp))
corrected = as.matrix(.batcheffect.dc(t(mat), close_th = close_th, cells_th = cells_th))
cat("Embedding with UMAP...")
if(!is.null(temp$Sampling)){
umap_model = uwot::umap(corrected[temp$Sampling,], metric = 'cosine', n_components = components, ret_model=TRUE, ...)
umap_proj_dc = uwot::umap_transform(corrected, umap_model)
} else
umap_proj_dc = uwot::umap(corrected, metric = 'cosine', n_components = components, ...)
cat("Done\n")
reducedDim(temp, "RankMat")<-corrected
reducedDim(temp, "CComponents")<-umap_proj_dc
}
object@metadata[["dropClust"]] = c(object@metadata[["dropClust"]], "Correction")
return(temp)
}
.batcheffect.dc<-function(amatrix, close_th = 0.01, cells_th = 0.05){
### Gene Removal for Ranking
cat("Batch correcting...\n")
#removes gene-columns and returns a reduced expression matrix
#alpha marks the percentage of cells which need to be close for one of the genes to be removed
input <- list(matrix = as.matrix(amatrix),
close_threshold = close_th,
cells_threshold = cells_th,
nreduce = 1000)
remove_these <- reduce_genes_cpp(input)
reduced_matrix <- amatrix[, which(remove_these==0)]
cat("from",ncol(amatrix), "to",ncol(reduced_matrix),"genes.\n" )
# mute genes
# muted_matrix <- .mutegenes(reduced_matrix)
### Converting the given matrix to rank vectors
rank_mat <- .get_rank_mat_r(as.matrix(reduced_matrix))
dim(rank_mat)
whole = Matrix::Matrix(rank_mat, sparse = TRUE)
return(whole)
}
#
# .mutegenes<-function(matrix){
#
# }
#
### Converting the given matrix to rank vectors
.get_rank_mat_r <- function(matrix) {
# cat("Constructing Rank matrix...\n")
# Gets the gene names from the data frame
columns <- names(matrix)
# Creates a new data frame the same no of rows and columns
# This data frame will be returned
new_matrix <- matrix(nrow = dim(matrix)[1], ncol = dim(matrix)[2])
# Setting the columns of the new dataframe as the genes from the original matrix
# names(new_matrix) <- columns
# Iterating over the matrix and getting the rank vector for each cell
for (iter in 1:dim(matrix)[1]) {
# Gives the names of the genes in sorted order based on the gene-expression value
um_vec = matrix[iter,]
# nth_max_gene = order(um_vec,decreasing = T)[100]
# um_vec[which(um_vec<nth_max_gene)] = 0
new_matrix[iter,]<-rank(replace(um_vec, um_vec==0, NA), na.last = 'keep' )
}
new_matrix[is.na(new_matrix)] <- 0
# Return the rank-vectors dataframe
return(new_matrix)
}
# .reduce_genes_r<-function(input){
# mat = input$matrix
# close_threshold = input$close_threshold
# cells_threshold = input$cells_threshold
#
# # print(dim(mat))
# # print(close_threshold)
# # print(cells_threshold)
# ncells = nrow(mat)
# ngenes = ncol(mat)
# keep_genes = rep(0, ngenes)
#
# vote_matrix = mat.or.vec(nr = ngenes,nc = ngenes)
# score_g = c()
#
# for(i in 1:ngenes){
# for(j in 1:ngenes){
# # delta_ratios = abs(mat[,i]-mat[,j])/pmax(mat[,i],mat[j])
# signs = sign(mat[,i]-mat[,j])
# signs_1 = sum(signs > 0)
# signs_2 = sum(signs < 0)
# fraction = min(signs_1, signs_2)/max(signs_1, signs_2)
# score = ifelse(fraction>=0.5,1,0)
# # print(delta_ratios)
# # if(sum(delta_ratios<close_threshold) > (cells_threshold * ncells)) {
# # vote_matrix[i,j] = vote_matrix[i,j] - 1
# # }
# # else
# # vote_matrix[i,j] = vote_matrix[i,j] + 1
# }
# score_g = c(score_g, sum(score)/ngenes)
# }
#
# keep_genes = ifelse(score_g> cells_threshold, 1,0)
#
# # rs = rowSums(vote_matrix)
# # ordered = order(rs,decreasing = T)
# # qualified = setdiff(ordered, which(seq(ngenes)>0))
#
# return(keep_genes)
# }
#
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.