#' Estimate the cell type proportions of mixture bulk RNA
#'
#' This function estimates cell type proportions of mixture bulk RNA samples based on pre-trained cell type profiles.
#'
#' @param strBulk The full path to the query mixture bulk expression file.
#' Expression matrix separated by tabs with rows are genes, columns are samples.
#' First row includes the sample names or ids, while first column consists of gene symbols or gene ensembl id.
#' @param strProfile The full path to a pre-trained CellMap cell type profile.
#' The profile with ‘rds’ as file extension generated by \code{cellMapTraining} function.
#' @param strPrefix The prefix with path of the result files.
#' There are two files produced: a pdf file contains all cell type decomposition figures;
#' a tab separated table file including composition and p-values.
#' @param delCT Cell types should not be considered in the decomposition estimation.
#' A string with exact cell type names defined in the CellMap profile.
#' If more than one cell types needed to be removed, please separate them by commas (,). Default is \code{NULL}.
#' @param cellCol R colors for all cell types. A named vector of R colors, where names are cell type names.
#' Default is \code{NULL}, which means \code{$para$cellCol} from the provided CellMap profile will be used.
#' @param geneNameReady A boolean to indicate if the gene names in the query mixture bulk expression matrix is official symbol already.
#' The \code{FALSE} option also works with the official symbol is used in the expression matrix.
#' Default is \code{FALSE}, which enable to find official symbol by an R package called \code{biomaRt}.
#' @param ensemblPath The path to a folder where ensembl gene definition is/will be saved.
#' The ensembl gene definition file will be saved if it never run before.
#' Default is \emph{Data/} in the current working directory.
#' @param ensemblV The version of the ensembl to be used for the input query bulk expression. Default is 97.
#' @param bReturn A Boolean indicate if return object is needed.
#' \code{False}, no object returned but plots in a pdf as well as a tables in a tsv file.
#' \code{True}, return an R list object including details of raw decomposition results without generating any file.
#' @param bCutoff A numeric indicate the significant level. Default is 0.05.
#' @param core The number of computation nodes could be used. Default is 2.
#'
#' @return If \code{bReturn} is set to be \code{TRUE}, a named list object with detailed decomposition results is returned.
#' The following objects are in the list, and they can be accessed by ($) of the returned list object:
#' * \code{compoP} A matrix of the raw fitting coefficient for each sample (column) and each cell type (row).
#' It needed to normalize the sum of each column to be 1, in order to martch the output compisition table.
#' * \code{compoP} A matrix of the fitting p-values for each sample (column) and each cell type (row).
#' * \code{overallP} A vector of the overall fitting p-value for each sample.
#' * \code{rmse} A vector of the fitting RMSE for each sample.
#' * \code{coverR} A numeric indicate the ratio of cell type signature genes covered in the mixture bulk expression data
#' * \code{rawComp} A named list of all raw composition matrix, p-values, RMSE for each sample.
#' * \code{rawSets} A matrix of all sets of pure cell type combinations.
#' * \code{missingF} A vector of cell type signature genes which are not in the query bulk expresion data.
#' * \code{missingByCellType} A named list of cell type signature genes which are not in the query bulk expresion data for each cell type.
#' @examples
#' strMix <- system.file("extdata","bulk.txt",package="cellmap")
#' strProfile <- system.file("extdata","CNS6.rds",package="cellmap")
#' cellmap(strMix,strProfile,strPrefix="~/cellmap_CNS6_test")
#'
#' @export
cellmap <- function(strBulk,
strProfile,
strPrefix=substr(strBulk,1,nchar(strBulk)-4),
delCT=NULL,
cellCol=NULL,
geneNameReady=FALSE,
ensemblPath="Data/",
ensemblV=97,
bReturn=F,
pCutoff=0.05,
core=2){
eval(strBulk)
eval(strProfile)
oriSC <- strProfile
if(!file.exists(strProfile)){
strProfile <- paste("profile/",strProfile,".rds",sep="")
if(!file.exists(strProfile)){
strProfile <- system.file("extdata",basename(strProfile),package="cellmap")
if(!file.exists(strProfile)) stop(paste0(oriSC," cannot be found!"))
}
}
profile <- readRDS(strProfile)
if(!is.null(cellCol)) profile$para$cellCol[names(cellCol)] <- cellCol
X <- condensGene(read.table(strBulk,header=T,sep="\t",check.names=F,as.is=T),geneNameReady,ensemblV=ensemblV,ensemblPath=ensemblPath)#,row.names=1
## deconvolution --------
BiocParallel::register(BiocParallel::MulticoreParam(core))
strF <- paste0(strPrefix,"_CellMap_",profile$para$version,".pdf")
print(system.time({cellComp <- deconv(X,profile,rmCellType=delCT,modelForm=profile$para$modelForm)}))
#saveRDS(cellComp,file=gsub("pdf","rds",strF))
if(bReturn) return(cellComp)
pdf(strF,width=5+round(ncol(X)/3))
plotComposition(cellComp$composition,
profile$para$cellCol,pV=cellComp$compoP,cutoff.p=pCutoff,
ggplotFun=ggplot2::ggtitle(paste0("CellMap(",profile$para$version,") feature coverage:",cellComp$coverR,"%")))
tmp <- dev.off()
conn <- file(gsub("pdf$","tsv",strF),"w")
cat(paste('#Bulk:',strBulk,"Profile:",strProfile,"delCT:",paste(delCT,collapse=";"),"\n"),file=conn)
cat(paste0("CellMap (",profile$para$version,") feature coverage: ",cellComp$coverR,"%"),"\n",file=conn)
write.table(apply(cellComp$composition,2,function(x){return(round(x/sum(x),2))}),file=conn,sep="\t",col.names = NA,quote=F)
if(!is.null(cellComp$compoP)){
cat("pValue:\n",file=conn)
write.table(cellComp$compoP,file=conn,sep="\t",col.names = NA,quote=F)
}
if(!is.null(cellComp$overallP)){
cat("overall p-value:\n",file=conn)
write.table(matrix(cellComp$overallP,nrow=1,dimnames=list("overallP",names(cellComp$overallP))),
file=conn,sep="\t",col.names = NA,quote=F)
}
cat("\n\nmissing following cell type signature genes:\n",paste(cellComp$missingF,collapse="\n"),file=conn,sep="")
close(conn)
message("Decomposition on all samples is finished successfully!")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.