#require("Seurat")
#require("tictoc")
#require("tictoc")
#require("dplyr")
#require("cowplot")
#require("plyr")
#require("ggplot2")
#suppressPackageStartupMessages(library("harmony"))
#' Wraper for highly variable genes finding
#' @importFrom Seurat FindVariableFeatures `VariableFeatures<-`
#' @importFrom tibble rownames_to_column
#' @importFrom utils head str
#' @importFrom magrittr `%>%`
#' @importFrom dplyr arrange
#' @param seu object of \code{Seurat}
#' @param gene.exclude.df data.frame; gene blak list. Required column: seu.id.
#' @param n.top integer; number of top genes. (default: 1500)
#' @param measurement character; "counts", "TPM" or "cpm". (default: "counts")
#' @return a Seurat object
#' @export
run.HVG <- function(seu,gene.exclude.df,n.top=1500,measurement="counts")
{
seu <- FindVariableFeatures(object = seu,assay="RNA")
if(measurement=="TPM"){
#### TPM
hvg.gene.info <- seu@assays$RNA@meta.features %>%
tibble::rownames_to_column(var="geneSymbol") %>%
arrange(-vst.variance)
}else{
#### counts, cpm
hvg.gene.info <- seu@assays$RNA@meta.features %>%
tibble::rownames_to_column(var="geneSymbol") %>%
arrange((-vst.variance.standardized))
}
if(!is.null(gene.exclude.df)){
f.hvg <- !(hvg.gene.info$geneSymbol %in% gene.exclude.df[["seu.id"]]) &
!(grepl("^(RP[LS]|Rp[ls])",hvg.gene.info$geneSymbol,perl=T)) &
!(hvg.gene.info$geneSymbol %in% c("MALAT1","MALAT1-ENSG00000251562",
"Malat1",
"MALAT1-ENSG00000279576","MALAT1-ENSG00000278217"))
hvg.gene.info.flt <- hvg.gene.info[f.hvg,]
}else{
hvg.gene.info.flt <- hvg.gene.info
}
if(measurement=="TPM"){
hvg.gene.info.flt$rank.perc <- rank(-hvg.gene.info.flt$vst.variance)/nrow(hvg.gene.info.flt)
}else {
hvg.gene.info.flt$rank.perc <- rank(-hvg.gene.info.flt$vst.variance.standardized)/nrow(hvg.gene.info.flt)
}
VariableFeatures(seu) <- head(hvg.gene.info.flt,n=n.top)$geneSymbol
print(str(seu@assays$RNA@var.features))
print(head(hvg.gene.info.flt))
seu@assays$RNA@misc$meta.features.flt <- hvg.gene.info.flt
return(seu)
}
#' Wraper for running Seurat3 pipeline
#' @importFrom Seurat CreateSeuratObject SetAssayData GetAssayData CellCycleScoring AddModuleScore ScaleData SCTransform RunPCA ProjectDim RunUMAP RunTSNE FindNeighbors FindClusters Embeddings DimPlot NoLegend
#' @importFrom SingleCellExperiment `reducedDim<-`
#' @importFrom SummarizedExperiment assayNames assay `assay<-` rowData `rowData<-` colData `colData<-`
#' @importFrom harmony RunHarmony
#' @importFrom data.table data.table fread
#' @importFrom S4Vectors DataFrame
#' @importFrom sscVis ssc.build loginfo ssc.plot.tsne ssc.plot.violin
#' @importFrom sscClust ssc.DEGene.limma
#' @importFrom ggplot2 ggsave
#' @importFrom cowplot plot_grid save_plot
#' @importFrom RhpcBLASctl omp_set_num_threads
#' @importFrom doParallel registerDoParallel
#' @importFrom utils head str data
#' @importFrom tictoc tic toc
#' @param seu object of \code{Seurat}
#' @param sce object of \code{SingleCellExperiment}
#' @param out.prefix character; output prefix
#' @param gene.exclude.df data.frame; gene blak list. Required column: seu.id.
#' @param n.top integer; number of top genes. (default: 1500)
#' @param measurement character; "counts", "TPM" or "cpm". (default: "counts")
#' @param platform character; "10X", "SmartSeq2", "InDrop" etc.. (default: "10X")
#' @param opt.res character; optimal resolution (default: "1")
#' @param use.sctransform logical; whether use scTransform method (default: FALSE)
#' @param aid character; an ID (default: "PRJ")
#' @param plot.rd character vector; reducedDimNames used for plots (default: c("umap"))
#' @param opt.npc integer; optimal number of principal componets to use (default: 15)
#' @param ncores integer; number of CPU cores to use (default: 16)
#' @param cor.var character vector; Subset of c("S.Score","G2M.Score","DIG.Score1","ISG.Score1","percent.mito","batchV") or NULL. (default: c("S.Score","G2M.Score","DIG.Score1","percent.mito","batchV")).
#' @param ncell.deg integer; number of cell to downsample. used in the differentially expressed gene analysis. (default: 1500)
#' @param do.deg logical; whether perform the differentially expressed gene analysis. (default: FALSE)
#' @param do.scale logical; whether scale the expression data. (default: FALSE)
#' @param use.harmony logical; whether use the harmony method. (default: FALSE)
#' @param method.integration character; integration method. (default: NULL)
#' @param specie character; specie, one of "human", "mouse". (default: "human")
#' @param gene.mapping.table data.table; used for gene ID conversion. (default: NULL)
#' @param res.addition character vector; additional resolution parameters. Internally, resolutions from 0.1 to 2.4 will be used. (default: NULL)
#' @param run.stage integer; running stage. (default: 100)
#' @return a list contain a Seurat object and a SingleCellExperiment object
#' @details run the Seurat3 pipeline
#' @export
run.Seurat3 <- function(seu,sce,out.prefix,gene.exclude.df,n.top=1500,
measurement="counts",platform="10X",
opt.res="1",use.sctransform=F,aid="PRJ",plot.rd=c("umap"),
opt.npc=15,ncores=16,
#do.adj=T,
### remove this function: If certain variables are corrected (!is.null(cor.var) || cor.var!="NULL"), the pipeline will also correct for batchV and percent.mito
cor.var=c("S.Score","G2M.Score","DIG.Score1","percent.mito","batchV"),
ncell.deg=1500,do.deg=F,do.scale=F,
use.harmony=F,
method.integration=NULL,
specie="human",
gene.mapping.table=NULL,res.addition=NULL,
run.stage=100)
{
if(use.sctransform && platform!="SmartSeq2"){
opt.res <- sprintf("SCT_snn_res.%s",opt.res)
}else{
opt.res <- sprintf("RNA_snn_res.%s",opt.res)
}
cat(sprintf("opt.res: %s\n",opt.res))
if(measurement=="TPM"){
#### TPM
assay.name <- "log2TPM"
}else{
#### counts, cpm
assay.name <- "norm_exprs"
}
###
if(is.null(seu) && !is.null(sce)){
if("counts" %in% assayNames(sce)){
exp.mat <- assay(sce,"counts")
rownames(exp.mat) <- rowData(sce)[["seu.id"]]
seu <- CreateSeuratObject(exp.mat, min.cells = 0, min.features = 0,
meta.data = as.data.frame(colData(sce)), project = aid)
exp.mat <- assay(sce,assay.name)
rownames(exp.mat) <- rowData(sce)[["seu.id"]]
seu <- SetAssayData(seu, assay = "RNA", slot = 'data',new.data = exp.mat)
rm(exp.mat)
}else{
### norm_exprs, i.e. log2(x+1), expected
exp.mat <- assay(sce,assay.name)
rownames(exp.mat) <- rowData(sce)[["seu.id"]]
seu <- CreateSeuratObject(exp.mat, min.cells = 0, min.features = 0,
meta.data = as.data.frame(colData(sce)), project = aid)
## both the "counts" and "data" slots are the same as exp.mat
## nothing to do
}
}
if(is.null(gene.mapping.table)){
gene.mapping.table <- data.table(geneID=as.character(rownames(seu)),
seu.id=as.character(rownames(seu)),
display.name=as.character(rownames(seu)))
}
if(is.null(sce) && !is.null(seu)){
exp.mat <- GetAssayData(seu, slot = "counts",assay="RNA")
if(ncol(exp.mat) > 0 && nrow(exp.mat) > 0)
{
rownames(exp.mat) <- gene.mapping.table$geneID[match(rownames(exp.mat),
gene.mapping.table$seu.id)]
sce <- ssc.build(exp.mat,assay.name="counts")
rowData(sce)$geneID <- rownames(sce)
rowData(sce)$display.name <- gene.mapping.table[match(rownames(sce),
gene.mapping.table$geneID)
][["display.name"]]
names(rowData(sce)$display.name) <- rownames(sce)
rowData(sce)$seu.id <- gene.mapping.table[match(rownames(sce),
gene.mapping.table$geneID)][["seu.id"]]
print(head(rowData(sce)))
#print(all(colnames(seu)==rownames(colData(sce))))
#print(all(rownames(seu)==rowData(sce)$seu.id))
colData(sce) <- DataFrame(seu[[]])
}
exp.mat <- GetAssayData(seu, slot = "data",assay="RNA")
if(ncol(exp.mat) > 0 && nrow(exp.mat) > 0)
{
rownames(exp.mat) <- gene.mapping.table[match(rownames(exp.mat),
gene.mapping.table$seu.id)][["geneID"]]
if(!is.null(sce)){
assay(sce,assay.name) <- exp.mat
}else{
sce <- ssc.build(exp.mat,assay.name=assay.name)
rowData(sce)$geneID <- rownames(sce)
rowData(sce)$display.name <- gene.mapping.table[match(rownames(sce),
gene.mapping.table$geneID)
][["display.name"]]
names(rowData(sce)$display.name) <- rownames(sce)
rowData(sce)$seu.id <- gene.mapping.table[match(rownames(sce),
gene.mapping.table$geneID)][["seu.id"]]
print(head(rowData(sce)))
#print(all(colnames(seu)==rownames(colData(sce))))
#print(all(rownames(seu)==rowData(sce)$seu.id))
colData(sce) <- DataFrame(seu[[]])
}
}
if(is.null(sce)){
stop(sprintf("There are not counts or data slots in the RNA assay in the Seurat object!"))
}
#metadata(sce)$ssc[["variable.gene"]][["var.seurat"]] <- VariableFeatures(seu)
#reducedDim(sce,"seurat.pca") <- Embeddings(seu, reduction = "pca")
#reducedDim(sce,"seurat.umap") <- Embeddings(seu, reduction = "umap")
}
if(!all(colnames(sce)==colnames(seu))){
warning("seu and sce are not consistent!")
}
###### patch ######
if(!"percent.mito" %in% colnames(seu[[]]))
{
idx.pmt <- grep("^(percent.mito|percent.mt)$",colnames(seu[[]]),value=T,perl=T)
if(length(idx.pmt) > 0){
seu$percent.mito <- seu[[]][,idx.pmt[1]]
sce$percent.mito <- seu$percent.mito
}
}
if(!"patient" %in% colnames(seu[[]]))
{
idx.patient <- grep("^(patient|Patient|PatientID)$",colnames(seu[[]]),value=T,perl=T)
if(length(idx.patient) > 0){
seu$patient <- seu[[]][,idx.patient[1]]
}else{
seu$patient <- "PXX"
}
}
if(!"batchV" %in% colnames(seu[[]])) { seu$batchV <- seu$patient }
sce$patient <- seu$patient
sce$batchV <- seu$batchV
###################
nBatch <- length(table(sce$batchV))
loginfo(sprintf("Total batchs: %d",nBatch))
print(str(table(sce$batchV)))
#####
RhpcBLASctl::omp_set_num_threads(1)
doParallel::registerDoParallel(cores = ncores)
plot.all <- function(rd="umap",resolution.vec=seq(0.1,2.4,0.1))
{
if(!is.null(seu@meta.data[["loc"]])){
p <- DimPlot(seu,reduction=rd, group.by = "loc")
ggsave(sprintf("%s.%s.%s.png",out.prefix,rd,"loc"),width=5,height=4)
}
if(!is.null(seu@meta.data[["stype"]])){
p <- DimPlot(seu,reduction=rd, group.by = "stype")
ggsave(sprintf("%s.%s.%s.png",out.prefix,rd,"stype"),width=5,height=4)
}
if(!is.null(seu@meta.data[["cancerType"]])){
p <- DimPlot(seu,reduction=rd, group.by = "cancerType")
ggsave(sprintf("%s.%s.%s.png",out.prefix,rd,"cancerType"),width=5,height=4)
}
if(!is.null(seu@meta.data[["libraryID"]])){
if(length(unique(seu$libraryID))>20){
p <- DimPlot(seu,reduction=rd,label=F,label.size=2,
group.by = "libraryID") + NoLegend()
ggsave(sprintf("%s.%s.%s.png",out.prefix,rd,"libraryID"),width=4.5,height=4)
}else{
p <- DimPlot(seu,reduction=rd,label=T,label.size=2, group.by = "libraryID")
ggsave(sprintf("%s.%s.%s.png",out.prefix,rd,"libraryID"),width=6.5,height=4)
}
}
for(.ii in c("patient","batchV")){
if(!is.null(seu@meta.data[[.ii]])){
if(length(unique(seu@meta.data[[.ii]]))>20){
p <- DimPlot(seu,reduction=rd, group.by = .ii) + NoLegend()
ggsave(sprintf("%s.%s.%s.png",out.prefix,rd,.ii),width=4.5,height=4)
}else{
p <- DimPlot(seu,reduction=rd, group.by = .ii)
ggsave(sprintf("%s.%s.%s.png",out.prefix,rd,.ii),width=6.5,height=4)
}
}
}
plot.resolution.list <- list()
for(t.res in resolution.vec){
if(use.sctransform && platform!="SmartSeq2"){
cate.res <- sprintf("SCT_snn_res.%s",t.res)
}else{
cate.res <- sprintf("RNA_snn_res.%s",t.res)
}
plot.resolution.list[[cate.res]] <- DimPlot(seu, reduction = rd,
pt.size=0.1,label.size=2,
label = TRUE,
group.by=cate.res) +
NoLegend()
}
for(i in seq_len(length(plot.resolution.list)/4))
{
pp <- plot_grid(plotlist=plot.resolution.list[((i-1)*4+1):(i*4)],
ncol = 2,align = "hv")
save_plot(sprintf("%s.%s.res.%d.png",out.prefix,rd,i),pp,
ncol = 2, base_aspect_ratio=0.55)
}
plot.resolution.list <- list()
for(t.res in resolution.vec){
if(use.sctransform && platform!="SmartSeq2"){
cate.res <- sprintf("SCT_snn_res.%s",t.res)
}else{
cate.res <- sprintf("RNA_snn_res.%s",t.res)
}
plot.resolution.list[[cate.res]] <- ssc.plot.tsne(sce,columns = cate.res,
reduced.name = sprintf("seurat.%s",rd),
colSet=list(),size=0.1,label=3,
par.geneOnTSNE=list(scales="free",pt.order="random",pt.alpha=0.8),
base_aspect_ratio = 1.2)
}
#plot.resolution.list.debug <<- plot.resolution.list
#out.prefix.debug <<- out.prefix
for(i in seq_len(length(plot.resolution.list)/4))
{
pp <- plot_grid(plotlist=plot.resolution.list[((i-1)*4+1):(i*4)],
ncol = 2,align = "hv")
save_plot(sprintf("%s.%s.res.sceStyle.%d.png",out.prefix,rd,i),pp,
ncol = 2, base_aspect_ratio=0.9,base_height=5.5)
}
## gene on umap
loginfo(sprintf("bein plotting geneOnUmap ..."))
l_ply(seq_along(g.geneOnUmap.list),function(i){
gene.tmp <- intersect(g.geneOnUmap.list[[i]],rowData(sce)$display.name)
loginfo(sprintf("(begin) geneSet %s",names(g.geneOnUmap.list)[i]))
if(length(gene.tmp)>0){
p <- ssc.plot.tsne(sce,assay.name=assay.name,adjB=if(nBatch>1) "batchV" else NULL,
gene=gene.tmp,
par.geneOnTSNE=list(scales="free",pt.order="random",pt.alpha=0.8),
reduced.name=sprintf("seurat.%s",rd))
ggsave(sprintf("%s.seurat.%s.marker.%s.png",
out.prefix,rd,names(g.geneOnUmap.list)[i]),
width=10,
height=if(length(gene.tmp)>9) 11 else if(length(gene.tmp)>6) 8 else if(length(gene.tmp)>3) 5.4 else 2.7)
}
loginfo(sprintf("(end) geneSet %s",names(g.geneOnUmap.list)[i]))
},.parallel=T)
loginfo(sprintf("end plotting geneOnUmap."))
#### density
ssc.plot.tsne(sce,plotDensity=T,reduced.name=sprintf("seurat.%s",rd),
out.prefix=sprintf("%s.seurat.%s",out.prefix,rd))
colSet <- list()
ssc.plot.tsne(sce, columns = "percent.mito",
reduced.name = sprintf("seurat.%s",rd),
colSet=colSet,size=0.03,
par.geneOnTSNE = list(scales = "free",pt.order = "random"),
#vector.friendly=T,
my.ggPoint=ggrastr::geom_point_rast,
out.prefix = sprintf("%s.seurat.%s.groupBy.%s",out.prefix,rd,"percent.mito"),
base_aspect_ratio = 1.30)
ssc.plot.tsne(sce, columns = "nCount_RNA",
reduced.name = sprintf("seurat.%s",rd),
colSet=colSet,size=0.03,
par.geneOnTSNE = list(scales = "free",pt.order = "random"),
#vector.friendly=T,
my.ggPoint=ggrastr::geom_point_rast,
out.prefix = sprintf("%s.seurat.%s.groupBy.%s",out.prefix,rd,"nCount_RNA"),
base_aspect_ratio = 1.30)
ssc.plot.tsne(sce, columns = "nFeature_RNA",
reduced.name = sprintf("seurat.%s",rd),
colSet=colSet,size=0.03,
par.geneOnTSNE = list(scales = "free",pt.order = "random"),
#vector.friendly=T,
my.ggPoint=ggrastr::geom_point_rast,
out.prefix = sprintf("%s.seurat.%s.groupBy.%s",out.prefix,rd,"nFeature_RNA"),
base_aspect_ratio = 1.30)
}
old.par <- NULL
if(file.exists(sprintf("%s.run.par.txt",out.prefix))){
old.par <- fread(sprintf("%s.run.par.txt",out.prefix))
}
write.table(data.frame(opt.res=opt.res),
file=sprintf("%s.run.par.txt",out.prefix),row.names=F,quote=F,sep="\t")
if(file.exists(sprintf("%s.sce.rds",out.prefix)))
{
loginfo(sprintf("loading pre-calculated result ..."))
sce <- readRDS(sprintf("%s.sce.rds",out.prefix))
seu <- readRDS(sprintf("%s.seu.rds",out.prefix))
if(old.par$opt.res==opt.res){
return(list("seu"=seu,"sce"=sce))
}
}else
{
loginfo(sprintf("running Seurat pipeline ..."))
if(!is.null(gene.exclude.df) && !("S.Score" %in% cor.var) && !("G2M.Score" %in% cor.var)){
gene.exclude.df <- gene.exclude.df[!(category %in% c("cellCycle/proliferation")),]
}
seu <- run.HVG(seu,gene.exclude.df,n.top=n.top,measurement=measurement)
adj.cov <- cor.var
if(adj.cov[1]=="NULL") { adj.cov <- NULL }
if(!is.null(adj.cov)){
loginfo(sprintf("CellCycleScoring ..."))
a.env <- new.env()
if(specie=="human"){
data("cc.genes",package="Seurat",envir=a.env)
}else{
data("cc.genes.mouse",package="scPip",envir=a.env)
}
seu <- CellCycleScoring(seu, s.features = a.env[["cc.genes"]][["s.genes"]],
g2m.features = a.env[["cc.genes"]][["g2m.genes"]],
set.ident = FALSE)
#seu$CC.Difference <- seu$S.Score - seu$G2M.Score
loginfo(sprintf("AddModuleScore ..."))
dat.ext.dir <- system.file("extdata",package="scPip")
glist.HSP <- fread(sprintf("%s/byZhangLab.stress.glist.gz",dat.ext.dir))$geneSymbol
#glist.HSP <- intersect(glist.HSP,rownames(seu))
seu <- AddModuleScore(seu, features=list("DIG.Score"=glist.HSP), name="DIG.Score",
pool = NULL, nbin = 24, ctrl = 100)
glist.ISG <- fread(sprintf("%s/ISG.MSigDB.BROWNE_INTERFERON_RESPONSIVE_GENES.detected.glist.gz",dat.ext.dir))$geneSymbol
seu <- AddModuleScore(seu, features=list("ISG.Score"=glist.ISG), name="ISG.Score",
pool = NULL, nbin = 24, ctrl = 100)
### remove this function: if correct something, always correct for batchV and percent.mito
###if("percent.mito" %in% colnames(seu[[]])){
### adj.cov <- c(adj.cov,"percent.mito")
###}
###if(nBatch>1){
### adj.cov <- c(adj.cov,c("batchV"))
###}
if(!("percent.mito" %in% colnames(seu[[]]))){
adj.cov <- setdiff(adj.cov,"percent.mito")
}
if(!(nBatch>1)){
adj.cov <- setdiff(adj.cov,c("batchV"))
}
}
loginfo(sprintf("adj: %s\n",paste(adj.cov,collapse=",")))
print(head(seu[[]]))
loginfo(sprintf("do.scale: %s\n",do.scale))
loginfo(sprintf("Scale ..."))
if(use.sctransform && platform!="SmartSeq2"){
cat(sprintf("SCTransform:\n"))
seu <- SCTransform(seu, variable.features.n=n.top, do.scale=do.scale, vars.to.regress = adj.cov)
}else{
cat(sprintf("ScaleData:\n"))
seu <- ScaleData(object = seu,do.scale=do.scale,vars.to.regress = adj.cov)
}
if(run.stage==0){
return(list("seu"=seu,"sce"=sce))
}
#### PCA
if(length(opt.npc)==1){
opt.pc.used <- 1:opt.npc
}else{
opt.pc.used <- opt.npc
}
cat(sprintf("use PCs:\n"))
print(opt.pc.used)
seu <- RunPCA(object = seu,ndims.print = 1:5)
#print(x = seu[['pca']], dims = 1:5, nfeatures = 5, projected = FALSE)
#p <- VizDimLoadings(seu)
#ggsave(sprintf("%s.pca.01.pdf",out.prefix),width=7,height=14)
seu <- ProjectDim(object = seu)
cat(sprintf("genes associated with PCs:\n"))
print(x = seu[['pca']], dims = 1:30, nfeatures = 30, projected = TRUE)
pdf(sprintf("%s.pca.ht.01.pdf",out.prefix),width=14,height=18)
DimHeatmap(object = seu, dims = 1:15, cells = 500, balanced = TRUE,fast = TRUE)
dev.off()
pdf(sprintf("%s.pca.ht.02.pdf",out.prefix),width=14,height=18)
DimHeatmap(object = seu, dims = 16:30, cells = 500, balanced = TRUE,fast = TRUE)
dev.off()
#p <- ElbowPlot(object = seu,ndims=50)
#ggsave(sprintf("%s.pca.03.pdf",out.prefix),width=5,height=4)
loginfo(sprintf("use.harmony: %s",use.harmony))
use.rd <- "pca"
if(use.harmony || (!is.null(method.integration) && method.integration=="Harmony" ) ){
### dims.use is not working
### seu <- RunHarmony(seu, c("batchV"),dims.use=opt.pc.used,verbose=F)
dat.pca.used <- Embeddings(seu,reduction = "pca")[,opt.pc.used,drop=F]
suppressWarnings({
pcadata <- Seurat::CreateDimReducObject(
embeddings = dat.pca.used,
stdev = as.numeric(apply(dat.pca.used, 2, stats::sd)),
assay = seu[["pca"]]@assay.used,
key = "PC_"
)
})
seu[["pca"]] <- pcadata
###
seu <- RunHarmony(seu, c("batchV"),verbose=F)
use.rd <- "harmony"
opt.pc.used <- seq_len(ncol(Embeddings(seu,reduction = "harmony")))
}
if(!use.harmony && (!is.null(method.integration) && method.integration=="Scanorama") ){
##saveRDS(seu,file=sprintf("%s.debug.rds",out.prefix))
seu <- run.Scanorama(seu)
use.rd <- "Scanorama"
opt.pc.used <- 1:100
}
######### UMAP
tic("RunUMAP...")
seu <- RunUMAP(object = seu, reduction = use.rd,dims = opt.pc.used,verbose=F)
toc()
######### tSNE
if("tsne" %in% plot.rd){
tic("RunTSNE...")
seu <- RunTSNE(object = seu, reduction = use.rd,dims = opt.pc.used,verbose=F)
toc()
}
#######################################
#### clustring
seu <- FindNeighbors(object = seu, reduction = use.rd, dims = opt.pc.used)
resolution.vec <- seq(0.1,2.4,0.1)
seu <- FindClusters(object = seu,resolution = c(resolution.vec,res.addition),verbose=F)
for(t.res in resolution.vec){
if(use.sctransform && platform!="SmartSeq2"){
print(table(seu[[sprintf("SCT_snn_res.%s",t.res)]]))
aa.res <- sprintf("SCT_snn_res.%s",t.res)
}else{
print(table(seu[[sprintf("RNA_snn_res.%s",t.res)]]))
aa.res <- sprintf("RNA_snn_res.%s",t.res)
}
#sce[[aa.res]] <- seu.merged@meta.data[,aa.res]
}
#### for sscClust
reducedDim(sce,"seurat.pca") <- Embeddings(seu, reduction = "pca")
reducedDim(sce,"seurat.umap") <- Embeddings(seu, reduction = "umap")
if("tsne" %in% names(seu@reductions)){
reducedDim(sce,"seurat.tsne") <- Embeddings(seu, reduction = "tsne")
}
if(use.sctransform && platform!="SmartSeq2"){
idx.colRes <- grep("^SCT_snn_res",colnames(seu[[]]),value=T)
}else{
idx.colRes <- grep("^RNA_snn_res",colnames(seu[[]]),value=T)
}
for(idx in idx.colRes){
colData(sce)[[idx]] <- sprintf("C%02d",as.integer(as.character(seu[[]][,idx])))
}
### patch
colData(sce)[is.na(sce$majorCluster) | sce$majorCluster=="unknown","majorCluster"] <- ""
###
sce$ClusterID <- colData(sce)[[opt.res]]
print("all(colnames(sce)==colnames(seu))?")
print(all(colnames(sce)==colnames(seu)))
seu$ClusterID <- sce$ClusterID
for(i.rd in plot.rd){
plot.all(rd=i.rd)
}
}
sce$ClusterID <- colData(sce)[[opt.res]]
print("all(colnames(sce)==colnames(seu))?")
print(all(colnames(sce)==colnames(seu)))
seu$ClusterID <- sce$ClusterID
for(i.rd in plot.rd){
ssc.plot.tsne(sce, columns = "ClusterID",
reduced.name = sprintf("seurat.%s",i.rd),
colSet=list(),size=0.03,label=3,
par.geneOnTSNE = list(scales = "free",pt.order = "random"),
#vector.friendly=T,
my.ggPoint=ggrastr::geom_point_rast,
out.prefix = sprintf("%s.seurat.%s.groupBy.%s",out.prefix,i.rd,"ClusterID"),
base_aspect_ratio = 1.30)
}
##### save result
#write.table(colData(sce), sprintf("%s.clusterInfo.txt",out.prefix),sep = "\t",
# row.names = F, quote = F)
saveRDS(seu, file = sprintf("%s.seu.rds",out.prefix))
saveRDS(sce, file = sprintf("%s.sce.rds",out.prefix))
#seu <- readRDS(file = sprintf("%s.seu.rds",out.prefix))
#sce <- readRDS(file = sprintf("%s.sce.rds",out.prefix))
cat(sprintf("data saved!\n"))
#############################
## violin
if(!all(colnames(seu)==colnames(sce))){
warning("seu and sce are not consistent !")
}
nCls <- length(table(sce$ClusterID))
l_ply(seq_along(g.geneOnUmap.list),function(i){
gene.tmp <- intersect(g.geneOnUmap.list[[i]],rowData(sce)$display.name)
if(length(gene.tmp)>0){
p <- ssc.plot.violin(sce,assay.name=assay.name,
adjB=if(nBatch>1) "batchV" else NULL,
clamp = c(-4, 8), gene=gene.tmp, group.var="ClusterID")
ggsave(sprintf("%s.seurat.violin.marker.%s.png",
out.prefix,names(g.geneOnUmap.list)[i]),
width=if(nCls<=12) 6 else 8,height=8)
}
},.parallel=T)
### DE Genes
#### tic("DE genes")
#### de.out <- ssc.clusterMarkerGene(sce, assay.name=assay.name,
#### ncell.downsample=if(ncol(sce)>30000) 1000 else NULL,
#### group.var="ClusterID",
#### batch=if(nBatch>1) "batchV" else NULL,
#### out.prefix=sprintf("%s.de.opt.res",out.prefix),
#### n.cores=ncores, do.plot=T,
#### F.FDR.THRESHOLD=0.01,
#### pairwise.P.THRESHOLD=0.01,
#### pairwise.FC.THRESHOLD=0.25, verbose=F)
#### toc()
if(do.deg){
dir.create(sprintf("%s/limma",dirname(out.prefix)),F,T)
tic("limma")
de.out <- ssc.DEGene.limma(sce,assay.name=assay.name,ncell.downsample=ncell.deg,
group.var="ClusterID",batch=if(nBatch>1) "batchV" else NULL,
out.prefix=sprintf("%s/limma/%s",
dirname(out.prefix),basename(out.prefix)),
n.cores=ncores,verbose=3, group.mode="multiAsTwo",
T.logFC=if(platform=="SmartSeq2") 1 else 0.25)
toc()
saveRDS(de.out,file=sprintf("%s.de.out.limma.rda",
sprintf("%s/limma/%s",
dirname(out.prefix),
basename(out.prefix))))
}
return(list("seu"=seu,"sce"=sce))
}
#' Wraper for running scanpy pipeline
#' @importFrom reticulate import
#' @importFrom data.table data.table fread
#' @importFrom S4Vectors DataFrame
#' @importFrom sscVis loginfo
#' @importFrom ggplot2 ggsave
#' @importFrom cowplot plot_grid save_plot
#' @importFrom RhpcBLASctl omp_set_num_threads
#' @importFrom doParallel registerDoParallel
#' @importFrom utils head str data
#' @importFrom tictoc tic toc
#' @param adata object of \code{AnnData}
#' @param out.prefix character; output prefix
#' @param gene.exclude.df data.frame; gene blak list. Required column: seu.id.
#' @param n.top integer; number of top genes. (default: 1500)
#' @param opt.res character; optimal resolution (default: "1")
#' @param aid character; an ID (default: "PRJ")
#' @param plot.rd character vector; reducedDimNames used for plots (default: c("umap"))
#' @param opt.npc integer; optimal number of principal componets to use (default: 15)
#' @param ncores integer; number of CPU cores to use (default: 16)
#' @param res.test double; resolutions to test (default: seq(0.1,2.4,0.1) )
#' @param cor.var character vector; Subset of c("S_score","G2M_score","DIG.Score","ISG.Score","percent.mito") or NULL. (default: c("S_score","G2M_score","DIG.Score","percent.mito")).
#' @param ncell.deg integer; number of cell to downsample. used in the differentially expressed gene analysis. (default: 1500)
#' @param do.deg logical; whether perform the differentially expressed gene analysis. (default: FALSE)
#' @param hvg.batch.minNCells integer; required minimum number of cells in each batch. Batch with < hvg.batch.minNCells will not be used for HVG finding. (default: 100)
#' @param use.harmony logical; whether use the harmony method. (default: TRUE)
#' @param method.integration character; integration method. (default: NULL)
#' @param specie character; specie, one of "human", "mouse". (default: "human")
#' @param gene.mapping.table data.table; used for gene ID conversion. (default: NULL)
#' @param res.addition character vector; additional resolution parameters. Internally, resolutions from 0.1 to 2.4 will be used. (default: NULL)
#' @param run.stage integer; running stage. (default: 100)
#' @return a list contain an object of \code{AnnData}
#' @details run the scanpy pipeline
#' @export
run.scanpy <- function(adata,out.prefix,gene.exclude.df,n.top=1500,
#measurement="counts",platform="10X",
opt.res="1",
#use.sctransform=F,
aid="PRJ",plot.rd=c("umap"),
opt.npc=15,ncores=16,
res.test=seq(0.1,2.4,0.1),
#do.adj=T,
### remove this function: If certain variables are corrected (!is.null(cor.var) || cor.var!="NULL"), the pipeline will also correct for batchV and percent.mito
#cor.var=c("S_score","G2M_score","DIG.Score","percent.mito","batchV"),
cor.var=c("S_score","G2M_score","DIG.Score","percent.mito"),
ncell.deg=1500,do.deg=F,
hvg.batch.minNCells=100,
#do.scale=F,
use.harmony=T,
method.integration=NULL,
specie="human",
gene.mapping.table=NULL,res.addition=NULL,
run.stage=100)
{
require("reticulate")
sc <- import("scanpy")
matplotlib <- import("matplotlib")
#matplotlib$use('Agg')
plt <- import("matplotlib.pyplot")
plt$switch_backend('Agg')
sc$settings$autoshow = FALSE
sc$set_figure_params(dpi=300,dpi_save=300,fontsize=12)
cat(sprintf("opt.res: %s\n",opt.res))
###
if(is.null(gene.mapping.table)){
gene.mapping.table <- data.table(geneID=as.character(adata$var$gene_ids),
seu.id=as.character(rownames(adata$var)),
display.name=as.character(rownames(adata$var)))
}
###### patch ######
if(!"percent.mito" %in% colnames(adata$obs))
{
idx.pmt <- grep("^(percent.mito|percent.mt)$",colnames(adata$obs),value=T,perl=T)
if(length(idx.pmt) > 0){ adata$obs$percent.mito <- adata$obs[,idx.pmt[1]] }
}
if(!"patient" %in% colnames(adata$obs))
{
idx.patient <- grep("^(patient|Patient|PatientID)$",colnames(adata$obs),value=T,perl=T)
if(length(idx.patient) > 0){
adata$obs$patient <- adata$obs[,idx.patient[1]]
}else{
adata$obs$patient <- "PXX"
}
}
if(!"batchV" %in% colnames(adata$obs)) { adata$obs$batchV <- adata$obs$patient }
###################
nBatch <- length(table(adata$obs$batchV))
loginfo(sprintf("Total batchs: %d",nBatch))
print(str(table(adata$obs$batchV)))
#####
RhpcBLASctl::omp_set_num_threads(1)
doParallel::registerDoParallel(cores = ncores)
plot.general <- function(rd="umap")
{
####
t.par <- list("loc"=c(4,5),
"stype"=c(4,5),
"cancerType"=c(4,5),
"phase"=c(4,5),
"S_score"=c(4,5),
"G2M_score"=c(4,5),
"total_counts"=c(4,5),
"percent.mito"=c(4,5),
"libraryID"=NULL,
"patient"=NULL,
"batchV"=NULL)
for(tt in names(t.par)){
if(!is.null(adata$obs[[tt]])){
if(tt %in% c("libraryID","patient","batchV")){
if(length(unique(adata$obs$libraryID))>20){
###plt$figure(figsize=c(4.5,4), dpi=300)
sp <- plt$subplots(figsize=c(10,4))
sc$pl$umap(adata, color=c(tt),ax=sp[[2]])
}else{
###plt$figure(figsize=c(5.0,4), dpi=300)
sp <- plt$subplots(figsize=c(7.5,4))
sc$pl$umap(adata, color=c(tt),ax=sp[[2]])
}
}else{
##plt$figure(figsize=t.par[[tt]], dpi=300)
sp <- plt$subplots(figsize=c(5,4))
sc$pl$umap(adata, color=c(tt),ax=sp[[2]])
}
plt$tight_layout()
plt$savefig(sprintf("%s.%s.%s.png",out.prefix,rd,tt))
}
}
#### gene on umap
loginfo(sprintf("bein plotting geneOnUmap ..."))
l_ply(seq_along(g.geneOnUmap.list),function(i){
gene.tmp <- intersect(g.geneOnUmap.list[[i]],rownames(adata$var))
loginfo(sprintf("(begin) geneSet %s",names(g.geneOnUmap.list)[i]))
if(length(gene.tmp)>0){
adata.plot <- adata[,rownames(adata$var) %in% gene.tmp]
sc$pp$scale(adata.plot)
plt$figure(figsize=c(3.5,4), dpi=300)
##sc$pl$umap(adata, ncols=as.integer(3), color=gene.tmp,vmin="p05",vmax="p95",frameon=F)
sc$pl$umap(adata.plot, ncols=as.integer(3), color=gene.tmp, color_map="YlOrRd", vmin=-1,vmax=5, frameon=F)
plt$savefig(sprintf("%s.%s.marker.%s.png",out.prefix,rd,names(g.geneOnUmap.list)[i]))
adata.plot <- NULL
gc()
}
loginfo(sprintf("(end) geneSet %s",names(g.geneOnUmap.list)[i]))
},.parallel=F)
loginfo(sprintf("end plotting geneOnUmap."))
### density
### "percent.mito"
### "nCount_RNA"
### "nFeature_RNA"
}
plot.resolution <- function(rd="umap",resolution.vec=seq(0.1,2.4,0.1))
{
####
for(i in seq_len(length(resolution.vec)/4))
{
plt$figure(figsize=c(5,4), dpi=300)
sc$pl$umap(adata, ncols=as.integer(2), color=sprintf("leiden.%s",resolution.vec[((i-1)*4+1):(i*4)]))
plt$savefig(sprintf("%s.%s.res.%d.png",out.prefix,rd,i))
}
}
old.par <- NULL
if(file.exists(sprintf("%s.run.par.txt",out.prefix))){
old.par <- fread(sprintf("%s.run.par.txt",out.prefix))
}
write.table(data.frame(opt.res=opt.res),
file=sprintf("%s.run.par.txt",out.prefix),row.names=F,quote=F,sep="\t")
if(file.exists(sprintf("%s.scanpy.h5ad",out.prefix)))
{
loginfo(sprintf("loading pre-calculated result ..."))
adata <- sc$read_h5ad(sprintf("%s.scanpy.h5ad",out.prefix))
if(old.par$opt.res==opt.res){
return(list("adata"=adata))
}
}else
{
if(file.exists(sprintf("%s.scanpy.step1.h5ad",out.prefix)))
{
loginfo(sprintf("loading h5ad without clustering ..."))
adata <- sc$read_h5ad(sprintf("%s.scanpy.step1.h5ad",out.prefix))
}else{
loginfo(sprintf("running scanpy pipeline ..."))
if(!is.null(gene.exclude.df) && !("S_score" %in% cor.var) && !("G2M_score" %in% cor.var)){
gene.exclude.df <- gene.exclude.df[!(category %in% c("cellCycle/proliferation")),]
}
#### variable genes
{
###sc$pp$highly_variable_genes(adata)
### if flavor="seurat_v3", count data is expected
adata_tmp <- NULL
my.flavor <- "seurat"
if(my.flavor=="seurat_v3"){
adata_tmp <- adata$raw$to_adata()
sc$pp$filter_genes(adata_tmp, min_cells=3)
}else{
adata_tmp <- adata
}
cellDistLib.vec <- sort(unclass(table(adata_tmp$obs$libraryID)),decreasing=T)
f.lib <- names(cellDistLib.vec)[cellDistLib.vec > hvg.batch.minNCells]
adata_tmp <- adata_tmp[adata_tmp$obs$libraryID %in% f.lib]
f.gene <- rownames(adata_tmp$var) %in% gene.exclude.df[["geneSymbol"]] |
(grepl("^(RP[LS]|Rp[ls])",rownames(adata_tmp$var),perl=T))
adata_tmp <- adata_tmp[,!f.gene]
sc$pp$highly_variable_genes(adata_tmp,flavor=my.flavor,n_top_genes=as.integer(n.top),batch_key="batchV")
hvg.glist <- rownames(adata_tmp$var)[adata_tmp$var$highly_variable]
#sc$pp$highly_variable_genes(adata_countData,flavor="seurat_v3",n_top_genes=100000L,batch_key="batchV")
#sc$pp$highly_variable_genes(adata,flavor="seurat",n_top_genes=100000L,batch_key="batchV")
#adata_countData$var$inBlkGList <- (rownames(adata_countData$var) %in% c(gene.exclude.df[["geneSymbol"]],
# "MALAT1","MALAT1-ENSG00000251562","Malat1",
# "MALAT1-ENSG00000279576","MALAT1-ENSG00000278217")) |
# (grepl("^(RP[LS]|Rp[ls])",rownames(adata_countData$var),perl=T))
#hvg.gene.info <- as.data.table(adata_countData$var,keep.rownames="geneSymbol")
#hvg.glist <- head(hvg.gene.info[order(-variances_norm),][inBlkGList==F,][["geneSymbol"]],n.top)
#print("all(rownames(adata_countData$var)==rownames(adata$var))")
#print(all(rownames(adata_countData$var)==rownames(adata$var)))
#for(xx in c("highly_variable","highly_variable_rank","means","variances","variances_norm","inBlkGList")){
# adata$var[[xx]] <- adata_countData$var[[xx]]
#}
adata$var$highly_variable <- rownames(adata$var) %in% hvg.glist
adata_tmp <- NULL
gc()
}
##### calculate some scores
adj.cov <- cor.var
if(adj.cov[1]=="NULL") { adj.cov <- NULL }
if(!is.null(adj.cov)){
loginfo(sprintf("CellCycleScoring ..."))
a.env <- new.env()
if(specie=="human"){
data("cc.genes",package="Seurat",envir=a.env)
}else{
data("cc.genes.mouse",package="scPip",envir=a.env)
}
##sc$pp$scale(adata)
sc$tl$score_genes_cell_cycle(adata,
s_genes=a.env[["cc.genes"]][["s.genes"]],
g2m_genes=a.env[["cc.genes"]][["g2m.genes"]])
####seu$CC.Difference <- seu$S.Score - seu$G2M.Score
loginfo(sprintf("AddModuleScore ..."))
dat.ext.dir <- system.file("extdata",package="scPip")
glist.HSP <- fread(sprintf("%s/byZhangLab.stress.glist.gz",dat.ext.dir))$geneSymbol
glist.HSP <- intersect(glist.HSP,rownames(adata$var))
sc$tl$score_genes(adata,gene_list=glist.HSP,score_name="DIG.Score",ctrl_size=length(glist.HSP))
glist.ISG <- fread(sprintf("%s/ISG.MSigDB.BROWNE_INTERFERON_RESPONSIVE_GENES.detected.glist.gz",dat.ext.dir))$geneSymbol
sc$tl$score_genes(adata,gene_list=glist.ISG,score_name="ISG.Score",ctrl_size=length(glist.ISG))
### remove this function: if correct something, always correct for batchV and percent.mito
###if("percent.mito" %in% colnames(seu[[]])){
### adj.cov <- c(adj.cov,"percent.mito")
###}
###if(nBatch>1){
### adj.cov <- c(adj.cov,c("batchV"))
###}
if(!("percent.mito" %in% colnames(adata$obs))){
adj.cov <- setdiff(adj.cov,"percent.mito")
}
if(!(nBatch>1)){
adj.cov <- setdiff(adj.cov,c("batchV"))
}
adata$obs$batchV <- as.character(adata$obs$batchV)
loginfo(sprintf("adj: %s\n",paste(adj.cov,collapse=",")))
print(head(adata$obs,n=4))
}
#####
{
adata_hvg <- adata[,adata$var$highly_variable==TRUE]
}
#####
if(!is.null(adj.cov)){
loginfo(sprintf("regress_out ..."))
tic("regress_out")
sc$pp$regress_out(adata_hvg, adj.cov)
toc()
}
tic("scale")
sc$pp$scale(adata_hvg)
toc()
##sc$tl$pca(adata_hvg, svd_solver='arpack',n_comps=as.integer(opt.npc))
sc$tl$pca(adata_hvg, svd_solver='arpack')
adata$obsm[["X_pca"]] <- adata_hvg$obsm[["X_pca"]]
npc <- ncol(adata$obsm[["X_pca"]])
adata$obsm[["X_pca_top"]] <- adata$obsm[["X_pca"]][,seq_len(min(opt.npc,npc)),drop=F]
adata_hvg <- NULL
gc()
loginfo(sprintf("use.harmony: %s",use.harmony))
use.rd <- "X_pca_top"
tic("neighbors ...")
if(use.harmony || (!is.null(method.integration) && method.integration=="Harmony")){
sc$external$pp$harmony_integrate(adata, "batchV",basis=use.rd,max_iter_harmony=30L)
sc$pp$neighbors(adata, n_neighbors=as.integer(20), n_pcs=opt.npc,use_rep="X_pca_harmony")
}else{
sc$pp$neighbors(adata, n_neighbors=as.integer(20), n_pcs=opt.npc,use_rep=use.rd)
}
toc()
tic("umap ...")
sc$tl$umap(adata)
toc()
adata$write_h5ad(sprintf("%s.scanpy.step1.h5ad",out.prefix))
plot.general()
}
#### clustring
#res.test <- seq(0.1,2.4,0.1)
tic("leiden ...")
res.cls <- llply(res.test,function(res.i){
tic(sprintf("____ leiden with resolution %s ...",res.i))
###adata.i <- adata[,adata$var$highly_variable==TRUE]
adata.i <- adata[,1:10]
sc$tl$leiden(adata.i, resolution=res.i)
cls.i <- sprintf("C%02d",adata.i$obs[["leiden"]])
toc()
adata.i <- NULL
gc()
return(cls.i)
},.parallel=T)
names(res.cls) <- sprintf("leiden.%s",res.test)
for(xx in names(res.cls)){
adata$obs[[xx]] <- res.cls[[xx]]
}
toc()
adata$obs[["ClusterID"]] <- adata$obs[[sprintf("leiden.%s",opt.res)]]
##### save result
adata$write_h5ad(sprintf("%s.scanpy.h5ad",out.prefix))
cat(sprintf("data saved!\n"))
plot.resolution(resolution.vec=res.test)
}
#####
{
adata$obs[["ClusterID"]] <- adata$obs[[sprintf("leiden.%s",opt.res)]]
###plt$figure(figsize=c(4,5), dpi=300)
sp <- plt$subplots(figsize=c(5,4))
sc$pl$umap(adata, color="ClusterID",ax=sp[[2]])
plt$tight_layout()
plt$savefig(sprintf("%s.%s.%s.png",out.prefix,"umap","ClusterID"))
}
#############################
## violin
if(F)
{
nCls <- length(table(adata$obs$ClusterID))
l_ply(seq_along(g.geneOnUmap.list),function(i){
gene.tmp <- intersect(g.geneOnUmap.list[[i]],rownames(adata$var))
glist.i <- g.geneOnUmap.list[i]
glist.i[[1]] <- gene.tmp
if(length(gene.tmp)>0){
adata.plot <- adata[,rownames(adata$var) %in% gene.tmp]
#sc$pp$scale(adata.plot)
plt$figure(figsize=c(8,8), dpi=300)
###sc$pl$umap(adata, ncols=as.integer(3), color=gene.tmp,vmax="p99",frameon=F)
##sc$pl$stacked_violin(adata.plot, glist.i, groupby='ClusterID',swap_axes=F,vmin=-1,vmax=5)
sc$pl$stacked_violin(adata.plot, glist.i, groupby='ClusterID',swap_axes=F,vmin=0,vmax=8)
plt$savefig(sprintf("%s.violin.marker.%s.png",out.prefix,names(glist.i)))
adata.plot <- NULL
gc()
}
},.parallel=F)
}
if(do.deg)
{
dir.create(sprintf("%s/deg",dirname(out.prefix)),F,T)
tic("deg")
sc$tl$rank_genes_groups(adata,'ClusterID', method='t-test',use_raw=F)
#sc$tl$rank_genes_groups(adata,'ClusterID', method='t-test',groups=list('C12'),use_raw=F)
toc()
dedf = sc$get$rank_genes_groups_df(adata,group=NULL)
saveRDS(dedf,file=sprintf("%s.scanpy.deg.ttest.rds",out.prefix))
###adata$write_h5ad(sprintf("%s.scanpy.wDEG.h5ad",out.prefix))
###plt$figure(figsize=c(5,4), dpi=300)
sp <- plt$subplots(figsize=c(5,4))
sc$pl$rank_genes_groups(adata, n_genes=25L, sharey=FALSE,ax=sp[[2]])
plt$savefig(sprintf("%s.deg.marker.%s.png",out.prefix,"00"))
#
# tic("limma")
# de.out <- ssc.DEGene.limma(sce,assay.name=assay.name,ncell.downsample=ncell.deg,
# group.var="ClusterID",batch=if(nBatch>1) "batchV" else NULL,
# out.prefix=sprintf("%s/limma/%s",
# dirname(out.prefix),basename(out.prefix)),
# n.cores=ncores,verbose=3, group.mode="multiAsTwo",
# T.logFC=if(platform=="SmartSeq2") 1 else 0.25)
# toc()
#
# saveRDS(de.out,file=sprintf("%s.de.out.limma.rda",
# sprintf("%s/limma/%s",
# dirname(out.prefix),
# basename(out.prefix))))
}
return(list("adata"=adata))
}
#' Wraper for running Scanorama
#' @importFrom Seurat CreateSeuratObject SetAssayData GetAssayData CellCycleScoring AddModuleScore ScaleData SCTransform RunPCA ProjectDim RunUMAP RunTSNE FindNeighbors FindClusters Embeddings DimPlot NoLegend
#' @importFrom SingleCellExperiment `reducedDim<-`
#' @importFrom SummarizedExperiment assayNames assay `assay<-` rowData `rowData<-` colData `colData<-`
#' @param seu object of \code{Seurat}
#' @param col.batch character; column name indicating batches (default: "batchV")
#' @param assay.slot character; which slot to be used? (default: "data")
#' @param ... ; passed to scanorama$integrate
#' @return a Seurat object
#' @details run the Scanorama
#' @export
run.Scanorama <- function(seu,col.batch="batchV",assay.slot="data",...)
{
require("reticulate")
require("plyr")
seu@meta.data[[col.batch]] <- as.character(seu[[]][[col.batch]])
b.vec <- unique(seu@meta.data[[col.batch]])
datasets <- lapply(b.vec,function(x){
seu.x <- seu[, seu[[]][[col.batch]] == x]
####ERROR: Data sets must be numpy array or scipy.sparse.csr_matrix, received type <class 'scipy.sparse.csc.csc_matrix'>.
####t(GetAssayData(seu.x,assay.slot)[VariableFeatures(seu),])
t(as.matrix(GetAssayData(seu.x,assay.slot)[VariableFeatures(seu),]))
})
### cannot have names
###names(datasets) <- b.vec
genes_list <- lapply(b.vec,function(x){ VariableFeatures(seu) })
### cannot have names
###names(genes_list) <- b.vec
scanorama <- import('scanorama')
integrated.data <- scanorama$integrate(datasets, genes_list,...)
rd.scanorama <- do.call(rbind,integrated.data[[1]])
rownames(rd.scanorama) <- colnames(seu)
colnames(rd.scanorama) <- sprintf("Scanorama_%d",seq_len(ncol(rd.scanorama)))
suppressWarnings({
rd.data <- Seurat::CreateDimReducObject(
embeddings = rd.scanorama,
stdev = as.numeric(apply(rd.scanorama, 2, stats::sd)),
key = "Scanorama_"
)
})
seu[["Scanorama"]] <- rd.data
return(seu)
}
#' Identification of gamma delta T cells (Fred's method)
#' @importFrom SingleCellExperiment rowData
#' @importFrom SummarizedExperiment assay assayNames
#' @importFrom sscClust classify.outlier
#' @param obj object of \code{SingleCellExperiment}; assay "counts" is required.
#' @param GSx character vector; gdT marker gene list. (default: c("CD3D","CD3E","TRDC","TRGC1","TRGC2")).
#' @param GSy character vector; CD8 T cell marker gene list. (default: c("CD8A","CD8B")).
#' @param col.name character; prefix of column names, to be added to obj. (default: "Score.gammaDeltaT")
#' @param th.score double; threshold of the score. (default: 0.35)
#' @param out.prefix character; output prefix. (default: NULL)
#' @return a SingleCellExperiment object
#' @export
cal.signatureScore.gdT.Fred <- function(obj,GSx=c("CD3D","CD3E","TRDC","TRGC1","TRGC2"),
GSy=c("CD8A","CD8B"),
col.name="Score.gammaDeltaT",th.score=0.35,out.prefix=NULL)
{
if(!all(GSx %in% rowData(obj)$display.name) || !all(GSy %in% rowData(obj)$display.name)){
warning(sprintf("not all genes in GSx and GSy in the obj !!\n"))
return(obj)
}
if(!("counts" %in% assayNames(obj))){
return(obj)
}
f.gene <- which(rowData(obj)$display.name %in% GSx)
GSx <- rowData(obj)$display.name[f.gene]
f.gene <- which(rowData(obj)$display.name %in% GSy)
GSy <- rowData(obj)$display.name[f.gene]
count.tot <- colSums(assay(obj,"counts"))
GSx.tot <- colSums(assay(obj,"counts")[names(GSx),])
GSy.tot <- colSums(assay(obj,"counts")[names(GSy),])
GSx.score <- GSx.tot/count.tot
GSy.score <- -GSy.tot/count.tot
GSx.score <- (GSx.score-min(GSx.score))/(max(GSx.score)-min(GSx.score))
GSy.score <- (GSy.score-min(GSy.score))/(max(GSy.score)-min(GSy.score))
comb.score <- GSx.score * GSy.score
colData(obj)[[col.name]] <- comb.score
colData(obj)[[sprintf("%s.pred.th",col.name)]] <- (comb.score > th.score)
print(table(obj$stype,obj[[sprintf("%s.pred.th",col.name)]]))
bin.tb <- sscClust::classify.outlier(comb.score,out.prefix=out.prefix,e.name="Score",th.score=th.score)
all(bin.tb$sample==colnames(obj))
colData(obj)[[sprintf("%s.pred.auto",col.name)]] <-(bin.tb$score.cls.tb$classification==1)
print(table(obj$stype,obj[[sprintf("%s.pred.auto",col.name)]]))
freq.tb <- unclass(table(obj$stype,obj[[sprintf("%s.pred.auto",col.name)]]))
freq.tb <- 100*sweep(freq.tb,1,rowSums(freq.tb),"/")
### subtypes: delta 1 and delta 2
gene.GC1 <- c("TRGC1")
gene.GC2 <- c("TRGC2")
f.gene <- which(rowData(obj)$display.name %in% gene.GC1)
gene.GC1 <- rowData(obj)$display.name[f.gene]
f.gene <- which(rowData(obj)$display.name %in% gene.GC2)
gene.GC2 <- rowData(obj)$display.name[f.gene]
exp.diff <- assay(obj,"norm_exprs")[names(gene.GC1),] - assay(obj,"norm_exprs")[names(gene.GC2),]
f.isGammDeltaT <- obj[[sprintf("%s.pred.auto",col.name)]]==T
colData(obj)[[sprintf("%s.auto.subtype",col.name)]] <- "NotGammaDeltaT"
colData(obj)[[sprintf("%s.auto.subtype",col.name)]][exp.diff==0 & f.isGammDeltaT] <- "undetermined"
colData(obj)[[sprintf("%s.auto.subtype",col.name)]][exp.diff>0 & f.isGammDeltaT] <- "delta2"
colData(obj)[[sprintf("%s.auto.subtype",col.name)]][exp.diff<0 & f.isGammDeltaT] <- "delta1"
colData(obj)[[sprintf("%s.auto.subtype",col.name)]] <- factor(colData(obj)[[sprintf("%s.auto.subtype",col.name)]],
levels=c("delta1","delta2","undetermined","NotGammaDeltaT"))
table(obj[[sprintf("%s.pred.auto",col.name)]],obj[[sprintf("%s.auto.subtype",col.name)]])
return(obj)
}
#' Identification of gamma delta T cells (simple average-threshold method)
#' @importFrom SingleCellExperiment rowData
#' @importFrom SummarizedExperiment assay
#' @importFrom data.table setDT
#' @importFrom ggplot2 ggplot geom_density theme_bw geom_vline facet_wrap ggsave aes_string
#' @param obj object of \code{SingleCellExperiment}
#' @param out.prefix character; output prefix. (default: NULL)
#' @param assay.name character vector; which assay to use. (default: "norm_exprs").
#' @param vis.v double vector; for vertical lines in visulization. (default: c(0.25,0.5)).
#' @param Th.CD3 double; threshold for T cell signature. (default: 0.25)
#' @param Th.DC double; threshold for delta receptor constant chain. (default: 0.25)
#' @param Th.GC1 double; threshold for gamma receptor constant chain 1. (default: 0.25)
#' @param Th.GC2 double; threshold for gamma receptor constant chain 2. (default: 0.25)
#' @return a SingleCellExperiment object
#' @export
inSilico.TGammaDelta <- function(obj,out.prefix=NULL,assay.name="norm_exprs",vis.v=c(0.25,0.5),
Th.CD3=0.25,Th.DC=0.25,Th.GC1=0.25,Th.GC2=0.25)
{
#library("data.table")
#library("ggplot2")
gene.to.test <- c("CD3D","CD3G","TRDC","TRGC1","TRGC2")
f.gene <- which(rowData(obj)$display.name %in% gene.to.test)
gene.to.test <- structure(rowData(obj)$display.name[f.gene],names=rownames(obj)[f.gene])
gene.to.test <- gene.to.test[order(gene.to.test)]
dat.plot <- as.data.frame(t(as.matrix(assay(obj,assay.name)[names(gene.to.test),])))
setDT(dat.plot,keep.rownames=T)
colnames(dat.plot)[-1] <- gene.to.test
dat.plot[,"CD3"] <- apply(dat.plot[,c("CD3D","CD3G")],1,mean)
dat.plot.melt <- melt((dat.plot),id.vars="rn")
colnames(dat.plot.melt) <- c("cell","gene","norm_exprs")
if(!is.null(out.prefix)){
dir.create(dirname(out.prefix),F,T)
p <- ggplot(dat.plot.melt, aes_string(x="norm_exprs", fill = "gene", colour = "gene")) +
geom_density(alpha = 0.1) +
theme_bw() +
geom_vline(xintercept = vis.v,linetype=2) +
facet_wrap(~gene,ncol=3,scales="free_y")
ggsave(sprintf("%s.inSiliso.marker.density.pdf",out.prefix),width=7,height=3.5)
}
obj$TGammaDelta <- "unknown"
obj$TGammaDelta[dat.plot[["CD3"]] > Th.CD3 & dat.plot[["TRDC"]] > Th.DC ] <- "undetermined"
obj$TGammaDelta[dat.plot[["CD3"]] > Th.CD3 & dat.plot[["TRDC"]] > Th.DC & ( dat.plot[["TRGC1"]] > Th.GC1 & dat.plot[["TRGC2"]] < Th.GC2 ) ] <- "delta2"
obj$TGammaDelta[dat.plot[["CD3"]] > Th.CD3 & dat.plot[["TRDC"]] > Th.DC & ( dat.plot[["TRGC1"]] < Th.GC1 & dat.plot[["TRGC2"]] > Th.GC2 ) ] <- "delta1"
table(obj$TGammaDelta)
#bin.tb <- sscClust::classify.outlier(dat.plot[["TRDC"]],out.prefix=out.prefix,e.name="TRDC",th.score=0.25)
##write.table(colData(obj),sprintf("%s.cellInfo.txt",out.prefix),row.names=F,sep="\t",quote=F)
return(obj)
}
#' Identification of T cells (simple average-threshold method)
#' @importFrom SingleCellExperiment rowData
#' @importFrom SummarizedExperiment assay
#' @importFrom sscVis ssc.scale
#' @importFrom data.table setDT
#' @importFrom ggpubr ggdensity
#' @importFrom ggplot2 ggplot geom_density theme_bw theme geom_vline facet_wrap ggsave
#' @param obj object of \code{SingleCellExperiment} or \code{AnnDataR6}
#' @param out.prefix character; output prefix. (default: NULL)
#' @param assay.name character vector; which assay to use. (default: "norm_exprs").
#' @param vis.v double vector; for vertical lines in visulization. (default: c(0.25,0.5)).
#' @param Th.CD3 double; threshold for T cell signature (mean of CD3D, CD3G). (default: 0.25)
#' @param Th.CD8 double; threshold for CD8 (mean of CD8A, CD8B). (default: 0.25)
#' @param Th.CD4 double; threshold for CD4 (expression of CD4). (default: 0.25)
#' @param Th.TH double; threshold for Thelper signature (mean of CD4, CD40LG). (default: 0.25)
#' @param Th.TR double; threshold for Treg signature (mean of CD4, FOXP3). (default: 0.25)
#' @param Th.DC double; threshold for delta receptor constant chain. (default: 0.25)
#' @param Th.GC1 double; threshold for gamma receptor constant chain 1. (default: 0.25)
#' @param Th.GC2 double; threshold for gamma receptor constant chain 2. (default: 0.25)
#' @param do.zscore logical; whether use zscore for calculation. (default: FALSE)
#' @param do.rescue logical; whether use "rescue" mode. (default: FALSE)
#' @return a SingleCellExperiment object
#' @details columns stype and gdType will be added to the obj
#' @export
inSilico.TCell <- function(obj, out.prefix, assay.name="norm_exprs",vis.v=c(0.25,0.5),
Th.CD3=0.25,Th.CD8=0.5,Th.CD4=0.5,Th.TH=0.25,Th.TR=0.25,do.zscore=F,
Th.DC=0.25,Th.GC1=0.25,Th.GC2=0.25,
do.rescue=T)
{
#### in silico classification
#library("data.table")
#library("ggplot2")
#library("ggpubr")
##gene.to.test <- c("CD4","CD8A","CD8B","CD3D","CD3E","CD3G","CD40LG","FOXP3","IL2RA")
gene.to.test <- c("CD4","CD8A","CD8B","CD3D","CD3G","CD40LG","FOXP3","IL2RA",
"TRDC","TRGC1","TRGC2")
dat.plot <- NULL
if(class(obj)[1]=="SingleCellExperiment"){
f.gene <- which(rowData(obj)$display.name %in% gene.to.test)
if(do.zscore){
obj.z <- obj[f.gene,]
obj.z <- ssc.scale(obj.z,gene.symbol=gene.to.test,assay.name=assay.name,
adjB="batchV",do.scale=T)
gene.to.test <- structure(rowData(obj.z)$display.name,names=rownames(obj.z))
gene.to.test <- gene.to.test[order(gene.to.test)]
dat.plot <- as.data.frame(t(as.matrix(assay(obj.z,sprintf("%s.scale",assay.name))[names(gene.to.test),])))
colnames(dat.plot) <- gene.to.test
}else{
gene.to.test <- structure(rowData(obj)$display.name[f.gene],names=rownames(obj)[f.gene])
gene.to.test <- gene.to.test[order(gene.to.test)]
dat.plot <- as.data.frame(t(as.matrix(assay(obj,assay.name)[names(gene.to.test),])))
colnames(dat.plot) <- gene.to.test
}
}else if(class(obj)[1]=="AnnDataR6"){
f.gene <- which(rownames(obj$var) %in% gene.to.test)
obj.tmp <- obj[,f.gene]
if(do.zscore){ sc$pp$scale(obj.tmp) }
dat.plot <- as.data.frame(as.matrix(obj.tmp$X))
}
##dat.plot[,"CD3"] <- apply(dat.plot[,c("CD3D","CD3E","CD3G")],1,mean)
dat.plot[,"CD3"] <- apply(dat.plot[,c("CD3D","CD3G")],1,mean)
dat.plot[,"CD8"] <- apply(dat.plot[,c("CD8A","CD8B")],1,mean)
dat.plot[,"TH"] <- apply(dat.plot[,c("CD4","CD40LG")],1,mean)
dat.plot[,"TR"] <- apply(dat.plot[,c("CD4","FOXP3")],1,mean)
dat.plot.melt <- melt(as.matrix(dat.plot))
colnames(dat.plot.melt) <- c("cell","gene","norm_exprs")
if(!file.exists(dirname(out.prefix))){
dir.create(dirname(out.prefix),F,T)
}
#p <- ggplot(dat.plot.melt, aes(norm_exprs, fill = gene, colour = gene)) +
# geom_density(alpha = 0.1) +
p <- ggdensity(dat.plot.melt,x="norm_exprs",fill="gene",color="gene",
alpha=0.1) +
geom_vline(xintercept = vis.v,linetype=2) +
facet_wrap(~gene,ncol=3,scales="free_y") +
theme_bw() +
theme(legend.position="none")
ggsave(sprintf("%s.inSiliso.marker.density.pdf",out.prefix),width=7,height=6)
stype.vec <- rep("unknown",nrow(dat.plot))
stype.vec[dat.plot[,"CD3"] > Th.CD3 & dat.plot[,"CD8"] > Th.CD8 & dat.plot[,"CD4"] <= Th.CD4] <- "CD8"
stype.vec[dat.plot[,"CD3"] > Th.CD3 & dat.plot[,"CD8"] > Th.CD8 & dat.plot[,"CD4"] > Th.CD4] <- "DP"
stype.vec[dat.plot[,"CD3"] > Th.CD3 & dat.plot[,"CD8"] <= Th.CD8 & dat.plot[,"CD4"] > Th.CD4] <- "CD4"
stype.vec[dat.plot[,"CD3"] > Th.CD3 & dat.plot[,"CD8"] <= Th.CD8 & dat.plot[,"CD4"] <= Th.CD4] <- "DN"
stype.strict.vec <- stype.vec
#####
has.gd.gene <- F
x.gdType <- NULL
if(all(c("TRDC","TRGC1","TRGC2") %in% colnames(dat.plot))){
x.gdType <- rep("unknown",nrow(dat.plot))
x.gdType[ dat.plot[,"CD3"] > Th.CD3 & dat.plot[,"TRDC"] > Th.DC ] <- "undetermined"
x.gdType[ dat.plot[,"CD3"] > Th.CD3 & dat.plot[,"TRDC"] > Th.DC &
(dat.plot[,"TRGC1"] > Th.GC1 & dat.plot[,"TRGC2"] <= Th.GC2 ) ] <- "delta2"
x.gdType[ dat.plot[,"CD3"] > Th.CD3 & dat.plot[,"TRDC"] > Th.DC &
(dat.plot[,"TRGC1"] <= Th.GC1 & dat.plot[,"TRGC2"] > Th.GC2 ) ] <- "delta1"
has.gd.gene <- T
}else{
warning(sprintf("There are no gd genes found in the data\n"))
}
#####
f.cell <- stype.vec=="DN" & (dat.plot[,"TH"] > Th.TH | dat.plot[,"TR"] > Th.TR)
###f.cell <- obj$stype=="DN" & dat.plot[,"CD4"] > 0 & (dat.plot[,"TH"] > Th.TH | dat.plot[,"TR"] > Th.TR)
#print(table(obj$stype))
cat(sprintf("numbe of cells can be rescued: %d\n",sum(f.cell)))
stype.rescue.vec <- NULL
if(do.rescue){
### todo: add requirement: DN and not gamma delta T ?
stype.rescue.vec <- stype.vec
stype.rescue.vec[f.cell] <- "CD4"
}
if(class(obj)[1]=="SingleCellExperiment"){
obj$stype <- stype.vec
obj$stype.strict <- stype.strict.vec
obj$gdType <- x.gdType
obj$stype.rescue <- stype.rescue.vec
}else if(class(obj)[1]=="AnnDataR6"){
obj$obs$stype <- stype.vec
obj$obs$stype.strict <- stype.strict.vec
obj$obs$stype.rescue <- stype.rescue.vec
}
####write.table(colData(obj),sprintf("%s.cellInfo.txt",out.prefix),row.names=F,sep="\t",quote=F)
return(obj)
}
#' calculate the signature score of one potential contaminated cell types (simple average-threshold method)
#' @importFrom SummarizedExperiment assay `colData<-`
#' @importFrom Seurat GetAssayData
#' @importFrom data.table data.table melt
#' @importFrom Matrix colMeans rowMeans
#' @importFrom ggplot2 ggplot geom_density geom_vline facet_wrap ggsave aes_string
#' @importFrom ggpubr theme_pubr
#' @param obj object of \code{SingleCellExperiment} or \code{Seurat}
#' @param out.prefix character; output prefix. (default: NULL)
#' @param assay.name character vector; which assay to use. (default: "norm_exprs").
#' @param g.name character; signature name of the cell type. (default: "plasmaB").
#' @param g.test character vector; signature genes of the cell type. (default: c("CD79A", "JCHAIN", "SDC1")).
#' @param score.t double; threshold for signature score (mean of g.test). (default: 1)
#' @param vis.v double vector; for vertical lines in visulization. (default: c(0.25,0.5,1)).
#' @return an object of \code{SingleCellExperiment} or \code{Seurat}
#' @details Two columns will be added to the obj, "%s.score" and "%s.class, where %s is the g.name.
#' @export
fill.contamination <- function(obj,out.prefix,assay.name="norm_exprs",
g.name="plasmaB",g.test=c("CD79A", "JCHAIN", "SDC1"),
score.t=0.75,vis.v=c(0.25,0.5,0.75,1))
{
#require("ggplot2")
#require("data.table")
#require("Matrix")
sigSize <- length(g.test)
g.test.i <- g.test
if(class(obj)[1]=="SingleCellExperiment"){
g.test <- ssc.displayName2id(obj, display.name = g.test)
if(length(g.test) < sigSize){
warning(sprintf("it seems some genes of %s are not in the data.",paste(g.test.i,collapse=",")))
return(obj)
}
exp.data <- assay(obj,assay.name)[g.test,,drop=F]
rownames(exp.data) <- names(g.test)
Sig.Score.tb <- data.table(cell=colnames(obj),Sig.Score= colMeans(exp.data))
Sig.Score.tb <- cbind(Sig.Score.tb,t(as.matrix(exp.data)))
colData(obj)[[sprintf("%s.score",g.name)]] <- Sig.Score.tb$Sig.Score
colData(obj)[[sprintf("%s.class",g.name)]] <- obj[[sprintf("%s.score",g.name)]] > score.t
}else if(class(obj)[1]=="Seurat"){
g.test <- intersect(g.test,rownames(obj))
if(length(g.test) < sigSize){
warning(sprintf("it seems some genes of %s are not in the data.",paste(g.test.i,collapse=",")))
return(obj)
}
exp.data <- GetAssayData(obj,"data")[g.test,,drop=F]
Sig.Score.tb <- data.table(cell=colnames(obj),Sig.Score= colMeans(exp.data))
Sig.Score.tb <- cbind(Sig.Score.tb,t(as.matrix(exp.data)))
obj[[sprintf("%s.score",g.name)]] <- Sig.Score.tb$Sig.Score
obj[[sprintf("%s.class",g.name)]] <- obj[[sprintf("%s.score",g.name)]] > score.t
}else if(class(obj)[1]=="AnnDataR6"){
g.test <- intersect(g.test,rownames(obj$var))
if(length(g.test) < sigSize){
warning(sprintf("it seems some genes of %s are not in the data.",paste(g.test.i,collapse=",")))
return(obj)
}
obj.tmp <- obj[,rownames(obj$var) %in% g.test]
Sig.Score.tb <- data.table(cell=rownames(obj.tmp$obs),Sig.Score=Matrix::rowMeans(obj.tmp$X))
Sig.Score.tb <- cbind(Sig.Score.tb,(as.matrix(obj.tmp$X)))
obj$obs[[sprintf("%s.score",g.name)]] <- Sig.Score.tb$Sig.Score
obj$obs[[sprintf("%s.class",g.name)]] <- obj$obs[[sprintf("%s.score",g.name)]] > score.t
}else{
return(obj)
}
Sig.Score.tb.melt <- melt(Sig.Score.tb,id="cell")
colnames(Sig.Score.tb.melt) <- c("cell","gene","norm_exprs")
if(!file.exists(dirname(out.prefix))){
dir.create(dirname(out.prefix),F,T)
}
p <- ggplot(Sig.Score.tb.melt, aes_string(x="norm_exprs", fill = "gene", colour = "gene")) +
geom_density(alpha = 0.1) +
geom_vline(xintercept = vis.v,linetype=2) +
facet_wrap(~gene,ncol=2,scales="free_y") +
theme_pubr() +
theme(legend.position="none")
ggsave(sprintf("%s.marker.%s.density.pdf",out.prefix,g.name),
width=6,
height=2.5*round(0.5+length(g.test)/2))
return(obj)
}
#' calculate the proliferation score
#' @importFrom SingleCellExperiment rowData
#' @importFrom SummarizedExperiment assay
#' @importFrom sscClust classify.outlier
#' @importFrom data.table as.data.table data.table
#' @param obj object of \code{SingleCellExperiment}
#' @param gene.prol character vector; genes to use.
#' @param assay.name character vector; which assay to use. (default: "norm_exprs").
#' @param out.prefix character; output prefix. (default: NULL)
#' @param method character; method to use. (default: "mean").
#' @return a list
#' @details calculate the proliferation score.
#' @export
calProliferationScore <- function(obj,gene.prol,assay.name="norm_exprs",out.prefix=NULL,method="mean")
{
f.gene <- rowData(obj)[,"display.name"] %in% gene.prol
exp.sub <- as.matrix(assay(obj[f.gene,],assay.name))
f.zero <- apply(exp.sub,1,function(x){ all(x==0) })
if(sum(f.zero) > 0){
cat(sprintf("Number of gene with value zero in all cells: %d\n",sum(f.zero)))
}
exp.sub <- exp.sub[!f.zero,]
dat.score <- NULL
out.tb <- NULL
if(method=="mean")
{
score.prol <- colMeans(exp.sub)
dat.score <- classify.outlier(score.prol,out.prefix=out.prefix)
out.tb <- as.data.table(dat.score$score.cls.tb)
colnames(out.tb)[1] <- "cellID"
}
#### else if(method=="AUCell") {
#### require("AUCell")
#### #####
#### #f.gene <- rowData(obj)[,"display.name"] %in% gene.prol
#### #exp.sub <- as.matrix(assay(obj,assay.name))
#### exp.sub <- (assay(obj,assay.name))
#### rownames(exp.sub) <- rowData(obj)[,"display.name"]
#### f.zero <- apply(exp.sub,1,function(x){ all(x==0) })
#### if(sum(f.zero) > 0){
#### cat(sprintf("Number of gene with value zero in all cells: %d\n",sum(f.zero)))
#### }
#### exp.sub <- exp.sub[!f.zero,]
####
#### #####
#### pdf(sprintf("%s.buildRankings.1.pdf",out.prefix),width=7,height=7)
#### cells_rankings <- AUCell_buildRankings(exp.sub)
#### dev.off()
####
#### geneSets <- list("prol"=intersect(rownames(exp.sub),gene.prol))
#### #### geneSets <- GSEABase::GeneSet(genes, setName="geneSet1") # alternative
#### cells_AUC <- AUCell_calcAUC(geneSets, cells_rankings, aucMaxRank=nrow(cells_rankings)*0.05)
####
#### pdf(sprintf("%s.exploreThresholds.1.pdf",out.prefix),width=7,height=7)
#### #par(mfrow=c(3,3))
#### set.seed(123)
#### cells_assignment <- AUCell_exploreThresholds(cells_AUC, plotHist=TRUE, nCores=1, assign=TRUE)
#### dev.off()
####
#### if("L_k2" %in% rownames(cells_assignment$prol$aucThr$thresholds)){
#### th.prol <- cells_assignment$prol$aucThr$thresholds["L_k2","threshold"]
#### }else{
#### th.prol <- 0.09
#### }
####
#### ##geneSetName <- rownames(cells_AUC)[grep("prol", rownames(cells_AUC))]
#### pdf(sprintf("%s.exploreThresholds.2.pdf",out.prefix),width=7,height=7)
#### AUCell_plotHist(cells_AUC["prol",], aucThr=th.prol)
#### abline(v=th.prol)
#### dev.off()
####
#### out.tb <- data.table(cellID=colnames(cells_AUC),
#### proliferationScore.bin=as.integer(getAUC(cells_AUC)["prol",]>th.prol),
#### proliferationScore=getAUC(cells_AUC)["prol",])
#### out.tb$classification <- out.tb$proliferationScore.bin
#### dat.score <- cells_AUC
####
#### my.dat.score <- classify.outlier(getAUC(cells_AUC)["prol",],out.prefix=out.prefix)
#### out.tb[,myCls:=my.dat.score$score.cls.tb[cellID,"classification"]]
####
#### }
return(list("out.tb"=out.tb,"detail"=dat.score))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.