scPip: pipeline for scRNA-seq data analysis

#!/usr/bin/env Rscript

suppressPackageStartupMessages(library("argparse"))

parser <- ArgumentParser()
parser$add_argument("-a", "--aFile", type="character", required=TRUE, help="input seu file list")
parser$add_argument("-b", "--bFile", type="character", required=TRUE, help="input sce file list")
parser$add_argument("-o", "--outPrefix", type="character", required=TRUE, help="outPrefix")
parser$add_argument("-q", "--specie", type="character", default="human", help="one of human, mouse [default %(default)s]")
parser$add_argument("-c", "--stype", type="character", help="only analyze stype specified (default all)")
parser$add_argument("-u", "--geneIDFile", type="character", help="gene id mapping file")
##parser$add_argument("-d", "--npc", type="integer",default=15L, help="[default %(default)s]")
parser$add_argument("-d", "--npc", type="character",default="15", help="[default %(default)s]")
parser$add_argument("-n", "--ncores", type="integer",default=16L, help="[default %(default)s]")
parser$add_argument("-m", "--measurement",type="character",default="counts",help="[default %(default)s]")
parser$add_argument("-r", "--resolution",type="character",default="2",help="[default %(default)s]")
parser$add_argument("-t", "--scTransform",action="store_true",default=FALSE,help="[default %(default)s]")
parser$add_argument("-y", "--harmony",action="store_true",default=FALSE,help="[default %(default)s]")
parser$add_argument("-i", "--integration",type="character",help="[default %(default)s]")
parser$add_argument("-g", "--deg",action="store_true",default=FALSE,help="[default %(default)s]")
parser$add_argument("-w", "--ncellDEG",type="integer",default=1500,
                    help="number of cells to downsample to for each group. used in DEG analysis. [default %(default)s]")
parser$add_argument("-s", "--scale",action="store_true",default=FALSE,help="[default %(default)s]")
parser$add_argument("-j", "--corVar", type="character", default="S.Score,G2M.Score,DIG.Score1,percent.mito,batchV",
		    help="subset of S.Score,G2M.Score,DIG.Score1,ISG.Score1, or NULL. [default %(default)s]")
parser$add_argument("-f", "--filterout",type="character",help="Format is COLUMN_ID:COLUMN_VAL_1,COLUMN_VAL_2,COLUMN_VAL_3. Filter out cells with COLUMN_ID in one of COLUMN_VAL_1, COLUMN_VAL_2, and COLUMN_VAL_3.")
parser$add_argument("-k", "--keep",type="character",help="Format is COLUMN_ID:COLUMN_VAL_1,COLUMN_VAL_2,COLUMN_VAL_3. Keep only cells with COLUMN_ID in one of COLUMN_VAL_1, COLUMN_VAL_2, and COLUMN_VAL_3.")
parser$add_argument("-x", "--removeContamination",type="character",help="comma separated string indicates subset of predefined signature (plasmaB, caf, epi, T, cd8, and mac) will be calculated and cells with high signature scores will be removed. For example, use plasmaB:0.75,caf:0.75,epi:0.75,T:0.25 for myeloid cell analysis. The numer after colon is the threshold")
parser$add_argument("-p", "--platform",type="character",default="10X",
                    help="platform such as 10X, SmartSeq2 [default %(default)s)]")
args <- parser$parse_args()
print(args)

############## tune parametrs  ########

seu.file <- args$aFile
sce.file <- args$bFile
out.prefix <- args$outPrefix
###opt.npc <- args$npc
opt.npc <- eval(parse(text = args$npc))
opt.ncores <- args$ncores
opt.measurement <- args$measurement
opt.platform <- args$platform
opt.stype <- args$stype
opt.resolution <- args$resolution
opt.scTransform <- args$scTransform
opt.harmony <- args$harmony
opt.integration <- args$integration
opt.doDEG <- args$deg
opt.ncell.deg <- args$ncellDEG
opt.scale <- args$scale
opt.cor.var <- if(args$corVar=="") c("") else unlist(strsplit(args$corVar,",",perl=T))
opt.filterout <- args$filterout
opt.keep <- args$keep
opt.geneIDFile <- args$geneIDFile
opt.removeContamination <- args$removeContamination
opt.specie <- args$specie

dir.create(dirname(out.prefix),F,T)

saveRDS(args,file=sprintf("%s.args.rds",out.prefix))
###args <- readRDS(file=sprintf("%s.args.rds",out.prefix))

############## tune parametrs  ########
suppressMessages(library("sscVis"))
suppressMessages(library("sscClust"))
suppressMessages(library("Seurat"))
suppressMessages(library("tictoc"))
suppressMessages(library("plyr"))
suppressMessages(library("dplyr"))
suppressMessages(library("tibble"))
suppressMessages(library("doParallel"))
suppressMessages(library("Matrix"))
suppressMessages(library("data.table"))
suppressMessages(library("R.utils"))
suppressMessages(library("gplots"))
suppressMessages(library("ggplot2"))
suppressMessages(library("ggpubr"))
suppressMessages(library("cowplot"))
suppressMessages(library("limma"))
suppressMessages(library("reticulate"))
suppressMessages(library("scPip"))
options(stringsAsFactors = FALSE)

dat.ext.dir <- system.file("extdata",package="scPip")
if(opt.specie=="human"){
    ###gene.exclude.file <- sprintf("%s/exclude.gene.misc.human.v3.RData",dat.ext.dir)
    gene.exclude.file <- sprintf("%s/exclude.gene.misc.human.v4.RData",dat.ext.dir)
}else if(opt.specie=="mouse"){
    ###gene.exclude.file <- sprintf("%s/exclude.gene.misc.mouse.v3.RData",dat.ext.dir)
    gene.exclude.file <- sprintf("%s/exclude.gene.misc.mouse.v4.RData",dat.ext.dir)
}else{
    gene.exclude.file <- NULL
}

if(!is.null(gene.exclude.file)){
    env.misc <- loadToEnv(gene.exclude.file)
    g.all.gene.ignore.df <- env.misc$all.gene.ignore.df
    g.all.gene.ignore.df %>% head
}else{
    g.all.gene.ignore.df <- NULL
}

######################

gene.mapping.table <- NULL
if(!is.null(opt.geneIDFile) && file.exists(opt.geneIDFile) && grepl("\\.rds$",opt.geneIDFile,perl=T)){
    gene.mapping.table <- readRDS(opt.geneIDFile)
}

seu <- NULL
sce <- NULL

if(seu.file!="-" && file.exists(seu.file)){
    if(grepl("\\.rds$",seu.file)){
        seu <- readRDS(seu.file)
    }else{
        env.a <- loadToEnv(seu.file)
        obj.name.a <- names(env.a)[1]
        seu <- env.a[[obj.name.a]]
        rm(env.a)
    }
}
if(sce.file!="-" && file.exists(sce.file)){
    if(grepl("\\.rds$",sce.file)){
        sce <- readRDS(sce.file)
    }else{
        env.b <- loadToEnv(sce.file)
        obj.name.b <- names(env.b)[1]
        sce <- env.b[[obj.name.b]]
        rm(env.b)
    }
}

if(!is.null(seu) && !is.null(opt.stype)){
	seu <- seu[,seu$stype==opt.stype]
}

if(!is.null(seu) && "percent.mito" %in% colnames(seu[[]])){
	if(max(seu$percent.mito) <1){
		seu  <- subset(seu, subset = percent.mito<0.1)
	}else{
		seu  <- subset(seu, subset = percent.mito<10)
	}
}

##### check & clean sce #####
if(!is.null(sce) && "percent.mito" %in% colnames(colData(sce))){
    if(all(is.na(sce$percent.mito))){
        sce$percent.mito <- NULL
    }
}

if(!is.null(sce) && !("seu.id" %in% colnames(rowData(sce)))){
    rowData(sce)[["seu.id"]] <- gsub("_","-",rowData(sce)[["display.name"]])
}

if(!is.null(sce) && ("libraryID" %in% colnames(colData(sce))) && all(is.na(sce$libraryID))){
    sce$libraryID <- "Unk"
}



if(!is.null(seu) && !is.null(opt.filterout)){
	if(!file.exists(opt.filterout)){
		col.filter <- unlist(strsplit(opt.filterout,":"))[1]
		col.value <- unlist(strsplit(unlist(strsplit(opt.filterout,":"))[2],","))
        if(col.filter %in% colnames(seu[[]])){
            cat(sprintf("filter cells with %s in c(%s)\n",col.filter,paste(col.value,collapse=",")))
            f.cell <- seu[[]][,col.filter] %in% col.value
            print(summary(f.cell))
            seu <- seu[,!f.cell]
        }else{
            warning(sprintf("The meta-data doesnot contain %s\n",col.filter))
        }
	}
}

if(!is.null(opt.keep)){
    col.keep <- unlist(strsplit(opt.keep,":"))[1]
    col.value <- unlist(strsplit(unlist(strsplit(opt.keep,":"))[2],","))
    if(!file.exists(opt.keep)){
        if(!is.null(seu)){
            if(col.keep %in% colnames(seu[[]])){
                cat(sprintf("keep only cells with %s in c(%s)\n",col.keep,paste(col.value,collapse=",")))
                f.cell <- seu[[]][,col.keep] %in% col.value
                print(summary(f.cell))
                seu <- seu[,f.cell]
            }else{
                warning(sprintf("The meta-data doesnot contain %s\n",col.keep))
            }
        }else if(!is.null(sce)){
            if(col.keep %in% colnames(colData(sce))){
                cat(sprintf("keep only cells with %s in c(%s)\n",col.keep,paste(col.value,collapse=",")))
                f.cell <- colData(sce)[,col.keep] %in% col.value
                print(summary(f.cell))
                sce <- sce[,f.cell]
            }else{
                warning(sprintf("The meta-data doesnot contain %s\n",col.keep))
            }
        }
    }
}

if(!is.null(opt.removeContamination)){
    ### opt.removeContamination <- "plasmaB:0.75,caf:0.75,epi:0.75,T:0.25"

    g.cont.gene.list <- list("plasmaB"=c("JCHAIN"),
                             "B"=c("CD79A","MS4A1"),
                             "caf"=c("COL1A2", "COL1A1", "COL3A1","LUM"),
                             "epi"=c("KRT18","KRT19","EPCAM"),
                             "T"=c("CD3D","CD3G"),
                             "cd8"=c("CD8A","CD8B"),
                             "mac"=c("LYZ","C1QA","C1QB","CD68"))

    sig.vec <- unlist(strsplit(opt.removeContamination,","))
    sig.name <- unname(sapply(sig.vec,function(x){ unlist(strsplit(x,":"))[1] }))
    sig.thre <- unname(sapply(sig.vec,function(x){ as.numeric(unlist(strsplit(x,":"))[2]) }))

    for(i in seq_along(sig.name)){
        loginfo(sprintf("calculate signature score of %s cells ...",sig.name[i]))
        seu <- fill.contamination(seu,out.prefix,
                                  g.name=sig.name[i],
                                  g.test=g.cont.gene.list[[sig.name[i]]],
                                  score.t=sig.thre[i],
                                  vis.v=c(0.25,0.5,0.75,1))
    }
   
    idx.sig.class <- intersect(sprintf("%s.class",sig.name),colnames(seu[[]]))
    f.cont.mtx <- seu[[]][,idx.sig.class,drop=F]
    f.cont <- rowSums(f.cont.mtx) > 0
    loginfo(sprintf("A total number of potential contamination: %d\n",sum(f.cont)))
    print(colSums(f.cont.mtx==T))
    seu <- seu[,!f.cont]

}

if(!is.null(seu) && !is.null(sce)){
    f.cell <- intersect(colnames(seu),colnames(sce))
    seu <- seu[,f.cell]
    sce <- sce[,colnames(seu)]
}

tic("run.Seurat3")
obj.list <- run.Seurat3(seu,sce,out.prefix,
                        gene.exclude.df=g.all.gene.ignore.df,
                        n.top=1500,
						measurement=opt.measurement,platform=opt.platform,
                        use.sctransform=opt.scTransform,
                        use.harmony=opt.harmony,
                        method.integration=opt.integration,
                        do.deg=opt.doDEG,
                        ncell.deg=opt.ncell.deg,
                        gene.mapping.table=gene.mapping.table,
                        ###do.adj=T,do.scale=F,
			            cor.var=opt.cor.var,
                        do.scale=opt.scale,
                        plot.rd=c("umap"),
						opt.res=opt.resolution,
                        opt.npc=opt.npc,ncores=opt.ncores)
toc()
Japrin/scPip documentation built on April 14, 2025, 10:32 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
Japrin/scPip
pipeline for scRNA-seq data analysis

inst/script/run.seurat3.basic.R
In Japrin/scPip: pipeline for scRNA-seq data analysis

R Package Documentation

Browse R Packages

We want your feedback!

Japrin/scPip pipeline for scRNA-seq data analysis

inst/script/run.seurat3.basic.R In Japrin/scPip: pipeline for scRNA-seq data analysis

R Package Documentation

Browse R Packages

We want your feedback!

Japrin/scPip
pipeline for scRNA-seq data analysis

inst/script/run.seurat3.basic.R
In Japrin/scPip: pipeline for scRNA-seq data analysis