annoSpecies <- function(expr.data,
cell.manifest,
gene.manifest,
hg.mm.thres = 0.9,
mix.anno = c("human" = "hg19", "mouse" = "mm10")){
gene.manifest$Species <- substr(gene.manifest$EnsemblID, 1, 4)
nUMI <- Matrix::colSums(expr.data)
hg.gene <- subset(gene.manifest, Species == mix.anno["human"])$Symbol
umiHgFrac = Matrix::colSums(expr.data[hg.gene,]) / nUMI
mm.gene <- subset(gene.manifest, Species == mix.anno["mouse"])$Symbol
umiMmFrac = Matrix::colSums(expr.data[mm.gene,]) / nUMI
cell.species <- rep("middle", length(umiHgFrac))
cell.species[umiHgFrac >= hg.mm.thres] <- "human"
cell.species[umiMmFrac >= hg.mm.thres] <- "mouse"
cell.manifest$umiHgFrac = umiHgFrac
cell.manifest$umiMmFrac = umiMmFrac
cell.manifest$cell.species = cell.species
return(list(cell.manifest = cell.manifest,
gene.manifest = gene.manifest))
}
getSingleSpecies <- function(expr.data,
cell.manifest,
gene.manifest,
species = "human",
hg.mm.thres = 0.9,
mix.anno = c("human" = "hg19", "mouse" = "mm10")){
anno.res <- annoSpecies(expr.data, cell.manifest, gene.manifest,
hg.mm.thres, mix.anno = mix.anno)
cell.manifest <- subset(anno.res$cell.manifest, cell.species == species)
gene.manifest <- subset(anno.res$gene.manifest, Species == mix.anno[species])
if(dim(gene.manifest)[1] == 0){
stop("Error in 'getSingleSpecies': no ", mix.anno[species], " genes data found.\n")
}
if(dim(cell.manifest)[1] == 0){
stop("Error in 'getSingleSpecies': no ", species, " cells data found.\n")
}
gene.manifest$Symbol <- substr(gene.manifest$Symbol, 6, 60)
gene.manifest$EnsemblID <- substr(gene.manifest$EnsemblID, 6, 60)
rownames(expr.data) <- substr(rownames(expr.data), 6, 60)
expr.data <- expr.data[gene.manifest$Symbol, cell.manifest$barcodes]
return(list(expr.data = expr.data,
cell.manifest = cell.manifest,
gene.manifest = gene.manifest))
}
getGeneManifest <- function(data.path, only.expr = T) {
gene.loc <- paste0(data.path, '/features.tsv.gz')
if(!grepl("feature_bc_matrix", data.path)) {
gene.loc <- paste0(data.path, '/genes.tsv')
}
gene.manifest <- read.delim(gene.loc, header = FALSE, stringsAsFactors = FALSE)
colnames(gene.manifest) <-
c("EnsemblID", "Symbol", "Type")[1:dim(gene.manifest)[2]]
# gene.manifest$Symbol <- gsub("_", "-", make.unique(gene.manifest$Symbol))
if(only.expr & "Type" %in% colnames(gene.manifest)){
gene.manifest <- subset(gene.manifest, Type == "Gene Expression")
}
return(gene.manifest)
}
addGeneAnno <- function (gene.manifest, species = "human"){
annotation <- rep("other", dim(gene.manifest)[1])
if(species == "human"){
mito.genes <- grep('^MT-', gene.manifest$Symbol, value = TRUE)
ribo.genes <- grep('^RPL|^RPS|^MRPL|^MRPS', gene.manifest$Symbol, value = TRUE)
diss.genes <- read.table(system.file("txt", "diss-genes.txt", package = "scCancer"),
header = F, stringsAsFactors = F)$V1
}else if(species == "mouse"){
mito.genes <- grep('^mt-', gene.manifest$Symbol, value = TRUE)
ribo.genes <- grep('^Rpl|^Rps|^Mrpl|^Mrps', gene.manifest$Symbol, value = TRUE)
diss.genes <- read.table(system.file("txt", "diss-genes.txt", package = "scCancer"),
header = F, stringsAsFactors = F)$V1
diss.genes <- getMouseGene(diss.genes)
}
annotation[gene.manifest$Symbol %in% mito.genes] <- "mitochondrial"
annotation[gene.manifest$Symbol %in% ribo.genes] <- "ribosome"
annotation[gene.manifest$Symbol %in% diss.genes] <- "dissociation"
gene.manifest$Annotation <- annotation
return(gene.manifest)
}
#' prepareData
#'
#' @param samplePath A path containing the cell ranger processed data.
#' @inheritParams runScStatistics
#'
#' @return A list of expr.data, cell.manifest, gene.manifest, raw.data, min.nUMI, cr.version and run.emptydrop
#' @export
#'
prepareData <- function(samplePath,
species = "human",
hg.mm.mix = F,
hg.mm.thres = 0.9,
mix.anno = c("human" = "hg19", "mouse" = "mm10")) {
raw.data = T
data.path <- get10Xpath(samplePath, raw.data = raw.data)
if(is.null(data.path)){
raw.data = F
data.path <- get10Xpath(samplePath, raw.data = raw.data)
if(is.null(data.path)){
stop("Cannot find the raw data or filtered data.\n")
}else{
cat("- Warning in 'prepareData': Cannot find the raw data, and use the filtered data instead.\n")
}
}
expr.data <- Read10Xdata(data.dir = data.path, only.expr = T)
# rownames(expr.data) <- gsub("_", "-", rownames(expr.data))
cr.version <- getCRversion(data.path)
run.emptydrop <- F
if(raw.data){
filter.path <- get10Xpath(samplePath, raw.data = F)
if(!is.null(filter.path) & cr.version == "Cell Ranger (version >= 3)"){
filtered.cell <- getBarcodes(filter.path)
}else{
if(requireNamespace("DropletUtils", quietly = TRUE)){
run.emptydrop <- T
e.out <- emptyDrops(expr.data)
is.cell <- e.out$FDR <= 0.01
filtered.cell <- colnames(expr.data)[which(is.cell)]
}else{
if(!is.null(filter.path)){
cat("- Warning in 'prepareData': Package 'DropletUtils' isn't installed and the pipeline will use the supplied CR2 filtered data instead.\n")
filtered.cell <- getBarcodes(filter.path)
}else{
stop("Please install 'DropletUtils' package or provide filtered data.\n")
}
}
}
}
## gene.manifest
gene.manifest <- getGeneManifest(data.path, only.expr = T)
gene.manifest$Symbol <- make.unique(gene.manifest$Symbol)
## cell.manifest
nGene = Matrix::colSums(expr.data > 0)
nUMI = Matrix::colSums(expr.data)
droplet.type <- rep("cell", dim(expr.data)[2])
names(droplet.type) <- colnames(expr.data)
if(raw.data){
min.nUMI <- min(nUMI[colnames(expr.data) %in% filtered.cell])
droplet.type[!(names(droplet.type) %in% filtered.cell)] <- "empty"
}else{
min.nUMI <- min(nUMI)
}
cell.manifest = data.frame(
barcodes = colnames(expr.data),
droplet.type = droplet.type,
nUMI = nUMI,
nGene = nGene
)
rm(nGene, nUMI, droplet.type)
cell.manifest = cell.manifest[cell.manifest$nUMI > 0, ]
expr.data <- expr.data[, cell.manifest$barcodes]
## hg.mm.mix : multi-species
if(hg.mm.mix){
anno.res <- annoSpecies(expr.data, cell.manifest, gene.manifest,
hg.mm.thres, mix.anno = mix.anno)
cell.manifest <- subset(anno.res$cell.manifest, cell.species == species)
gene.manifest <- subset(anno.res$gene.manifest, Species == mix.anno[species])
if(dim(gene.manifest)[1] == 0){
stop("Error in 'getSingleSpecies': no ", mix.anno[species], " genes data found.\n")
}
if(dim(cell.manifest)[1] == 0){
stop("Error in 'getSingleSpecies': no ", species, " cells data found.\n")
}
gene.manifest$Symbol <- substr(gene.manifest$Symbol, 6, 60)
gene.manifest$EnsemblID <- substr(gene.manifest$EnsemblID, 6, 60)
rownames(expr.data) <- substr(rownames(expr.data), 6, 60)
rm(anno.res)
expr.data <- expr.data[gene.manifest$Symbol, cell.manifest$barcodes]
}
gene.manifest <- addGeneAnno(gene.manifest, species = species)
geneType <- c("mitochondrial", "ribosome", "dissociation")
for(tt in geneType) {
cur.genes <- subset(gene.manifest, Annotation == tt, select = c("Symbol"))$Symbol
cur.percent <- Matrix::colSums(expr.data[cur.genes,]) / Matrix::colSums(expr.data)
cur.percent[is.na(cur.percent)] = 0
cur.name = paste0(substr(tt, 1, 4), ".percent")
cell.manifest[[cur.name]] = cur.percent
}
gene.manifest$Symbol <- gsub("_", "-", gene.manifest$Symbol)
results <- list(expr.data = expr.data,
cell.manifest = cell.manifest,
gene.manifest = gene.manifest,
raw.data = raw.data,
min.nUMI = min.nUMI,
cr.version = cr.version,
run.emptydrop = run.emptydrop)
return(results)
}
cellsPlot <- function(cell.manifest, plot.type = "histogram") {
cur.color <- c("cell" = '#825f87', "empty" = '#aeacac')
if(plot.type == "histogram"){
p <- ggplot() +
geom_histogram(
data = subset(cell.manifest, droplet.type == "empty"),
mapping = aes(x = nUMI, fill = "empty"),
bins = 200, alpha = 0.7) +
geom_histogram(
data = subset(cell.manifest, droplet.type == "cell"),
mapping = aes(x = nUMI, fill = "cell"),
bins = 200, alpha = 0.7) +
geom_freqpoly(data = cell.manifest, mapping = aes(x = nUMI), bins = 200) +
labs(y = "Droplet number") +
scale_fill_manual(
name = 'Droplet type',
guide = 'legend',
# labels = c("cell", "empty"),
values = cur.color) +
scale_x_continuous(trans = 'log10') + scale_y_continuous(trans = 'log10') +
guides(fill = guide_legend(override.aes = list(size = 1, alpha = 0.7))) +
ggplot_config(base.size = 6) +
theme(legend.position = "bottom")
}else if(plot.type == "rankplot"){
tmp.df <- cell.manifest[order(cell.manifest["nUMI"], decreasing = TRUE),]
xi <- 1:dim(tmp.df)[1]
tmp.df$xi = xi
p <- ggplot(tmp.df, aes(x = xi, y = nUMI, color = droplet.type)) +
geom_point(cex = 1, alpha = 0.5) +
labs(x = "Droplets", y = "nUMI") +
scale_color_manual(values = cur.color, name = "Droplet type") +
scale_x_continuous(trans = 'log10') + scale_y_continuous(trans = 'log10') +
guides(color = guide_legend(override.aes = list(size = 5))) +
ggplot_config(base.size = 6) +
theme(legend.position = "bottom")
}else{
stop("Error in 'cellPlot': invalid 'plot.type' argument.\n")
}
return(p)
}
calcThres <- function(cell.manifest, values = c("nUMI", "nGene")){
high.thresholds <- list()
for(value in values){
outliers <- getOutliers(cell.manifest[[value]])
high.thresholds[[value]] <- round(min(outliers), 3)
}
return(high.thresholds)
}
histPlot <- function(cell.manifest, value, xlines = c()){
p <- ggplot(cell.manifest, aes(x = .data[[value]])) +
geom_histogram(bins = 200, position = "stack", fill = "#a788ab") +
labs(x = value, y = "Cell number") +
ggplot_config(base.size = 6)
for(x in xlines){
p <- p + geom_vline(xintercept = x, colour = "red", linetype = "dashed")
}
return(p)
}
marginPlot <- function(cell.manifest, value, color = "#a788ab", xlines = c(), ylines = c()){
p <- ggplot(cell.manifest, aes(y = .data[[value]], x = nUMI)) +
geom_point(cex = 0.8, alpha = 0.1, color = color) +
labs(y = value) +
ggplot_config(base.size = 7)
for(x in xlines){
p <- p + geom_vline(xintercept = x, colour = "red", linetype = "dashed")
}
for(y in ylines){
p <- p + geom_hline(yintercept = y, colour = "red", linetype = "dashed")
}
p <- ggMarginal(p, bins = 200, type = "histogram", fill = color, color = color)
return(p)
}
getBgPercent <- function(cell.manifest.all, expr.data, bg.low = 1, bg.up = 10){
cell.manifest.all$barcodes <- rownames(cell.manifest.all)
bg.bars <- subset(cell.manifest.all, nUMI >= bg.low & nUMI <= bg.up)$barcodes
if(length(bg.bars) == 0){
result <- NULL
}else{
bg.sum <- Matrix::rowSums(expr.data[, bg.bars])
bg.percent <- bg.sum / sum(bg.sum)
result <- data.frame(row.names = rownames(expr.data),
est = bg.percent,
counts = bg.sum)
}
return(result)
}
getNcell <- function(cell.manifest, expr.data){
tmp.mat <- expr.data[, cell.manifest$barcodes]
nCell <- Matrix::rowSums(tmp.mat > 0)
return(nCell)
}
getDetectRate <- function(nCell, n){
detect.rate <- nCell / n
return(detect.rate)
}
getExprProp <- function(cell.manifest, expr.data){
tmp.mat <- expr.data[, cell.manifest$barcodes]
expr.frac <- t(t(tmp.mat) / Matrix::colSums(tmp.mat))
return(expr.frac)
}
getLowFrac <- function(expr.frac, bg.percent){
expr.frac <- as(expr.frac, "dgTMatrix")
expr.frac@x <- expr.frac@x - bg.percent[expr.frac@i + 1]
low.frac <- Matrix::rowSums(expr.frac < 0) / dim(expr.frac)[2]
return(low.frac)
}
getPropMedian <-function(expr.frac, nCell){
prop.median <- rep(0, length(nCell))
names(prop.median) <- names(nCell)
tmp.sel.genes <- names(nCell)[nCell >= dim(expr.frac)[2] / 2]
prop.median[tmp.sel.genes] <- apply(expr.frac[tmp.sel.genes, ], 1, median)
return(prop.median)
}
updateGeneM <- function(gene.manifest,
bg.percent = NULL,
nCell = NULL,
detect.rate = NULL,
prop.median = NULL,
low.frac = NULL){
addGeneM <- function(gene.manifest, values, name){
if(!is.null(values)){
gene.manifest[[name]] <- values
}
return(gene.manifest)
}
gene.manifest <- addGeneM(gene.manifest, nCell, "nCell")
gene.manifest <- addGeneM(gene.manifest, prop.median, "prop.median")
gene.manifest <- addGeneM(gene.manifest, bg.percent, "bg.percent")
gene.manifest <- addGeneM(gene.manifest, detect.rate, "detect.rate")
gene.manifest <- addGeneM(gene.manifest, low.frac, "low.frac")
return(gene.manifest)
}
genePropPlot <- function(gene.manifest, expr.frac){
show.num = 100
if("bg.percent" %in% colnames(gene.manifest)){
gene.manifest <- gene.manifest[order(gene.manifest$bg.percent, decreasing = T), ]
}else{
gene.manifest <- gene.manifest[order(gene.manifest$prop.median, decreasing = T), ]
}
gene.show <- head(gene.manifest$Symbol, show.num)
rownames(gene.manifest) <- gene.manifest$Symbol
gene.manifest <- gene.manifest[gene.show, ]
rate.df.plot <- melt(as.data.frame(as.matrix(t(expr.frac[gene.show, ]))), id.vars = NULL)
rate.df.plot$Annotation <- factor(gene.manifest[as.character(rate.df.plot$variable), ]$Annotation,
levels = c("mitochondrial", "ribosome", "dissociation", "other"), ordered = T)
rate.df.plot$variable <- factor(rate.df.plot$variable,
levels = rev(gene.show), ordered = TRUE)
sub.ix <- c(rep("1-50", 50), rep("51-100", 50))
names(sub.ix) <- gene.show
rate.df.plot$subPlot <- factor(sub.ix[as.character(rate.df.plot$variable)],
levels = c("1-50", "51-100"), ordered = T)
if("bg.percent" %in% colnames(gene.manifest)){
bg.df <- data.frame(ix = c(1:50, 1:50) + 0.1,
frac = rev(gene.manifest[gene.show, ]$bg.percent),
type = "Background proportion")
bg.df$subPlot <- factor(sub.ix[as.character(rev(gene.show))],
levels = c("1-50", "51-100"), ordered = T)
}else{
bg.df <- NULL
}
gene.colors <- c(other = "#d56f6d", ribosome = "#f29721", dissociation = "#65a55d", mitochondrial = "#3778bf")
p <- ggplot() + coord_flip() +
scale_color_manual(values = "red", name = "") +
scale_fill_manual(values = gene.colors, name = "Gene:") +
scale_y_continuous(breaks = c(0.0001, 0.001, 0.01, 0.1, 1),
labels = c("0.0001", "0.001", "0.01", "0.1", "1"),
trans = 'log10') +
theme_bw() +
theme(axis.title.y = element_blank(),
legend.position = "top",
legend.title = element_text(face="bold"))
p1 <- p + geom_boxplot(data = subset(rate.df.plot, subPlot == "1-50"),
mapping = aes(x = variable, y = value, fill = Annotation),
outlier.size = 0.1, alpha = 0.8) +
xlab("") + ylab(paste0("Gene proportion of total UMI (1-50)"))
p2 <- p + geom_boxplot(data = subset(rate.df.plot, subPlot == "51-100"),
mapping = aes(x = variable, y = value, fill = Annotation),
outlier.size = 0.1, alpha = 0.8) +
xlab("") + ylab(paste0("Gene proportion of total UMI (51-100)"))
all.p <- p + geom_boxplot(data = rate.df.plot,
mapping = aes(x = variable, y = value, fill = Annotation),
outlier.size = 0.1, alpha = 0.8)
if(!is.null(bg.df)){
p1 <- p1 + geom_point(data = subset(bg.df, subPlot == "1-50"),
aes(x = ix, y = frac, color = type), shape = "*", size = 5)
p2 <- p2 + geom_point(data = subset(bg.df, subPlot == "51-100"),
aes(x = ix, y = frac, color = type), shape = "*", size = 5)
all.p <- all.p + geom_point(data = bg.df, aes(x = ix, y = frac, color = type), shape = "*", size = 5)
}
p <- grid_arrange_shared_legend(p1, p2, all.p = all.p, ncol = 2, nrow = 1)
# return(list(rdf = rate.df.plot, bdf = bg.df, p1 = p1, p2 = p2, all.p = all.p, p = p))
return(p)
}
bgCellScatter <- function(gene.manifest){
tmp.gene.mani <- subset(gene.manifest, nCell > 10)
tmp.gene.mani$Annotation <- factor(tmp.gene.mani$Annotation, ordered = T,
levels = c("mitochondrial", "ribosome", "dissociation", "other"))
gene.colors <- c(other = "#d56f6d", ribosome = "#f29721", dissociation = "#65a55d", mitochondrial = "#3778bf")
p <- ggplot() +
geom_point(tmp.gene.mani,
mapping = aes(x = prop.median, y = bg.percent, color = Annotation),
alpha = 0.3) +
scale_color_manual(values = gene.colors, name = "Gene:") +
coord_fixed() +
scale_x_continuous(trans = 'log10',
limits = c(0.00001, 0.1),
breaks = c(0.00001, 0.0001, 0.001, 0.01, 0.1),
labels = c("0.00001", "0.0001", "0.001", "0.01", "0.1")) +
scale_y_continuous(trans = 'log10',
limits = c(0.00001, 0.1),
breaks = c(0.00001, 0.0001, 0.001, 0.01, 0.1),
labels = c("0.00001", "0.0001", "0.001", "0.01", "0.1")) +
geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "grey") +
ggplot_config(base.size = 6) +
theme(legend.justification = c(0,1), legend.position = c(0.01,1)) +
guides(color = guide_legend(override.aes = list(size = 2),
keywidth = 0.1,
keyheight = 0.15,
default.unit = "inch")) +
labs(x = "Median of gene proportion in cells", y = "Gene proportion in background")
return(p)
}
bgDetScatter <- function(gene.manifest){
tmp.gene.mani <- subset(gene.manifest, nCell > 10)
tmp.gene.mani$Annotation <- factor(tmp.gene.mani$Annotation, ordered = T,
levels = c("mitochondrial", "ribosome", "dissociation", "other"))
gene.colors <- c(other = "#d56f6d", ribosome = "#f29721", dissociation = "#65a55d", mitochondrial = "#3778bf")
p <- ggplot() +
geom_point(tmp.gene.mani,
mapping = aes(x = detect.rate, y = bg.percent, color = Annotation),
alpha = 0.3) +
scale_color_manual(values = gene.colors, name = "Gene") +
coord_fixed(1/4) +
scale_y_continuous(trans = 'log10',
limits = c(0.00001, 0.1),
breaks = c(0.00001, 0.0001, 0.001, 0.01, 0.1),
labels = c("0.00001", "0.0001", "0.001", "0.01", "0.1")) +
ggplot_config(base.size = 6) +
theme(legend.justification = c(0,1), legend.position = c(0.01,1)) +
guides(color = guide_legend(override.aes = list(size = 2),
keywidth = 0.1,
keyheight = 0.15,
default.unit = "inch")) +
labs(x = "Gene detection rate in cells", y = "Gene proportion in background")
return(p)
}
#' runScStatistics
#'
#' perform cell QC, gene QC, visualization and give suggested thresholds.
#'
#' @param dataPath A path containing the cell ranger processed data.
#' Under this path, folders 'filtered_feature_bc_matrix' and 'raw_feature_bc_matrix' exist generally.
#' @param savePath A path to save the results files(suggest to create a foler named by sample name).
#' @param authorName A character string for authors name and will be shown in the report.
#' @param sampleName A character string giving a label for this sample.
#' @param species A character string indicating what species the sample belong to.
#' Must be one of "human"(default) and "mouse".
#' @param hg.mm.mix A logical value indicating whether the sample is a mix of
#' human cells and mouse cells(such as PDX sample).
#' If TRUE, the arguments 'hg.mm.thres' and 'mix.anno' should be set to corresponding values.
#' @param hg.mm.thres A float-point threshold within [0.5, 1] to identify human and mouse cells.
#' Cells with UMI percentage of single species larger than the threshold are labeled human or mouse cells.
#' The default is 0.6.
#' @param mix.anno A vector to indicate the prefix of genes from different species.
#' The default is c("human" = "hg19", "mouse" = "mm10").
#' @param bg.spec.genes A list of backgroud specific genes, which are used to remove ambient genes' influence.
#' @param bool.runSoupx A logical value indicating whether to estimate contamination fraction by SoupX.
#' @param genReport A logical value indicating whether to generate a .html/.md report (suggest to set TRUE).
#'
#' @return A results list with all useful objects used in the function.
#' @export
#'
#' @import Matrix knitr ggplot2 SoupX
#' @importFrom markdown markdownToHTML
#' @importFrom ggExtra ggMarginal
#' @importFrom reshape2 melt
#' @importFrom gridExtra arrangeGrob
#' @importFrom grid unit.c grid.newpage grid.draw
#' @importFrom dplyr "%>%" top_n group_by
#' @importFrom cowplot get_legend plot_grid
#' @importFrom grDevices boxplot.stats colorRampPalette
#' @importFrom stats cor density filter median quantile sd
#' @importFrom utils read.delim read.table write.csv write.table
#'
runScStatistics <- function(dataPath, savePath,
authorName = NULL,
sampleName = "sc",
species = "human",
hg.mm.mix = F,
hg.mm.thres = 0.6,
mix.anno = c("human" = "hg19", "mouse" = "mm10"),
bg.spec.genes = NULL,
bool.runSoupx = F,
genReport = T){
message("[", Sys.time(), "] START: RUN scStatistics")
# results <- as.list(environment())
checkStatArguments(as.list(environment()))
# if(bool.runSoupx){
# bool.runSoupx <- F
# }
if(!dir.exists(file.path(savePath, "figures/"))){
dir.create(file.path(savePath, "figures/"), recursive = T)
}
suppressWarnings( dataPath <- normalizePath(dataPath, "/") )
suppressWarnings( savePath <- normalizePath(savePath, "/") )
message("[", Sys.time(), "] -----: data preparation")
all <- prepareData(samplePath = dataPath,
species = species,
hg.mm.mix = hg.mm.mix,
hg.mm.thres = hg.mm.thres,
mix.anno = mix.anno)
expr.data <- all$expr.data
cell.manifest <- all$cell.manifest
gene.manifest <- all$gene.manifest
raw.data <- all$raw.data
min.nUMI <- all$min.nUMI
cr.version <- all$cr.version
run.emptydrop <- all$run.emptydrop
rm(all)
message("[", Sys.time(), "] -----: cell calling")
if(raw.data){
p.cells.1 <- cellsPlot(cell.manifest, plot.type = "histogram")
p.cells.2 <- cellsPlot(cell.manifest, plot.type = "rankplot")
suppressWarnings(
ggsave(filename = file.path(savePath, "figures/cells-distr-hist.png"),
p.cells.1, dpi = 300, height = 3, width = 4)
)
suppressWarnings(
ggsave(filename = file.path(savePath, "figures/cells-distr-rank.png"),
p.cells.2, dpi = 300, height = 3, width = 4)
)
}else{
p.cells.1 <- NULL
p.cells.2 <- NULL
}
message("[", Sys.time(), "] -----: nUMI & nGene distribution plot")
cell.manifest.all <- cell.manifest
cell.manifest <- subset(cell.manifest, droplet.type == "cell")
cell.threshold <- calcThres(cell.manifest,
values = c("nUMI", "nGene", "mito.percent", "ribo.percent", "diss.percent"))
p.nUMI <- histPlot(cell.manifest, value = "nUMI", xlines = c(cell.threshold$nUMI))
p.nGene <- histPlot(cell.manifest, value = "nGene", xlines = c(200, cell.threshold$nGene))
ggsave(filename = file.path(savePath, "figures/nUMI-distr.png"),
p.nUMI, dpi = 300, height = 2.5, width = 4)
ggsave(filename = file.path(savePath, "figures/nGene-distr.png"),
p.nGene, dpi = 300, height = 2.5, width = 4)
message("[", Sys.time(), "] -----: mito & ribo & diss distribution plot")
p.mito <- marginPlot(cell.manifest, value = "mito.percent", color = "#6d9fd5",
xlines = c(cell.threshold$nUMI), ylines = c(cell.threshold$mito.percent))
p.ribo <- marginPlot(cell.manifest, value = "ribo.percent", color = "#f6b969",
xlines = c(cell.threshold$nUMI), ylines = c(cell.threshold$ribo.percent))
p.diss <- marginPlot(cell.manifest, value = "diss.percent", color = "#94c08e",
xlines = c(cell.threshold$nUMI), ylines = c(cell.threshold$diss.percent))
ggsave(filename = file.path(savePath, "figures/mito-distr.png"),
p.mito, dpi = 300, height = 4, width = 4)
ggsave(filename = file.path(savePath, "figures/ribo-distr.png"),
p.ribo, dpi = 300, height = 4, width = 4)
ggsave(filename = file.path(savePath, "figures/diss-distr.png"),
p.diss, dpi = 300, height = 4, width = 4)
message("[", Sys.time(), "] -----: gene statistics")
bg.result <- getBgPercent(cell.manifest.all, expr.data, bg.low = 1, bg.up = 10)
bg.percent <- bg.result$est
nCell <- getNcell(cell.manifest, expr.data)
detect.rate <- getDetectRate(nCell, n = dim(cell.manifest)[1])
expr.frac <- getExprProp(cell.manifest, expr.data)
# low.frac <- getLowFrac(expr.frac, bg.percent)
prop.median <- getPropMedian(expr.frac, nCell)
gene.manifest <- updateGeneM(gene.manifest,
bg.percent = bg.percent,
nCell = nCell,
detect.rate = detect.rate,
prop.median = prop.median,
low.frac = NULL)
message("[", Sys.time(), "] -----: gene proportion plot")
suppressWarnings(
p.geneProp <- genePropPlot(gene.manifest, expr.frac)
)
suppressWarnings(
ggsave(filename = file.path(savePath, "figures/geneProp.png"),
p.geneProp, dpi = 300, height = 8, width = 8)
)
if(!is.null(bg.percent) && raw.data){
p.bg.cell <- bgCellScatter(gene.manifest)
p.bg.detect <- bgDetScatter(gene.manifest)
suppressWarnings(
ggsave(filename = file.path(savePath, "figures/bg-cell-scatter.png"),
p.bg.cell, dpi = 300, height = 4, width = 4)
)
suppressWarnings(
ggsave(filename = file.path(savePath, "figures/bg-detect-scatter.png"),
p.bg.detect, dpi = 300, height = 4, width = 4)
)
}else{
p.bg.cell <- NULL
p.bg.detect <- NULL
}
if(bool.runSoupx){
message("[", Sys.time(), "] -----: ambient genes (SoupX)")
suppressWarnings( cls.path <- file.path(dataPath, "analysis/clustering/graphclust/clusters.csv") )
bool.cls.info <- file.exists(cls.path)
if(is.null(bg.percent) | !raw.data){
bool.runSoupx <- F
cat("- Warning in 'SoupX': Cannot identify background distribution. So skip this step.\n")
}
if(!bool.cls.info){
bool.runSoupx <- F
cat("- Warning in 'SoupX': Cannot find clustering information file produced by cell ranger (`", normalizePath(cls.path),
"`). So skip this step.\n", sep = "")
}
}
if(bool.runSoupx){
soupx.obj = load10X(dataPath, verbose = F)
soupx.obj = autoEstCont(soupx.obj)
contamination.frac = soupx.obj$fit$rhoEst
saveRDS(soupx.obj, file.path(savePath, "soupx-object.RDS"))
}else{
# bg.spec.genes = NULL
soupx.obj = NULL
contamination.frac = NULL
# p.bg.genes = NULL
}
message("[", Sys.time(), "] -----: resutls saving")
filter.thres <- list(
Index = c("nUMI", "nGene", "mito.percent", "ribo.percent", "diss.percent"),
Low.threshold = c(0, 200, -Inf, -Inf, -Inf),
High.threshold = c(cell.threshold$nUMI,
cell.threshold$nGene,
cell.threshold$mito.percent,
cell.threshold$ribo.percent,
cell.threshold$diss.percent)
)
filter.thres <- data.frame(filter.thres)
write.table(gene.manifest, file = file.path(savePath, "geneManifest.txt"),
quote = F, sep = "\t", row.names = F)
write.table(cell.manifest.all, file = file.path(savePath, "cellManifest-all.txt"),
quote = F, sep = "\t", row.names = F)
write.table(filter.thres, file = file.path(savePath, "cell.QC.thres.txt"),
quote = F, sep = "\t", row.names = F)
nList <- c(dim(cell.manifest.all)[1], dim(cell.manifest)[1])
tmp <- cell.manifest[cell.manifest$nUMI < cell.threshold$nUMI, ]
nList <- c(nList, dim(tmp)[1])
tmp <- tmp[tmp$nGene >= 200, ]
nList <- c(nList, dim(tmp)[1])
tmp <- tmp[tmp$nGene < cell.threshold$nGene, ]
nList <- c(nList, dim(tmp)[1])
tmp <- tmp[tmp$mito.percent < cell.threshold$mito.percent, ]
nList <- c(nList, dim(tmp)[1])
tmp <- tmp[tmp$ribo.percent < cell.threshold$ribo.percent, ]
nList <- c(nList, dim(tmp)[1])
tmp <- tmp[tmp$diss.percent < cell.threshold$diss.percent, ]
nList <- c(nList, dim(tmp)[1])
rm(tmp)
results <- list(dataPath = dataPath,
savePath = savePath,
authorName = authorName,
sampleName = sampleName,
species = species,
hg.mm.mix = hg.mm.mix,
mix.anno = mix.anno,
hg.mm.thres = hg.mm.thres,
expr.data = expr.data,
cell.manifest = cell.manifest,
gene.manifest = gene.manifest,
cell.threshold = cell.threshold,
filter.thres = filter.thres,
p.cells.1 = p.cells.1,
p.cells.2 = p.cells.2,
p.nUMI = p.nUMI,
p.nGene = p.nGene,
p.mito = p.mito,
p.ribo = p.ribo,
p.diss = p.diss,
p.geneProp = p.geneProp,
p.bg.cell = p.bg.cell,
p.bg.detect = p.bg.detect,
min.nUMI = min.nUMI,
cr.version = cr.version,
raw.data = raw.data,
run.emptydrop = run.emptydrop,
bool.runSoupx = bool.runSoupx,
# bg.spec.genes = bg.spec.genes,
soupx.obj = soupx.obj,
contamination.frac = contamination.frac,
nList = nList)
## generate report
if(genReport){
message("[", Sys.time(), "] -----: report generating")
# results$cell.manifest <- subset(cell.manifest.all, droplet.type == "cell")
if(!dir.exists(file.path(savePath, 'report-figures/'))){
dir.create(file.path(savePath, 'report-figures/'), recursive = T)
}
suppressWarnings(
knit(system.file("rmd", "main-scStat.Rmd", package = "scCancer"),
file.path(savePath,'report-scStat.md'), quiet = T)
)
markdownToHTML(file.path(savePath,'report-scStat.md'),
file.path(savePath, 'report-scStat.html'))
# results$cell.manifest <- cell.manifest.all
}
message("[", Sys.time(), "] END: Finish scStatistics\n\n")
return(results)
}
#' genStatReport
#'
#' @param results A list generated by 'runScStatistics'
#' @inheritParams runScStatistics
#'
#' @return NULL
#' @export
#'
genStatReport <- function(results, savePath){
message("[", Sys.time(), "] -----: report generating")
if(!dir.exists(savePath)){
dir.create(savePath, recursive = T)
}
results[['savePath']] <- normalizePath(savePath, "/")
savePath <- normalizePath(savePath, "/")
if(!dir.exists(file.path(savePath, 'report-figures/'))){
dir.create(file.path(savePath, 'report-figures/'), recursive = T)
}
suppressWarnings(
knit(system.file("rmd", "main-scStat.Rmd", package = "scCancer"),
file.path(savePath,'report-scStat.md'), quiet = T)
)
markdownToHTML(file.path(savePath,'report-scStat.md'),
file.path(savePath, 'report-scStat.html'))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.