options(knitr.table.format = "html") 
options(scipen=10)
knitr::opts_chunk$set(echo = TRUE, fig.path = file.path(results$savePath, 'report-figures//'))

title <- "scCancer"
if(!is.null(results$sampleName)){
  title <- paste0(results$sampleName, "  -  ", title)
}

if(!is.null(results$authorName)){
  userName <- results$authorName
}else{
  userName <- Sys.getenv("USERNAME")
}
reportMark <- Sys.time()
if(userName != ""){
  reportMark <- paste0(userName, " , ", reportMark)
}

h.i <- 1
h.ii <- 1

r title


r reportMark

r h.i Cell statistics

if(file.exists(file.path(results$dataPath, "web_summary.html"))){
  file.copy(file.path(results$dataPath, "web_summary.html"), file.path(results$savePath, "report-cellRanger.html"), overwrite = T)
  cat("* Here is the [summary report](./report-cellRanger.html) from `Cell Ranger`.", sep = "")
}

r h.i.r h.ii Cell calling


cat("* Raw data (containing all barcodes) cannot be found, and only filtered data are supplied. So cell-calling doesn't be performed and the analyses for background distribution is omitted.\n")
cat("* For the filtered data, ", results$nList[2], " cells are identified (min.nUMI = `", results$min.nUMI, "`).\n", sep="")
h.ii <- h.ii + 1
# print(results$raw.data)

r h.i.r h.ii The number of UMIs and detected genes in cells

After the cell calling by r results$cr.version, we further perform quality control to filter droplets with low quality cells according to nUMI (total number of UMIs) and nGene (total number of detected genes).

For nUMI : * Suggested threshold to filter cells with extremely large nUMI : r results$cell.threshold$nUMI. + Using this threshold, r sum(results$cell.manifest$nUMI >= results$cell.threshold$nUMI) cells will be filtered.

For nGene : Suggested threshold to filter cells with extremely large nGene : r results$cell.threshold$nGene. + Using this threshold, r sum(results$cell.manifest$nGene >= results$cell.threshold$nGene) cells will be filtered. Suggested threshold to filter cells with extremely small nGene : 200. + Using this threshold, r sum(results$cell.manifest$nGene < 200) cells will be filtered.

Comment: The suggested thresholds (except the lower bound of nGene, which is set by convention) are determined based on the their distributions. Using them, the outliers identified will be filtered. The same below.

plot_grid(results$p.nUMI, results$p.nGene, ncol = 2)

(Hi-res image: left, right)

h.i <- h.i + 1
h.ii <- 1

r h.i Gene statistics

The number of genes expressed in at least one cell : r sum(results$gene.manifest$nCell > 0).

r h.i.r h.ii Mitochondrial genes

Summary of mitochondrial genes percentage (mito.percent) in cells:

format(summary(results$cell.manifest$mito.percent), digits = 3)
results$p.mito

(Hi-res image: view)

h.ii <- h.ii + 1

r h.i.r h.ii Ribosome genes

Summary of ribosome genes percentage (ribo.percent) in cells:

format(summary(results$cell.manifest$ribo.percent), digits = 3)
results$p.ribo

(Hi-res image: view)

h.ii <- h.ii + 1

r h.i.r h.ii Dissociation associated genes

Summary of dissociation associated genes percentage (diss.percent) in cells:

format(summary(results$cell.manifest$diss.percent), digits = 3)
results$p.diss

(Hi-res image: view)

h.ii <- h.ii + 1

r h.i.r h.ii Ambient RNAs

r h.i.r h.ii.1 Highly-expressed genes

In order to analyze the gene expression profiles in detail and identify
highly-expressed genes in background mRNAs from lysed cells, we calculate some metrics as shown below.

if("bg.percent" %in% colnames(results$gene.manifest)){
  cat("* `bg.percent` : the expression proportion for each gene in background distribution (all droplets with `nUMI <= 10`).\n")
}

Here is a plot showing the distributions of gene proportion in cells for the first 100 genes (ordered by their proportion in background bg.percent). And the points (genes) are colored according to whether they belongs to mitochondrial, ribosome, or dissociation associated genes.

if("bg.percent" %in% colnames(results$gene.manifest)){
  cat("The red star signs mark the genes’ proportion in background.\n")
}
grid::grid.draw(results$p.geneProp)

(Hi-res image: view)

if("bg.percent" %in% colnames(results$gene.manifest)){
  cat("The plot below shows the relationship between `bg.percent` and `prop.median`, `bg.percent` and `detect.rate`.\n")
}
if("bg.percent" %in% colnames(results$gene.manifest)){
  plot_grid(results$p.bg.cell, results$p.bg.detect, ncol = 2)
}
if("bg.percent" %in% colnames(results$gene.manifest)){
  cat("<p align=\"right\">(Hi-res image: <a href=\"./figures/bg-cell-scatter.png\">left</a>, <a href=\"./figures/bg-detect-scatter.png\">right</a>)</p>\n")
  # cat("(Hi-res image\n")
}
h.ii <- h.ii + 1

h.i <- h.i + 1
h.ii <- 1

r h.i Output

r h.i.r h.ii Thresholds to filter droplets

According to the results of statistics and visualization, we propose following thresholds to filter cells:

# results$filter.thres %>% knitr::kable("html")
kable(results$filter.thres)

Using these thresholds, the number of cells vary as follows:

r paste0("Raw : ", results$nList[1]) -> r paste0("cellranger3 : ", results$nList[2]) -> r paste0("nUMI<", results$cell.threshold$nUMI, " : ", results$nList[3]) -> r paste0("nGene>=200 : ", results$nList[4]) -> r paste0("nGene<", results$cell.threshold$nGene, " : ", results$nList[5]) -> r paste0("mito.percent<", round(results$cell.threshold$mito.percent, 3), " : ", results$nList[6]) -> r paste0("ribo.percent<", round(results$cell.threshold$ribo.percent, 3), " : ", results$nList[7]) -> r paste0("diss.percent<", round(results$cell.threshold$diss.percent, 3), " : ", results$nList[8])

h.ii <- 1

r h.i.r h.ii Output files

Running this script generates following files:

r.i <- 8
  1. Html report : report-scStat.html.
  2. Markdown report : report-scStat.md.
  3. Figure files : figures/.
  4. Figures used in the report: report-figures/.
  5. Text file with cell manifest : cellManifest-all.txt.
  6. Text file with suggested thresholds as above : cell.QC.thres.txt.
  7. Text file with gene manifest : geneManifest.txt.
cat(r.i, ". **RDS file with SoupX object** :[soupx-object.RDS](./).", 
    sep = "")
r.i <- r.i + 1
if(file.exists(file.path(results$dataPath, "web_summary.html"))){
  cat("9. **Cell ranger html report** (Copy from the source data folder):\n")
  cat("[report-cellRanger.html](./report-cellRanger.html).\n", sep = "")
}



© G-Lab, Tsinghua University



wguo-research/scCancer documentation built on May 26, 2024, 9:12 p.m.