In liuqivandy/scRNABatchQC: a package for quality control of multiple single cell RNAseq data

library(RColorBrewer)
library(knitr)
library(kableExtra)
library(reshape2)
library(ggplot2)
library(data.table)
library(DT)
library(RCurl)
library(htmltools)

knitr::opts_chunk$set(echo = TRUE, autodep=TRUE, cache.comments=FALSE, message=FALSE, warning=FALSE, dpi=300)

addLinkTag<-function(text,link) {
  result<-paste0("<a href='",link,"' target='_blank'>",text,"</a>")
  return(result)
}

figRef <- local({
  tag <- numeric()
  created <- logical()
  used <- logical()
  function(label, caption, prefix = options("figcap.prefix"), 
           sep = options("figcap.sep"), prefix.highlight = options("figcap.prefix.highlight"), trunk.eval=TRUE) {
    if(trunk.eval){ 
      i <- which(names(tag) == label)
      if (length(i) == 0) {
        i <- length(tag) + 1
        tag <<- c(tag, i)
        names(tag)[length(tag)] <<- label
        used <<- c(used, FALSE)
        names(used)[length(used)] <<- label
        created <<- c(created, FALSE)
        names(created)[length(created)] <<- label
      }
      if (!missing(caption)) {
        created[label] <<- TRUE
        paste0(prefix.highlight, prefix, " ", i, sep, prefix.highlight, 
               " ", caption)
      } else {
        used[label] <<- TRUE
        paste(prefix, tag[label])
      }
    }
  }
})
options(figcap.prefix = "Figure", figcap.sep = ":", figcap.prefix.highlight = "**")

tabRef <- local({
  tag <- numeric()
  created <- logical()
  used <- logical()
  function(label, caption, prefix = options("tabcap.prefix"), 
           sep = options("tabcap.sep"), prefix.highlight = options("tabcap.prefix.highlight"), trunk.eval=TRUE) {
    if(trunk.eval){
      i <- which(names(tag) == label)
      if (length(i) == 0) {
        i <- length(tag) + 1
        tag <<- c(tag, i)
        names(tag)[length(tag)] <<- label
        used <<- c(used, FALSE)
        names(used)[length(used)] <<- label
        created <<- c(created, FALSE)
        names(created)[length(created)] <<- label
      }
      if (!missing(caption)) {
        created[label] <<- TRUE
        paste0(prefix.highlight, prefix, " ", i, sep, prefix.highlight, 
              " ", caption)
      } else {
        used[label] <<- TRUE
        paste(prefix, tag[label])
      }
    }
  }
})
options(tabcap.prefix = "Table", tabcap.sep = ":", tabcap.prefix.highlight = "**")

getHeatmapHeight<-function(hdata){
  max(10, nrow(hdata) / 10)
}

isHeatmapFigure<-function(datam){
  if (is.null(datam)){
    return(FALSE)
  }
  return (ncol(datam)>1 & nrow(datam)>1)
}

getHeatmapCaption<-function(datam, feature){
  if (isHeatmapFigure(datam)){
    return (figRef(feature))
  }else{
    return (tabRef(feature))
  }
}

plotHeatmap<-function(datam,feature=c("hvg","hvgPath","PCg","PCgPath","diffg","diffgPath")){
  keytitle<-switch(feature, "hvg"="zval","hvgPath"="-logFDR","PCg"="logFC","PCgPath"="-logFDR","diffg"="logFC","diffgPath"="-logFDR")
  maintitle<-switch(feature, "hvg"="Highly variable genes","hvgPath"="Pathways enriched in HVGs","PCg"="PC-related genes","PCPath"="Pathways enriched in PC-related genes","diffg"="Differentially expressed genes","diffgPath"="Pathways enriched in DEGs")
  if (isHeatmapFigure(datam)){
    if (feature== "hvg" | feature== "PCg" | feature=="diffg"){ 
      colv=heatmapColors
      } 
    else {
      colv=heatmapPathwayColors
    }
    heatmap.2(datam, margins = heatmapMargins,  keysize=heatmapKeysize, col=colv,main=maintitle,key.title=keytitle,key.xlab="",key.ylab="",cexCol=1)
  } 
  else {
    warning("there is only one dataset/comparison or one gene/pathway detected and heatmap figure is not generated")
      print(kable(as.matrix(datam),caption=tabRef(feature, maintitle),col.names=paste0(colnames(datam),"(",keytitle,")")) %>% 
              kable_styling() %>%
              htmltools::HTML())
    }
}

#library(scRNABatchQC)
#load("test.rdata")
plotData<-params$data
allsces <- plotData$sces

sces<-list()
unvalid<-list()

for (p in names(allsces)) {
  sce<-allsces[[p]]
  if(sce@metadata$valid){
    sces[p] = sce
  }else{
    unvalid[p] = sce
  }
}

if(length(allsces) < 9){
  scolors=brewer.pal(max(3, length(allsces)), "Set1")[1:length(allsces)]
}else{
  scolors=rainbow(length(allsces))
}
names(scolors)<-names(allsces)

hasValid<-length(sces) > 0
hasNotValid<-length(unvalid) > 0

if(hasValid){
  hvgBiologicalSimilarity <- .getBiologicalSimilarity(sces, objectName = "hvg", valueName = "zval")
  hvgPathways <- .getMultiplePathways(sces, metaObjectName = "hvgPathway")
  pc1geneBiologicalSimilarity <- .getBiologicalSimilarity(sces, objectName = "pc1genes", valueName = "logFC")
  pc1Pathways <- .getMultiplePathways(sces, metaObjectName = "pc1Pathway")

  hasComparison<-length(sces) > 1

  heatmapKeysize<-1
  heatmapColors<-bluered(75)
  heatmapPathwayColors<-colorpanel(75,low="white",high="red")
  heatmapMargins<-c(10,15)

  hasPathway<-!is.null(hvgPathways)

  hasPC1Pathway<-!is.null(pc1Pathways)

  hasDiffGenes<-!is.null(plotData$scesMerge@metadata$diffFC$genes)

  hasDiffPathway<-!is.null(plotData$scesMerge@metadata$diffFC$pathways)
}else{
  hasComparison<-FALSE
  hasPathway<-FALSE
  hasPC1Pathway<-FALSE
  hasDiffGenes<-FALSE
  hasDiffPathway<-FALSE
}

Overview

scRNABatchQC provides both metrics and diagnostic graphics to assess the similarity/difference on technical factors, expression profiles, and biological features across multiple scRNA-seq experiments, which helps identify potential batch effects. scRNABatchQC report includes five sections:

QC Summary - Overview of quality control in each scRNA-seq experiment.
Technical View - Technical features across multiple scRNA-seq experiments.
Biological View - Biological features across multiple scRNA-seq experiments.
Expression Similarity - Expression similarity across multiple scRNA-seq experiments.
Pairwise Difference - Genes and pathways that drive the pairwise difference across multiple scRNA-seq experiments.

QC Summary

curTable<-plotData$tableSummary
print(kable(curTable, caption=tabRef("tableSummary", "Summary of Quality Metrics."), row.names=FALSE) %>% 
      kable_styling() %>% 
          row_spec(which(curTable$Valid=="FALSE"), color = "black", background = "bisque") %>% 
          htmltools::HTML())

r tabRef("tableSummary") contains a summary of quality metrics generated by the scRNABatchQC package.

A short description of Table 1 metrics is provided below:

EID - The name of the experiment
Count - The total number of counts in the experiment
Cell - The total number of cells in the experiment
Gene - The total number of genes in the experiment
R-Count - The range of the total number of counts in a cell across all cells [min-median-max].
R-Gene - The range of the total number of genes identified in a cell across all cells [min-median-max]
mtRNA - The maximum percentage of reads mapped to mitochondria-encoded genes in all cells
rRNA - The maximum percentage of reads mapped to genes encoded for ribosomal proteins in all cells
F-Count - The number of cells filtered by the total number of counts
C-Count - The cutoff to remove the cell if the total counts of the cell is less than C-Count
F-Gene - The number of cells filtered by the total number of detected genes
C-Gene - The cutoff to remove the cell if the total genes of the cell is less than C-Gene
F-mt - The number of cells filtered by the mtRNA percentage
C-mt - The cutoff to remove the cell if the percentage of mtRNA is larger than C-mt
F - The total number of cells filtered by either counts, genes, or mtRNA criteria
V-Count - The number of cell passed filtering
Valid - If the sample was unvalid due to extremly low cell count (less than 10)

unvalidSampleNames<-names(unvalid)
cat('<p style="color:red">WARNING: The following samples may not be included in some of QC analysis due to extremely low cell counts:',  paste0(unvalidSampleNames, collapse=', '), "<p>")

Technical View

This section checks 11 technical features and aggregates results across multiple scRNA-seq experiments, which include the total number of counts, the number of expressed genes, percentage of reads mapped to mitochondria-encoded genes and ribosomal proteins, the expression cumulative plot, the detection rate, the mean-variance trend, and the variance explained by technical features.

print(plotDensity(allsces,feature="total_counts", featureLabel="The total number of counts", scolors=scolors, lineSize=plotData$lineSize))

r figRef("density_total_counts") shows the distribution of the total number of counts in a cell in each scRNA-seq experiment. Different experiments are denoted by different colors. Cells with very low number of read counts, suggesting RNAs are not efficiently captured, are considered as low quality and removed from the downstream analysis. Different count distributions across experiments are indicative of the presence of batch effects.

print(plotDensity(allsces,feature="total_features", featureLabel="The number of expressed genes", scolors=scolors,lineSize=plotData$lineSize))

r figRef("density_total_features") shows the distribution of the number of expressed genes in a cell in each scRNA-seq experiment. Different experiments are denoted by different colors. Cells with very few expressed genes are considered as low quality and removed from the downstream analysis. Different distributions on gene numbers across experiments are indicative of the presence of batch effects.

print(plotDensity(allsces,feature="pct_counts_Mt", featureLabel="The proportion of reads mapped to mitochondria-encoded genes", scolors=scolors,lineSize=plotData$lineSize))

r figRef("density_pct_counts_Mt") shows the distribution of the proportion of reads mapped to mitochondria-encoded genes in a cell in each scRNA-seq experiment. Different experiments are denoted by different colors. A cell with high proportion of reads mapped to mitochondria-encoded genes is considered as low quality and removed from the downstream analysis. Caution should be paid when one experiment has higher proportion of mitochondrial reads than others.

print(plotDensity(allsces,feature="pct_counts_rRNA", featureLabel="The proportion of reads mapped to genes encoded for ribosomal proteins", scolors=scolors,lineSize=plotData$lineSize))

r figRef("plotRibosomalRNA") shows the distribution of the proportion of reads mapped to genes encoded for ribosomal proteins in a cell in each scRNA-seq experiment, which is denoted by different color. Caution should be paid when one experiment has higher proportion of ribosomal gene reads than others.

print(plotGeneCountDistribution(allsces, scolors,lineSize=plotData$lineSize))

r figRef("genesCountdistribution") shows the cumulative expression for the top 500 highly expressed genes. The plot is generated based on the average count of each gene, that is, the top 500 highly expressed genes are those with the highest average count. This plot describes how reads are distributed across genes in each scRNA-seq experiment. A cumulative graph with rapid increase at the beginning suggests a library with low complexity where very few genes consume most reads. In contrast, a cumulative graph with slow increase at the beginning indicates that reads are more evenly distributed across genes. Different cumulative expression plots across multiple scRNA-seq experiments are indicative of the presence of batch effects.

print(plotAveCountVSdetectRate(allsces, scolors,lineSize=plotData$lineSize))

r figRef("aveCountVSdetectRate") shows the percentage of cells expressing each gene plotted against its log-average count. Generally the detection rate for a gene depends on its expression. Different detection rates across scRNA-seq experiments are indicative of the presence of batch effects.

liuqivandy/scRNABatchQC documentation built on March 24, 2021, 11:01 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

liuqivandy/scRNABatchQC
a package for quality control of multiple single cell RNAseq data

In liuqivandy/scRNABatchQC: a package for quality control of multiple single cell RNAseq data

Overview

QC Summary

Technical View

R Package Documentation

Browse R Packages

We want your feedback!

liuqivandy/scRNABatchQC a package for quality control of multiple single cell RNAseq data

In liuqivandy/scRNABatchQC: a package for quality control of multiple single cell RNAseq data

Overview

QC Summary

Technical View

R Package Documentation

Browse R Packages

We want your feedback!

liuqivandy/scRNABatchQC
a package for quality control of multiple single cell RNAseq data