library(RColorBrewer)
library(knitr)
library(kableExtra)
library(reshape2)
library(ggplot2)
library(data.table)
library(DT)
library(RCurl)
library(htmltools)

knitr::opts_chunk$set(echo = TRUE, autodep=TRUE, cache.comments=FALSE, message=FALSE, warning=FALSE, dpi=300)

addLinkTag<-function(text,link) {
  result<-paste0("<a href='",link,"' target='_blank'>",text,"</a>")
  return(result)
}

figRef <- local({
  tag <- numeric()
  created <- logical()
  used <- logical()
  function(label, caption, prefix = options("figcap.prefix"), 
           sep = options("figcap.sep"), prefix.highlight = options("figcap.prefix.highlight"), trunk.eval=TRUE) {
    if(trunk.eval){ 
      i <- which(names(tag) == label)
      if (length(i) == 0) {
        i <- length(tag) + 1
        tag <<- c(tag, i)
        names(tag)[length(tag)] <<- label
        used <<- c(used, FALSE)
        names(used)[length(used)] <<- label
        created <<- c(created, FALSE)
        names(created)[length(created)] <<- label
      }
      if (!missing(caption)) {
        created[label] <<- TRUE
        paste0(prefix.highlight, prefix, " ", i, sep, prefix.highlight, 
               " ", caption)
      } else {
        used[label] <<- TRUE
        paste(prefix, tag[label])
      }
    }
  }
})
options(figcap.prefix = "Figure", figcap.sep = ":", figcap.prefix.highlight = "**")

tabRef <- local({
  tag <- numeric()
  created <- logical()
  used <- logical()
  function(label, caption, prefix = options("tabcap.prefix"), 
           sep = options("tabcap.sep"), prefix.highlight = options("tabcap.prefix.highlight"), trunk.eval=TRUE) {
    if(trunk.eval){
      i <- which(names(tag) == label)
      if (length(i) == 0) {
        i <- length(tag) + 1
        tag <<- c(tag, i)
        names(tag)[length(tag)] <<- label
        used <<- c(used, FALSE)
        names(used)[length(used)] <<- label
        created <<- c(created, FALSE)
        names(created)[length(created)] <<- label
      }
      if (!missing(caption)) {
        created[label] <<- TRUE
        paste0(prefix.highlight, prefix, " ", i, sep, prefix.highlight, 
              " ", caption)
      } else {
        used[label] <<- TRUE
        paste(prefix, tag[label])
      }
    }
  }
})
options(tabcap.prefix = "Table", tabcap.sep = ":", tabcap.prefix.highlight = "**")

getHeatmapHeight<-function(hdata){
  max(10, nrow(hdata) / 10)
}

isHeatmapFigure<-function(datam){
  if (is.null(datam)){
    return(FALSE)
  }
  return (ncol(datam)>1 & nrow(datam)>1)
}

getHeatmapCaption<-function(datam, feature){
  if (isHeatmapFigure(datam)){
    return (figRef(feature))
  }else{
    return (tabRef(feature))
  }
}

plotHeatmap<-function(datam,feature=c("hvg","hvgPath","PCg","PCgPath","diffg","diffgPath")){
  keytitle<-switch(feature, "hvg"="zval","hvgPath"="-logFDR","PCg"="logFC","PCgPath"="-logFDR","diffg"="logFC","diffgPath"="-logFDR")
  maintitle<-switch(feature, "hvg"="Highly variable genes","hvgPath"="Pathways enriched in HVGs","PCg"="PC-related genes","PCPath"="Pathways enriched in PC-related genes","diffg"="Differentially expressed genes","diffgPath"="Pathways enriched in DEGs")
  if (isHeatmapFigure(datam)){
    if (feature== "hvg" | feature== "PCg" | feature=="diffg"){ 
      colv=heatmapColors
      } 
    else {
      colv=heatmapPathwayColors
    }
    heatmap.2(datam, margins = heatmapMargins,  keysize=heatmapKeysize, col=colv,main=maintitle,key.title=keytitle,key.xlab="",key.ylab="",cexCol=1)
  } 
  else {
    warning("there is only one dataset/comparison or one gene/pathway detected and heatmap figure is not generated")
      print(kable(as.matrix(datam),caption=tabRef(feature, maintitle),col.names=paste0(colnames(datam),"(",keytitle,")")) %>% 
              kable_styling() %>%
              htmltools::HTML())
    }
}
#library(scRNABatchQC)
#load("test.rdata")
plotData<-params$data
allsces <- plotData$sces

sces<-list()
unvalid<-list()

for (p in names(allsces)) {
  sce<-allsces[[p]]
  if(sce@metadata$valid){
    sces[p] = sce
  }else{
    unvalid[p] = sce
  }
}

if(length(allsces) < 9){
  scolors=brewer.pal(max(3, length(allsces)), "Set1")[1:length(allsces)]
}else{
  scolors=rainbow(length(allsces))
}
names(scolors)<-names(allsces)

hasValid<-length(sces) > 0
hasNotValid<-length(unvalid) > 0

if(hasValid){
  hvgBiologicalSimilarity <- .getBiologicalSimilarity(sces, objectName = "hvg", valueName = "zval")
  hvgPathways <- .getMultiplePathways(sces, metaObjectName = "hvgPathway")
  pc1geneBiologicalSimilarity <- .getBiologicalSimilarity(sces, objectName = "pc1genes", valueName = "logFC")
  pc1Pathways <- .getMultiplePathways(sces, metaObjectName = "pc1Pathway")

  hasComparison<-length(sces) > 1

  heatmapKeysize<-1
  heatmapColors<-bluered(75)
  heatmapPathwayColors<-colorpanel(75,low="white",high="red")
  heatmapMargins<-c(10,15)

  hasPathway<-!is.null(hvgPathways)

  hasPC1Pathway<-!is.null(pc1Pathways)

  hasDiffGenes<-!is.null(plotData$scesMerge@metadata$diffFC$genes)

  hasDiffPathway<-!is.null(plotData$scesMerge@metadata$diffFC$pathways)
}else{
  hasComparison<-FALSE
  hasPathway<-FALSE
  hasPC1Pathway<-FALSE
  hasDiffGenes<-FALSE
  hasDiffPathway<-FALSE
}

Overview

scRNABatchQC provides both metrics and diagnostic graphics to assess the similarity/difference on technical factors, expression profiles, and biological features across multiple scRNA-seq experiments, which helps identify potential batch effects. scRNABatchQC report includes five sections:

QC Summary

curTable<-plotData$tableSummary
print(kable(curTable, caption=tabRef("tableSummary", "Summary of Quality Metrics."), row.names=FALSE) %>% 
      kable_styling() %>% 
          row_spec(which(curTable$Valid=="FALSE"), color = "black", background = "bisque") %>% 
          htmltools::HTML())

r tabRef("tableSummary") contains a summary of quality metrics generated by the scRNABatchQC package.

A short description of Table 1 metrics is provided below:

unvalidSampleNames<-names(unvalid)
cat('<p style="color:red">WARNING: The following samples may not be included in some of QC analysis due to extremely low cell counts:',  paste0(unvalidSampleNames, collapse=', '), "<p>")

Technical View

This section checks 11 technical features and aggregates results across multiple scRNA-seq experiments, which include the total number of counts, the number of expressed genes, percentage of reads mapped to mitochondria-encoded genes and ribosomal proteins, the expression cumulative plot, the detection rate, the mean-variance trend, and the variance explained by technical features.


print(plotDensity(allsces,feature="total_counts", featureLabel="The total number of counts", scolors=scolors, lineSize=plotData$lineSize))

r figRef("density_total_counts") shows the distribution of the total number of counts in a cell in each scRNA-seq experiment. Different experiments are denoted by different colors. Cells with very low number of read counts, suggesting RNAs are not efficiently captured, are considered as low quality and removed from the downstream analysis. Different count distributions across experiments are indicative of the presence of batch effects.


print(plotDensity(allsces,feature="total_features", featureLabel="The number of expressed genes", scolors=scolors,lineSize=plotData$lineSize))

r figRef("density_total_features") shows the distribution of the number of expressed genes in a cell in each scRNA-seq experiment. Different experiments are denoted by different colors. Cells with very few expressed genes are considered as low quality and removed from the downstream analysis. Different distributions on gene numbers across experiments are indicative of the presence of batch effects.


print(plotDensity(allsces,feature="pct_counts_Mt", featureLabel="The proportion of reads mapped to mitochondria-encoded genes", scolors=scolors,lineSize=plotData$lineSize))

r figRef("density_pct_counts_Mt") shows the distribution of the proportion of reads mapped to mitochondria-encoded genes in a cell in each scRNA-seq experiment. Different experiments are denoted by different colors. A cell with high proportion of reads mapped to mitochondria-encoded genes is considered as low quality and removed from the downstream analysis. Caution should be paid when one experiment has higher proportion of mitochondrial reads than others.


print(plotDensity(allsces,feature="pct_counts_rRNA", featureLabel="The proportion of reads mapped to genes encoded for ribosomal proteins", scolors=scolors,lineSize=plotData$lineSize))

r figRef("plotRibosomalRNA") shows the distribution of the proportion of reads mapped to genes encoded for ribosomal proteins in a cell in each scRNA-seq experiment, which is denoted by different color. Caution should be paid when one experiment has higher proportion of ribosomal gene reads than others.


print(plotGeneCountDistribution(allsces, scolors,lineSize=plotData$lineSize))

r figRef("genesCountdistribution") shows the cumulative expression for the top 500 highly expressed genes. The plot is generated based on the average count of each gene, that is, the top 500 highly expressed genes are those with the highest average count. This plot describes how reads are distributed across genes in each scRNA-seq experiment. A cumulative graph with rapid increase at the beginning suggests a library with low complexity where very few genes consume most reads. In contrast, a cumulative graph with slow increase at the beginning indicates that reads are more evenly distributed across genes. Different cumulative expression plots across multiple scRNA-seq experiments are indicative of the presence of batch effects.


print(plotAveCountVSdetectRate(allsces, scolors,lineSize=plotData$lineSize))

r figRef("aveCountVSdetectRate") shows the percentage of cells expressing each gene plotted against its log-average count. Generally the detection rate for a gene depends on its expression. Different detection rates across scRNA-seq experiments are indicative of the presence of batch effects.





liuqivandy/scRNABatchQC documentation built on March 24, 2021, 11:01 p.m.