library(Seurat)
library(dplyr)
library(cowplot)
library(RColorBrewer)
library(ggplot2)
library(knitr)
library(kableExtra)
library(SingleCellExperiment)
library(scater)
library(gridExtra)
library(grid)
library(ggpubr)
library(patchwork)
library(singleCellTK)
# Use default markdown heading markers unless they are already defined
# in the calling environment.
if (!exists("headingDR")) {
  headingDR <- "#"
}
if (!exists("headingDR2")) {
  headingDR2 <- "##"
}

# Emit the top-level heading of the tabbed PCA section.
cat(headingDR, " PCA {.tabset .tabset-fade}\n\n")

Single-cell data contain inherent technical noise that can adversely affect downstream clustering. To reduce this effect, reduced dimensions are computed, where each dimension represents a set of correlated features. An important task is then to select the number of components that the downstream methods should use. The JackStraw plot and the Elbow plot help to identify the significant principal components, i.e. those that capture most of the variability in the data.

# Record the PCA settings in a list; the list is kept for later use while the
# PCA itself is run through a direct call with the same values.
pcaParams <- list(inSCE = data, nPCs = pc.count, verbose = FALSE)
# data <- do.call("runSeuratPCA", pcaParams)
data <- runSeuratPCA(
  inSCE = data,
  nPCs = pc.count,
  verbose = FALSE
)

# Record the JackStraw settings. Note that numReplicate and propFreq are only
# stored in this list; the direct call below does not pass them explicitly.
jackStrawParams <- list(
  inSCE = data,
  useAssay = "seuratScaledData",
  dims = pc.count,
  numReplicate = 100,
  propFreq = 0.025
)
# data <- do.call("runSeuratJackStraw", jackStrawParams)
data <- runSeuratJackStraw(
  inSCE = data,
  useAssay = "seuratScaledData",
  dims = pc.count
)
# Elbow plot tab: heading, explanatory text, then the plot itself.
cat(headingDR2, " Elbow Plot {-}\n\n")
cat(
  "Elbow plot is plotted between the principal components (x-axis) and their standard deviation (y-axis), where a clear elbow between the components highlights the cut-off for the no. of components that should be selected as significant for downstream analysis."
)

# Static (non-interactive) elbow plot over the first pc.count components.
ElbowPlot <- plotSeuratElbow(inSCE = data, ndims = pc.count, interactive = FALSE)
ElbowPlot
# Determine how many leading principal components are significant according to
# the JackStraw overall p-values stored on the Seurat PCA reduction.
#
# Rule: count the uninterrupted run of PCs, starting at PC 1, whose overall
# p-value is below 0.01. The second column of the overall p-value matrix is
# read as the per-PC p-value (same column the original code used).
#
# Fallback: pc.count is used when
#   * no JackStraw result / p-value table is available,
#   * the p-value table is empty or does not have a second column,
#   * the very first PC is not significant (no usable cutoff can be derived).
# Missing p-values are treated as "not significant" so they end the run instead
# of being silently counted.
significant_PC <- pc.count
jackStrawResult <- metadata(data)$seurat$obj@reductions$pca@jackstraw
if (!is.null(jackStrawResult)) {
  PC_Matrix <- jackStrawResult@overall.p.values

  hasPValues <- length(dim(PC_Matrix)) == 2 &&
    nrow(PC_Matrix) > 0 &&
    ncol(PC_Matrix) >= 2

  if (hasPValues) {
    isSignificant <- PC_Matrix[, 2] < 0.01
    isSignificant[is.na(isSignificant)] <- FALSE

    # Position of the first non-significant PC; NA when every PC passes.
    firstFailure <- match(FALSE, isSignificant)
    leadingSignificant <- if (is.na(firstFailure)) {
      length(isSignificant)
    } else {
      firstFailure - 1L
    }

    # A run of length zero means PC 1 itself failed; keep the pc.count fallback.
    if (leadingSignificant > 0L) {
      significant_PC <- leadingSignificant
    }
  }
}

# Summary note shown under the Elbow plot tab.
cat(
  "> PCA was applied to the scaled subset matrix that contained the ",
  variable.features,
  " top most variable genes to transform the data into reduced dimensions for clustering in the downstream analysis. Significant principal components will show a strong enrichment of genes with low p-values from the JackStraw plot or PC cutoff could also be determined roughly by looking at where a clear elbow is depicted in the elbow plot. The first ",
  significant_PC,
  " components were selected as the cutoff. These ",
  significant_PC,
  " principal components were determined to hold the true dimensionality of the dataset and will be used in the subsequent clustering analysis."
)
# JackStraw plot tab: heading, explanatory text, then the plot itself.
cat(headingDR2, " JackStraw Plot {-}\n\n")
cat(
  "JackStraw computes significance values for all genes in all principal components and plots these genes as a QQ-plot for each component and compares them with the uniform distribution (shown in dotted line). Overall p-values are also computed for each component, where statistically significant principal components can be selected for downstream analysis. Visually, components that lie above the dotted uniform distribution line should be selected."
)

# Build the base plot first, then compact the legend into a single column with
# small text and keys.
JackStrawPlot <- plotSeuratJackStraw(inSCE = data, dims = pc.count, xmax = 0.05)
JackStrawPlot <- JackStrawPlot +
  guides(col = guide_legend(ncol = 1)) +
  theme(
    legend.text = element_text(size = 6),
    legend.key.size = unit(0.2, "cm")
  )
JackStrawPlot
# 3.5 units for every row of three components.
# NOTE(review): not used in the visible code; presumably consumed elsewhere
# (e.g. as a figure height) - confirm before removing.
PC.separated.height <- ceiling(pc.count / 3) * 3.5

# Drop the (large) input object from the recorded PCA settings, add the chosen
# cutoff, and store the settings in the object's metadata.
pcaParams[["inSCE"]] <- NULL
pcaParams[["significant_PC"]] <- significant_PC
metadata(data)$seurat$sctk$report$pcaParams <- pcaParams

# Summary note shown under the JackStraw plot tab.
cat(
  "> PCA was applied to the scaled subset matrix that contained the ",
  variable.features,
  " top most variable genes to transform the data into reduced dimensions for clustering in the downstream analysis. Significant principal components will show a strong enrichment of genes with low p-values from the JackStraw plot or PC cutoff could also be determined roughly by looking at where a clear elbow is depicted in the elbow plot. The first ",
  significant_PC,
  " components were selected as the cutoff. These ",
  significant_PC,
  " principal components were determined to hold the true dimensionality of the dataset and will be used in the subsequent clustering analysis."
)
# PC heatmap tab: heading, explanatory text, then the heatmaps.
cat(headingDR2, " PC Heatmap {-}\n\n")
cat(
  "Heatmaps are plotted for each component that highlight the primary sources of heterogeneity in terms of features in the data. Both cells and features are ordered according to their PCA scores and this heatmap can be utilized to explore highly correlated feature sets in the data."
)

# Record the heatmap settings; the heatmap itself is produced through a direct
# call using the same values.
pcHeatmapParams <- list(
  inSCE = data,
  useAssay = "seuratScaledData",
  useReduction = "pca",
  dims = pc.count,
  balanced = TRUE,
  ncol = 4,
  fast = FALSE
)
# heatmap <- do.call("runSeuratHeatmap", pcHeatmapParams)
heatmap <- runSeuratHeatmap(
  inSCE = data,
  useAssay = "seuratScaledData",
  useReduction = "pca",
  dims = pc.count,
  balanced = TRUE,
  ncol = 4,
  fast = FALSE
)
heatmap

# Store the settings (without the input object) in the object's metadata.
pcHeatmapParams[["inSCE"]] <- NULL
metadata(data)$seurat$plots$heatmap <- pcHeatmapParams
# Summary note shown under the PC heatmap tab.
cat(
  "> PCA was applied to the scaled subset matrix that contained the ",
  variable.features,
  " top most variable genes to transform the data into reduced dimensions for clustering in the downstream analysis. Significant principal components will show a strong enrichment of genes with low p-values from the JackStraw plot or PC cutoff could also be determined roughly by looking at where a clear elbow is depicted in the elbow plot. The first ",
  significant_PC,
  " components were selected as the cutoff. These ",
  significant_PC,
  " principal components were determined to hold the true dimensionality of the dataset and will be used in the subsequent clustering analysis."
)

# Closing section: print the R session details for reproducibility.
cat("# Session Information\n\n")
sessionInfo()


compbiomed/singleCellTK documentation built on May 8, 2024, 6:58 p.m.