all_times <- list()  # store the time for each chunk
knitr::knit_hooks$set(time_it = local({
  now <- NULL
  function(before, options) {
    if (before) {
      now <<- Sys.time()
    } else {
      res <- difftime(Sys.time(), now, units = "secs")
      all_times[[options$label]] <<- res
    }
  }
}))
knitr::opts_chunk$set(
  tidy = TRUE,
  tidy.opts = list(width.cutoff = 95),
  message = FALSE,
  warning = FALSE,
  time_it = TRUE
)

Statistics Functions

scCustomize contains a couple simple helper functions to return metrics of interest.

For this vignette I will be utilizing pbmc3k dataset from the SeuratData package.

# Load Packages
library(ggplot2)
library(patchwork)
library(dplyr)
library(magrittr)
library(Seurat)
library(scCustomize)
library(qs)

# Load example dataset for tutorial
pbmc <- pbmc3k.SeuratData::pbmc3k.final
# Update pbmc check
pbmc <- UpdateSeuratObject(pbmc)

Now let's add some extra meta data for use with tutorial

# Add mito and ribo data
pbmc <- Add_Mito_Ribo(object = pbmc, species = "human")

# Add random sample and group variables
pbmc$orig.ident <- sample(c("sample1", "sample2", "sample3", "sample4"), size = ncol(pbmc), replace = TRUE)
pbmc@meta.data$group[pbmc@meta.data$orig.ident == "sample1" | pbmc@meta.data$orig.ident == "sample3"] <- "Group 1"
pbmc@meta.data$group[pbmc@meta.data$orig.ident == "sample2" | pbmc@meta.data$orig.ident == "sample4"] <- "Group 2"


pbmc@meta.data$treatment[pbmc@meta.data$orig.ident == "sample1"] <- "Control"
pbmc@meta.data$treatment[pbmc@meta.data$orig.ident == "sample2" | pbmc@meta.data$orig.ident == "sample4" | pbmc@meta.data$orig.ident == "sample3"] <- "Treated"

# Add dummy module score 
pbmc <- AddModuleScore(object = pbmc, features = list(c("CD3E", "CD4", "THY1", "TCRA")), name = "module_score")

Cells Per Identity

It can be really helpful to know the number and percentage of cells per identity/cluster during analysis.
scCustomize provides the Cluster_Stats_All_Samples function to make this easy.

cluster_stats <- Cluster_Stats_All_Samples(seurat_object = pbmc)

cluster_stats
cluster_stats <- Cluster_Stats_All_Samples(seurat_object = pbmc)

cluster_stats %>%
  kableExtra::kbl() %>%
  kableExtra::kable_styling(bootstrap_options = c("bordered", "condensed", "responsive", "striped")) 

By default Cluster_Stats_All_Samples uses "orig.ident" as the group variable but user can specify any @meta.data slot variable.

cluster_stats <- Cluster_Stats_All_Samples(seurat_object = pbmc, group_by_var = "group")

cluster_stats
cluster_stats <- Cluster_Stats_All_Samples(seurat_object = pbmc, group_by_var = "group")

cluster_stats %>%
  kableExtra::kbl() %>%
  kableExtra::kable_styling(bootstrap_options = c("bordered", "condensed", "responsive", "striped"))  

Plotting Cluster/Identity Proportions

Understanding the proportion of cells per cluster and how that may change across a group can be important aspect of single cell analysis. scCustomize contains function Proportion_Plot to provide some visualization of cell proportions.

Plotting proportionality can be tricky especially when dealing with clusters of very disparate sizes (especially small clusters) so plese be aware to examine raw data as well (see Cluster_Stats_All_Samples()).

Also note that when splitting plot by group there are now error bars on these plots for interpretation of variance and other plotting methods should be used to convery such information.

Basic Proportion Plots

Proportion_Plot can either return a bar graph or pie graph depending on parameter value for

Proportion_Plot(seurat_object = pbmc, plot_type = "bar")

Proportion_Plot(seurat_object = pbmc, plot_type = "pie")
p1 <- Proportion_Plot(seurat_object = pbmc, plot_type = "bar")

p2 <- Proportion_Plot(seurat_object = pbmc, plot_type = "pie")

wrap_plots(p1, p2)

Plot across groups

Like other plots you can also use Proportion_Plot to split plot across any categorical meta.data variable.

Proportion_Plot(seurat_object = pbmc, plot_type = "bar", split.by = "treatment")
Proportion_Plot(seurat_object = pbmc, plot_type = "pie", split.by = "treatment")

Plot raw cell numbers

The default for Proportion_Plot is to plot the proportion of cells and not the raw number of cells per identity because that can be deceptive. However, in some cases it can be useful and so users can change the plot_scale parameter to plot raw numbers of cells instead of proportions.

Proportion_Plot(seurat_object = pbmc, plot_type = "bar", split.by = "treatment", plot_scale = "count")

Percent of Cells Expressing Feature(s)

It can also be informative to understand the percent of cells/nuclei that express a given feature or set of features.
scCustomize provides the Percent_Expressing function to return these results.

percent_express <- Percent_Expressing(seurat_object = pbmc, features = c("CD4", "CD8A"))
percent_express <- Percent_Expressing(seurat_object = pbmc, features = c("CD4", "CD8A"))

percent_express %>%
  kableExtra::kbl() %>%
  kableExtra::kable_styling(bootstrap_options = c("bordered", "condensed", "responsive", "striped"))  

Change grouping variable

By default the function groups expression across @active.ident (see above) but user can specify difference variable if desired.

percent_express <- Percent_Expressing(seurat_object = pbmc, features = c("CD4", "CD8A"), group_by = "orig.ident")
percent_express <- Percent_Expressing(seurat_object = pbmc, features = c("CD4", "CD8A"), group_by = "orig.ident")

percent_express %>%
  kableExtra::kbl() %>%
  kableExtra::kable_styling(bootstrap_options = c("bordered", "condensed", "responsive", "striped"))  

Split within groups

User can also supply a split_by variable to quantify expression within group split by meta data variable

percent_express <- Percent_Expressing(seurat_object = pbmc, features = c("CD4", "CD8A"), split_by = "group")
percent_express <- Percent_Expressing(seurat_object = pbmc, features = c("CD4", "CD8A"), split_by = "group")

percent_express %>%
  kableExtra::kbl() %>%
  kableExtra::kable_styling(bootstrap_options = c("bordered", "condensed", "responsive", "striped"))  

Set threshold of expression

Users can also supply a threshold value to quantify percent of cells expressing feature(s) above a certain threshold (be sure to note which slot is being used to quantify expression when setting thresholds).
NOTE: Percent_Expressing currently only supports single threshold across all features

percent_express <- Percent_Expressing(seurat_object = pbmc, features = c("CD4", "CD8A"), threshold = 2)
percent_express <- Percent_Expressing(seurat_object = pbmc, features = c("CD4", "CD8A"), threshold = 2)

percent_express %>%
  kableExtra::kbl() %>%
  kableExtra::kable_styling(bootstrap_options = c("bordered", "condensed", "responsive", "striped"))  

Calculate Summary Median Values

scCustomize contains function Median_Stats to quickly calculate the medians for basic QC stats (Genes/, UMIs/, % Mito/Cell).

Basic Use

By default Median_Stats will calculate medians for the following meta data columns (if present): "nCount_RNA", "nFeature_RNA", "percent_mito", "percent_ribo", "percent_mito_ribo".

median_stats <- Median_Stats(seurat_object = pbmc, group_by_var = "orig.ident")
median_stats <- Median_Stats(seurat_object = pbmc, group_by_var = "orig.ident")

median_stats %>%
  kableExtra::kbl() %>%
  kableExtra::kable_styling(bootstrap_options = c("bordered", "condensed", "responsive", "striped"))  

Additional Variables

In addition to default variables, users can supply their own additional meta data columns to calculate medians using the median_var parameter.
NOTE: The meta data column must be in numeric format (e.g., integer, numeric, dbl)

median_stats <- Median_Stats(seurat_object = pbmc, group_by_var = "orig.ident", median_var = "module_score1")
median_stats <- Median_Stats(seurat_object = pbmc, group_by_var = "orig.ident", median_var = "module_score1")

median_stats %>%
  kableExtra::kbl() %>%
  kableExtra::kable_styling(bootstrap_options = c("bordered", "condensed", "responsive", "striped"))  

Calculate Median Absolute Deviations

In addition to calculating the median values scCustomize includes function to calculate the median absolute deviation for each of those features with function MAD_Stats. By setting the parameter mad_num the function will retrun the MAD*mad_num.

mad <- MAD_Stats(seurat_object = pbmc, group_by_var = "orig.ident", mad_num = 2)
mad <- MAD_Stats(seurat_object = pbmc, group_by_var = "orig.ident", mad_num = 2)

mad %>%
  kableExtra::kbl() %>%
  kableExtra::kable_styling(bootstrap_options = c("bordered", "condensed", "responsive", "striped"))  

Plotting Median Data

scCustomize also contains series of functions for plotting the results of these median calculations:

Plot_Median_Genes(seurat_object = pbmc)

Plot_Median_Genes(seurat_object = pbmc, group_by = "group")

Plot_Median_Other(seurat_object = pbmc, median_var = "module_score1", group_by = "group")
p1 <- Plot_Median_Genes(seurat_object = pbmc)

p2 <- Plot_Median_Genes(seurat_object = pbmc, group_by = "group")

p3 <- Plot_Median_Other(seurat_object = pbmc, median_var = "module_score1", group_by = "group")

patchwork::wrap_plots(p1, p2, p3, ncol = 3)


samuel-marsh/scCustomize documentation built on Dec. 20, 2024, 7:41 a.m.