all_times <- list() # store the time for each chunk knitr::knit_hooks$set(time_it = local({ now <- NULL function(before, options) { if (before) { now <<- Sys.time() } else { res <- difftime(Sys.time(), now, units = "secs") all_times[[options$label]] <<- res } } })) knitr::opts_chunk$set( tidy = TRUE, tidy.opts = list(width.cutoff = 95), message = FALSE, warning = FALSE, time_it = TRUE )
While the default plots from Seurat and other packages are often very good they are often modified from their original outputs after plotting. scCustomize seeks to simplify this process and enhance some of the default visualizations.
Even simple things like adding the same two ggplot2 themeing options to every plot can be simplified for end user (and enhance reproducibility and code errors) by wrapping them inside a new function.
For this tutorial, I will be utilizing microglia data from Marsh et al., 2022 (Nature Neuroscience) the mouse microglia (Figure 1) referred to as marsh_mouse_micro
and the human post-mortem snRNA-seq (Figure 3) referred to as marsh_human_pm
in addition to the pbmc3k dataset from SeuratData package.
library(ggplot2) library(dplyr) library(magrittr) library(patchwork) library(viridis) library(Seurat) library(scCustomize) library(qs) # Load Marsh et al., 2022 datasets marsh_mouse_micro <- qread(file = "assets/marsh_2020_micro.qs") marsh_human_pm <- qread(file = "assets/marsh_human_pm.qs") # Load pbmc dataset pbmc <- pbmc3k.SeuratData::pbmc3k.final
# Update pbmc check pbmc <- UpdateSeuratObject(pbmc) MS4A1 <- WhichCells(object = pbmc, expression = MS4A1 > 3) GZMB <- WhichCells(object = pbmc, expression = GZMB > 3)
We'll add some random meta data variables to pbmc data form use in this vignette
pbmc$sample_id <- sample(c("sample1", "sample2", "sample3", "sample4", "sample5", "sample6"), size = ncol(pbmc), replace = TRUE) pbmc$treatment <- sample(c("Treatment1", "Treatment2", "Treatment3", "Treatment4"), size = ncol(pbmc), replace = TRUE)
marsh_mouse_micro <- FindVariableFeatures(object = marsh_mouse_micro, selection.method = "vst", nfeatures = 497) # marsh_mouse_micro <- UpdateSeuratObject(object = marsh_mouse_micro) # marsh_human_pm <- UpdateSeuratObject(marsh_human_pm) # Add misc data to objects marsh_mouse_micro[["sample_id"]] <- paste0("Sample ", marsh_mouse_micro@meta.data$orig.ident) marsh_mouse_micro@meta.data$sample_id <- factor(marsh_mouse_micro@meta.data$sample_id, levels = c("Sample 1", "Sample 2", "Sample 3", "Sample 4", "Sample 5", "Sample 6", "Sample 7", "Sample 8", "Sample 9", "Sample 10", "Sample 11", "Sample 12")) exAM_genes <- c("Rgs1", "Hist2h2aa1", "Hist1h4i", "Nfkbiz", "Klf2", "Junb", "Dusp1", "Ccl3", "Hspa1a", "Hsp90aa1", "Fos", "Hspa1b", "Jun", "Jund", "Nfkbid", "Gem", "Ccl4", "Ier5", "Txnip", "Hist1h2bc", "Zfp36", "Hist1h1c", "Egr1", "Atf3", "Rhob") micro_genes <- c("P2ry12", "Fcrls", "Trem2", "Tmem119", "Cx3cr1", "Hexb", "Tgfbr1", "Sparc", "P2ry13", "Olfml3", "Adgrg1", "C1qa", "C1qb", "C1qc", "Csf1r", "Fcgr3", "Ly86", "Laptm5") marsh_mouse_micro <- AddModuleScore(object = marsh_mouse_micro, features = list(exAM_genes), name = "exAM_Score") marsh_mouse_micro <- AddModuleScore(object = marsh_mouse_micro, features = list(micro_genes), name = "Microglia_Score") mouse_colors <- marsh_mouse_micro@misc$exp17_micro_colors
split.by
) but some others use the scCustomize convention so as to be universal throughout the package (i.e., Seurat=cols:scCustomize=colors_use)....
parameter to allow user to supply any of the parameters for the original Seurat (or other package) function that is being used under the hood. ...
parameter to allow user to supply any of the parameters for the original Seurat function that is being used under the hood.scCustomize allows for plotting of highly variable genes with desired number of points labeled in single function. VariableFeaturePlot_scCustom()
also contains several additional parameters for customizing visualization.
# Default scCustomize plot VariableFeaturePlot_scCustom(seurat_object = marsh_mouse_micro, num_features = 20) # Can remove labels if not desired VariableFeaturePlot_scCustom(seurat_object = marsh_mouse_micro, num_features = 20, label = FALSE) # Repel labels VariableFeaturePlot_scCustom(seurat_object = marsh_mouse_micro, num_features = 20, repel = TRUE) # Change the scale of y-axis from linear to log10 VariableFeaturePlot_scCustom(seurat_object = marsh_mouse_micro, num_features = 20, repel = TRUE, y_axis_log = TRUE)
p1 <- VariableFeaturePlot_scCustom(seurat_object = marsh_mouse_micro, num_features = 20) p2 <- VariableFeaturePlot_scCustom(seurat_object = marsh_mouse_micro, num_features = 20, label = FALSE) p3 <- VariableFeaturePlot_scCustom(seurat_object = marsh_mouse_micro, num_features = 20, repel = TRUE) p4 <- VariableFeaturePlot_scCustom(seurat_object = marsh_mouse_micro, num_features = 20, repel = TRUE, y_axis_log = TRUE) wrap_plots(p1, p2, p3, p4) + plot_annotation(tag_levels = "A")
In addition to labeling the top variable genes VariableFeaturePlot_scCustom
also supports labeling a custom set of genes using the custom_features
parameter.
For ease in evaluating PCA results scCustomize provides function PC_Plotting()
which returns both PC heatmap and Feature Loading plot in single patchwork layout.
PC_Plotting(seurat_object = marsh_mouse_micro, dim_number = 2)
This function can be easily enhanced using iterative version Iterate_PC_Loading_Plots()
to return a PDF document that contains plots for all desired PCs within object. See function manual and Iterative Plotting Vignette for more info.
scCustomize has few functions that improve on the default plotting options/parameters from Seurat and other packages.
The default plots fromSeurat::FeaturePlot()
are very good but I find can be enhanced in few ways that scCustomize sets by default.
Issues with default Seurat settings:
order = FALSE
is the default, resulting in potential for non-expressing cells to be plotted on top of expressing cells.# Set color palette pal <- viridis(n = 10, option = "C", direction = -1) # Create Plots FeaturePlot(object = marsh_mouse_micro, features = "Jun") FeaturePlot(object = marsh_mouse_micro, features = "Jun", order = T) FeaturePlot(object = marsh_mouse_micro, features = "Jun", cols = pal, order = T)
# Set color palette pal <- viridis(n = 10, option = "C", direction = -1) # Create Plots p1 <- FeaturePlot(object = marsh_mouse_micro, features = "Jun") p2 <- FeaturePlot(object = marsh_mouse_micro, features = "Jun", order = T) p3 <- FeaturePlot(object = marsh_mouse_micro, features = "Jun", cols = pal, order = T) wrap_plots(p1, p2, p3, ncol = 3) + plot_annotation(tag_levels = "A")
FeaturePlot_scCustom
solves these issues# Set color palette pal <- viridis(n = 10, option = "D") # Create Plots FeaturePlot_scCustom(seurat_object = marsh_mouse_micro, features = "Jun", order = F) FeaturePlot_scCustom(seurat_object = marsh_mouse_micro, features = "Jun") FeaturePlot_scCustom(seurat_object = marsh_mouse_micro, features = "Jun", colors_use = pal)
# Set color palette pal <- viridis(n = 10, option = "D") # Create Plots p1 <- FeaturePlot_scCustom(seurat_object = marsh_mouse_micro, features = "Jun", order = F) p2 <- FeaturePlot_scCustom(seurat_object = marsh_mouse_micro, features = "Jun") p3 <- FeaturePlot_scCustom(seurat_object = marsh_mouse_micro, features = "Jun", colors_use = pal) wrap_plots(p1, p2, p3, ncol = 3) + plot_annotation(tag_levels = "A")
Sometimes order=TRUE
can be distracting though and so can always set it to FALSE
In some cases (especially likely in snRNA-seq), some of the low expression may simply represent ambient RNA and therefore plotting with order=FALSE
may be advantageous for visualization (or using different plotting method).
FeaturePlot_scCustom(seurat_object = marsh_human_pm, features = "P2RY12") FeaturePlot_scCustom(seurat_object = marsh_human_pm, features = "P2RY12", order = F)
# Create Plots p1 <- FeaturePlot_scCustom(seurat_object = marsh_human_pm, features = "P2RY12") p2 <- FeaturePlot_scCustom(seurat_object = marsh_human_pm, features = "P2RY12", order = F) wrap_plots(p1, p2, ncol = 2)
As you can see above FeaturePlot_scCustom()
has the ability to plot non-expressing cells in outside of color scale used for expressing cells. However it is critical that users pay attention to the correctly setting the na_cutoff
parameter in FeaturePlot_scCustom
.
scCustomize contains a parameter called na_cutoff
which tells the function which values to plot as background. By default this is set to value that means background is treated as 0 or below. Depending on what feature, assay, or value you are interested in this parameter should be modified appropriately.
For instance if plotting module score which contains negative values you will probably want to remove the cutoff value entirely to avoid misconstruing results.
FeaturePlot_scCustom(seurat_object = marsh_mouse_micro, features = "Microglia_Score1") FeaturePlot_scCustom(seurat_object = marsh_mouse_micro, features = "Microglia_Score1", na_cutoff = NA)
p1 <- FeaturePlot_scCustom(seurat_object = marsh_mouse_micro, features = "Microglia_Score1") p2 <- FeaturePlot_scCustom(seurat_object = marsh_mouse_micro, features = "Microglia_Score1", na_cutoff = NA) wrap_plots(p1, p2, ncol = 2)
Other times you may actually want to set high na_cutoff value to enable better interpretation of the range of values in particular clusters of interest.
FeaturePlot_scCustom(seurat_object = marsh_human_pm, features = "nFeature_RNA") FeaturePlot_scCustom(seurat_object = marsh_human_pm, features = "nFeature_RNA", na_cutoff = 6000)
p1 <- FeaturePlot_scCustom(seurat_object = marsh_human_pm, features = "nFeature_RNA") p2 <- FeaturePlot_scCustom(seurat_object = marsh_human_pm, features = "nFeature_RNA", na_cutoff = 6000) wrap_plots(p1, p2, ncol = 2)
Seurat::FeaturePlot()
has additional issues when splitting by object\@meta.data variable.
FeaturePlot(object = marsh_mouse_micro, features = "P2ry12", split.by = "orig.ident")
FeaturePlot_scCustom solves this issue and allows for setting the number of columns in FeaturePlots
FeaturePlot_scCustom(seurat_object = marsh_mouse_micro, features = "P2ry12", split.by = "sample_id", num_columns = 4)
FeaturePlot_scCustom
also allows for customization of the transparency (alpha) of both the points of expressing cells and non-expressing cells using optional parameters.
FeaturePlot_scCustom(object = marsh_mouse_micro, features = "Jun", alpha_exp = 0.75)
# Create Plots p1 <- FeaturePlot_scCustom(seurat_object = marsh_mouse_micro, features = "Jun", pt.size = 1) p2 <- FeaturePlot_scCustom(seurat_object = marsh_mouse_micro, features = "Jun", pt.size = 1, alpha_exp = 0.55) wrap_plots(p1, p2, ncol = 2) + plot_annotation(tag_levels = "A")
The Nebulosa package provides really great functions for plotting gene expression via density plots.
scCustomize provides two functions to extend functionality of these plots and for ease of plotting "joint" density plots.
Currently Nebulosa only supports plotting using 1 of 5 viridis color palettes: "viridis", "magma", "cividis", "inferno", and "plasma"). Plot_Density_Custom()
changes the default palette to "magma" and also allows for use of any custom gradient.
Plot_Density_Custom(seurat_object = marsh_mouse_micro, features = "Fos") Plot_Density_Custom(seurat_object = marsh_mouse_micro, features = "Fos", custom_palette = PurpleAndYellow())
p1 <- Plot_Density_Custom(seurat_object = marsh_mouse_micro, features = "Fos") p2 <- Plot_Density_Custom(seurat_object = marsh_mouse_micro, features = "Fos", custom_palette = PurpleAndYellow()) wrap_plots(p1, p2, ncol = 2)
Often user may only want to return the "Joint" density plot when providing multiple features. Plot_Density_Joint_Only()
simplifies this requiring only single function and only returns the joint plot for the features provided.
Plot_Density_Joint_Only(seurat_object = marsh_mouse_micro, features = c("Fos", "Jun"))
In certain situations returning a plot from two different assays within the same object may be advantageous. For instance when object contains but raw and Cell Bender corrected counts you may want to plot the same gene from both assays to view the difference. See Cell Bender Functionality vignette for more info.
cell_bender_example <- qread("assets/astro_nuc_seq.qs")
cell_bender_example <- UpdateSeuratObject(cell_bender_example)
FeaturePlot_DualAssay(seurat_object = cell_bender_example, features = "Syt1", assay1 = "RAW", assay2 = "RNA")
Often plotting many genes simultaneously using Violin plots is desired. scCustomize provides Stacked_VlnPlot()
for a more aesthetic stacked violin plot compared to stacked plots that can be made using default Seurat::VlnPlot()
.
The original version of this function was written by Ming Tang and posted on his blog. Function is included with permission and authorship.
gene_list_plot <- c("SLC17A7", "GAD2", "AQP4", "MYT1", "COL1A2", "CLDN5", "OPALIN", "CX3CR1", "CD3E") human_colors_list <- c("dodgerblue", "navy", "forestgreen", "darkorange2", "darkorchid3", "orchid", "orange", "gold", "gray") # Create Plots Stacked_VlnPlot(seurat_object = marsh_human_pm, features = gene_list_plot, x_lab_rotate = TRUE, colors_use = human_colors_list)
Stacked_VlnPlot
also supports any additional parameters that are part of Seurat::VlnPlot()
For instance splitting plot by meta data feature.
sample_colors <- c("dodgerblue", "forestgreen", "firebrick1") # Create Plots Stacked_VlnPlot(seurat_object = marsh_human_pm, features = gene_list_plot, x_lab_rotate = TRUE, colors_use = sample_colors, split.by = "orig.ident")
Depending on number of genes plotted and user preferences it may be helpful to change the vertical spacing between plots. This can be done using the plot_spacing
and spacing_unit
parameters.
# Default plot spacing (plot_spacing = 0.15 and spacing_unit = "cm") Stacked_VlnPlot(seurat_object = pbmc, features = c("CD3E", "CD14", "MS4A1", "FCER1A", "PPBP"), x_lab_rotate = TRUE) # Double the space between plots Stacked_VlnPlot(seurat_object = pbmc, features = c("CD3E", "CD14", "MS4A1", "FCER1A", "PPBP"), x_lab_rotate = TRUE, plot_spacing = 0.3)
# Default plot spacing (plot_spacing = 0.15 and spacing_unit = "cm") p1 <- Stacked_VlnPlot(seurat_object = pbmc, features = c("CD3E", "CD14", "MS4A1", "FCER1A", "PPBP"), x_lab_rotate = TRUE) # Double the space between plots p2 <- Stacked_VlnPlot(seurat_object = pbmc, features = c("CD3E", "CD14", "MS4A1", "FCER1A", "PPBP"), x_lab_rotate = TRUE, plot_spacing = 0.5) wrap_plots(p1, p2, ncol = 2)
Please note that even more so than many other plots you will need to adjust the height and width of these plots significantly depending on the number of features and number of identities being plotted.
Stacked_VlnPlot
also supports plotting of object\@meta.data variables (i.e. mito% or module scores).Stacked_VlnPlot(seurat_object = marsh_human_pm, features = c("percent_mito", "percent_ribo"), x_lab_rotate = TRUE, colors_use = human_colors_list)
By default Stacked_VlnPlot
sets the pt.size
parameter to 0 to accelerate plotting/rendering which slow down dramatically depending on size of dataset and number of features plotted.
If points are still desired then plotting/rendering can be sped up by using the raster
parameter. By default raster will be set to TRUE
if pt.size > 0
and number of cells x number of features > 100,000
.
In addition to Stacked_VlnPlot
scCustomize also features a modified version of Seurat's VlnPlot()
.
VlnPlot_scCustom
provides unified color palette plottingVlnPlot(object = pbmc, features = "PTPRC") VlnPlot_scCustom(seurat_object = pbmc, features = "PTPRC")
# Create Plots p1 <- VlnPlot(object = pbmc, features = "PTPRC") p2 <- VlnPlot_scCustom(seurat_object = pbmc, features = "PTPRC") wrap_plots(p1, p2) + plot_annotation(tag_levels = "A")
Seurat VlnPlot()
supports rasterization but does not alter it's plotting behavior by default. Both Stacked_VlnPlot
and VlnPlot_scCustom
have a built in check to automatically rasterize the points if the: number of cells x number of features > 100,000
to accelerate plotting when vector plots are not necessary.
Rasterization can also be manually turned on or off using the raster
parameter
VlnPlot_scCustom(seurat_object = pbmc, features = "PTPRC", raster = FALSE) VlnPlot_scCustom(seurat_object = pbmc, features = "PTPRC", raster = TRUE)
# Create Plots p1 <- VlnPlot_scCustom(seurat_object = pbmc, features = "PTPRC", raster = FALSE) p2 <- VlnPlot_scCustom(seurat_object = pbmc, features = "PTPRC", raster = TRUE) wrap_plots(p1, p2) + plot_annotation(tag_levels = "A")
scCustomize VlnPlot_scCustom
can be further customized to display the median value for each idenity or add boxplot on top of the violin.
VlnPlot_scCustom(seurat_object = pbmc, features = "PTPRC", plot_median = TRUE) & NoLegend() VlnPlot_scCustom(seurat_object = pbmc, features = "PTPRC", plot_boxplot = TRUE) & NoLegend()
# Create Plots p1 <- VlnPlot_scCustom(seurat_object = pbmc, features = "FOS", plot_median = TRUE) & NoLegend() p2 <- VlnPlot_scCustom(seurat_object = pbmc, features = "FOS", plot_boxplot = TRUE) & NoLegend() wrap_plots(p1, p2) + plot_annotation(tag_levels = "A")
Seurat's DotPlot()
function is really good but lacks the ability to provide custom color gradient of more than 2 colors.
DotPlot_scCustom()
allows for plotting with custom gradients.micro_genes <- c("P2ry12", "Fcrls", "Trem2", "Tmem119", "Cx3cr1", "Hexb", "Tgfbr1", "Sparc", "P2ry13", "Olfml3", "Adgrg1", "C1qa", "C1qb", "C1qc", "Csf1r", "Fcgr3", "Ly86", "Laptm5") DotPlot(object = marsh_mouse_micro, features = micro_genes[1:6], cols = viridis_plasma_dark_high) DotPlot_scCustom(seurat_object = marsh_mouse_micro, features = micro_genes[1:6], colors_use = viridis_plasma_dark_high)
# Create Plots p1 <- DotPlot(object = marsh_mouse_micro, features = micro_genes[1:6], cols = viridis_plasma_dark_high) p2 <- DotPlot_scCustom(seurat_object = marsh_mouse_micro, features = micro_genes[1:6], colors_use = viridis_plasma_dark_high) wrap_plots(p1, p2) + plot_annotation(tag_levels = "A")
DotPlot_scCustom()
also contains additional parameters for easy manipulations of axes for better plotting.These allow for:
x_lab_rotate
rotating x-axis text Default is FALSE.y_lab_rotate
rotating y-axis text Default is FALSE.flip_axes
flip the axes. Default is FALSE.remove_axis_titles
remove the x- and y-axis labels. Default is TRUEDotPlot_scCustom(seurat_object = marsh_mouse_micro, features = micro_genes[1:6], x_lab_rotate = TRUE) DotPlot_scCustom(seurat_object = marsh_mouse_micro, features = micro_genes[1:6], y_lab_rotate = TRUE) DotPlot_scCustom(seurat_object = marsh_mouse_micro, features = micro_genes[1:6], flip_axes = T, x_lab_rotate = TRUE) DotPlot_scCustom(seurat_object = marsh_mouse_micro, features = micro_genes[1:6], flip_axes = T, remove_axis_titles = FALSE)
# Create Plots p1 <- DotPlot_scCustom(seurat_object = marsh_mouse_micro, features = micro_genes[1:6], x_lab_rotate = TRUE) p2 <- DotPlot_scCustom(seurat_object = marsh_mouse_micro, features = micro_genes[1:6], y_lab_rotate = TRUE) p3 <- DotPlot_scCustom(seurat_object = marsh_mouse_micro, features = micro_genes[1:6], flip_axes = T, x_lab_rotate = TRUE) p4 <- DotPlot_scCustom(seurat_object = marsh_mouse_micro, features = micro_genes[1:6], flip_axes = T, x_lab_rotate = TRUE, remove_axis_titles = FALSE) wrap_plots(p1, p2, p3, p4, ncol = 2) + plot_annotation(tag_levels = "A")
For a more advanced take on the DotPlot
scCustomize contains a separate function Clustered_DotPlot()
. This function allows for clustering of both gene expression patterns and identities in the final plot.
The original version of this function was written by Ming Tang and posted on his blog. Function is included with permission, authorship, and assistance.
# Find markers and limit to those expressed in greater than 75% of target population all_markers <- FindAllMarkers(object = pbmc) %>% Add_Pct_Diff() %>% filter(pct_diff > 0.6) top_markers <- Extract_Top_Markers(marker_dataframe = all_markers, num_genes = 7, named_vector = FALSE, make_unique = TRUE) Clustered_DotPlot(seurat_object = pbmc, features = top_markers)
all_markers <- FindAllMarkers(object = pbmc) %>% Add_Pct_Diff() %>% filter(pct_diff > 0.6) top_markers <- Extract_Top_Markers(marker_dataframe = all_markers, num_genes = 7, named_vector = FALSE, make_unique = TRUE) plots <- Clustered_DotPlot(seurat_object = pbmc, features = top_markers)
plots[[2]]
By default Clustered_DotPlot
performs k-means clustering with k value set to 1. However, users can change this value to enable better visualization of expression patterns.
Clustered_DotPlot(seurat_object = pbmc, features = top_markers, k = 8)
plots <- Clustered_DotPlot(seurat_object = pbmc, features = top_markers, k = 8)
plots[[2]]
You can also change the location and orientation of the plot legends. For instance here we move the legend to the bottom of the plot and reorient the legend to display horizontally. We also remove the identity legend as redundant information (and it takes up a lot of room when horizontal).
plots <- Clustered_DotPlot(seurat_object = pbmc, features = top_markers, k = 8, plot_km_elbow = FALSE, legend_position = "bottom", legend_orientation = "horizontal", show_ident_legend = FALSE)
plots <- Clustered_DotPlot(seurat_object = pbmc, features = top_markers, k = 8, plot_km_elbow = FALSE, legend_position = "bottom", legend_orientation = "horizontal", show_ident_legend = FALSE)
plots
Clustered_DotPlot
can now plot with additional grouping variable provided to split.by
parameter.
Clustered_DotPlot(seurat_object = marsh_mouse_micro, features = c("Fos", "Jun", "Egr1", "Aif1", "P2ry12", "Tmem119"), split.by = "Transcription_Method")
plot_split <- Clustered_DotPlot(seurat_object = marsh_mouse_micro, features = c("Fos", "Jun", "Egr1", "Aif1", "P2ry12", "Tmem119"), split.by = "Transcription_Method")
plot_split[[2]]
However, you'll notice that the labels on the bottom get cutoff on the left-hand side of the plot. There are two solutions to this.
Keep bottom labels rotated but add extra white-space padding on left
Clustered_DotPlot(seurat_object = marsh_mouse_micro, features = c("Fos", "Jun", "Egr1", "Aif1", "P2ry12", "Tmem119"), split.by = "Transcription_Method", plot_padding = TRUE)
plot_split <- Clustered_DotPlot(seurat_object = marsh_mouse_micro, features = c("Fos", "Jun", "Egr1", "Aif1", "P2ry12", "Tmem119"), split.by = "Transcription_Method", plot_padding = TRUE)
plot_split[[2]]
Or simply remove the bottom label text rotation
Clustered_DotPlot(seurat_object = marsh_mouse_micro, features = c("Fos", "Jun", "Egr1", "Aif1", "P2ry12", "Tmem119"), split.by = "Transcription_Method", x_lab_rotate = 90)
plot_split <- Clustered_DotPlot(seurat_object = marsh_mouse_micro, features = c("Fos", "Jun", "Egr1", "Aif1", "P2ry12", "Tmem119"), split.by = "Transcription_Method", x_lab_rotate = 90)
plot_split[[2]]
Determining Optimal k Value
There are many methods that can be used to aid in determining an optimal value for k in k-means clustering. One of the simplest is to the look at the elbow in sum of squares error plot. By default Clustered_DotPlot
will return this plot when using the function. However, it can be turned off by setting plot_km_elbow = FALSE
.
plots <- Clustered_DotPlot(seurat_object = pbmc, features = top_markers, k = 8, plot_km_elbow = TRUE, legend_position = "bottom", legend_orientation = "horizontal", show_ident_legend = FALSE) plots[[1]]
plots <- Clustered_DotPlot(seurat_object = pbmc, features = top_markers, k = 8, plot_km_elbow = TRUE, legend_position = "bottom", legend_orientation = "horizontal", show_ident_legend = FALSE)
plots[[1]]
The number of k values plotted must be 1 less than number of features. Default is to plot 20 values but users can customize number of k values plotted using elbow_kmax
parameter.
Consensus k-means Clustering
By default Clustered_DotPlot
uses consensus k-means clustering with 1000 repeats for both the columns and rows. However, user can reduce or increase these values as desired by supplying new value to either or both of row_km_repeats
/column_km_repeats
There are a number of optional parameters that can be modified by user depending on the desired resulting plot.
colors_use_exp
to provide custom color scaleexp_color_min
/exp_color_max
can be used to clip the min and max values on the expression scale.print_exp_quantiles
can be set to TRUE to print the expression quantiles to aid in setting new exp_color_min
/exp_color_max
.exp_color_middle
can be used to change what value is used for the middle of the color scale. By default is set to the middle of exp_color_min
/exp_color_max
but can be modified if a skewed visualization is desired.Clustered_DotPlot(seurat_object = pbmc, features = top_markers, k = 8, print_exp_quantiles = T)
Quantiles of gene expression data are: 10% 50% 90% 99% -0.6555988 -0.3595223 1.7742718 2.6666597
Here we can adjust the expression clipping based on the range of the data in this specific dataset and list of features and change the color scale to use Seurat::PurpleAndYellow()
Clustered_DotPlot(seurat_object = pbmc, features = top_markers, k = 8, exp_color_min = -1, exp_color_max = 2, colors_use_exp = PurpleAndYellow())
plots <- Clustered_DotPlot(seurat_object = pbmc, features = top_markers, k = 8, exp_color_min = -1, exp_color_max = 2, colors_use_exp = PurpleAndYellow())
plots[[2]]
colors_use_idents
Colors to use for the identities (columns) plotted. show_ident_colors
and show_ident_legend
to show or hide the identity colors on plot and/or identity legend. Default is TRUE. x_lab_rotate
Logical or Integer. Default is TRUE (45 degree column label rotation). FALSE (0 degree rotation). Integer for custom text rotation value. row_label_size
Change size of row text (gene symbols). show_row_names
and show_column_names
to show or hide the column and row names. Default is TRUE.column_names_side
and row_names_side
to change the location of column and row labels. Default is column_names_side = "bottom"
and row_names_side = "right"
. raster
Whether to rasterize the plot (faster and smaller file size). Default is FALSE. The scCustomize function FeatureScatter_scCustom()
is a slightly modified version of Seurat::FeatureScatter()
with some different default settings and parameter options.
FeatureScatter_scCustom()
plots can be very useful when comparing between two genes/features or comparing module scores. By default scCustomize version sets shuffle = TRUE
to ensure that points are not hidden due to order of plotting.
# Create Plots FeatureScatter_scCustom(seurat_object = marsh_mouse_micro, feature1 = "exAM_Score1", feature2 = "Microglia_Score1", colors_use = mouse_colors, group.by = "ident", num_columns = 2, pt.size = 1)
scCustomize previously contained function Split_FeatureScatter
as Seurat's plot lacked that functionality. However, that is now present and Split_FeatureScatter
has therefore been deprecated and it's functionality moved within FeatureScatter_scCustom
.
FeatureScatter_scCustom()
contains two options for splitting plots (similar to DimPlot_scCustom
. The default is to return each plot with their own x and y axes, which has a number of advantages (see DimPlot_scCustom
section).
# Create Plots FeatureScatter_scCustom(seurat_object = marsh_mouse_micro, feature1 = "exAM_Score1", feature2 = "Microglia_Score1", colors_use = mouse_colors, split.by = "Transcription_Method", group.by = "ident", num_columns = 2, pt.size = 1)
While scCustomize does not have a custom heatmap plot itself (yet...) it does have functions that can make cell-level heatmaps easier to view/interpret. By default the heatmap in Seurat will size the groups on the heatmap based om their size in the dataset. This can make it very hard to view the results (especially for small clusters).
# Selection fo cluster marker genes micro_markers <- c("P2ry12", "Cx3cr1", "Fos", "Hspa1a", "Ms4a7", "Ccr2", "Isg15", "Ifit3", "Lpl", "Plaur", "Mki67", "Top2a") DoHeatmap(object = marsh_mouse_micro, features = micro_markers)
# Selection fo cluster marker genes micro_markers <- c("P2ry12", "Cx3cr1", "Fos", "Hspa1a", "Ms4a7", "Ccr2", "Isg15", "Ifit3", "Lpl", "Plaur", "Mki67", "Top2a") marsh_mouse_micro2 <- ScaleData(marsh_mouse_micro, features = micro_markers) DoHeatmap(object = marsh_mouse_micro2, features = micro_markers)
While this can be helpful in someways to gauge cluster size other visualizations are usually better for that and heatmaps are usually aimed at informing relative (scaled) gene expression across clusters, groups, etc.
We can solve this issue by downsampling which cells are used in the plot so that number of cells is equal across all groups.
First we need to get idea of how many cells are in each identity. scCustomize provides the Cluster_Stats_All_Samples
function to make this easy.
cluster_stats <- Cluster_Stats_All_Samples(seurat_object = marsh_mouse_micro) cluster_stats
cluster_stats <- Cluster_Stats_All_Samples(seurat_object = marsh_mouse_micro) cluster_stats %>% kableExtra::kbl() %>% kableExtra::kable_styling(bootstrap_options = c("bordered", "condensed", "responsive", "striped"))
Now we can use the function Random_Cells_Downsample
to return a downsampled selection of cells from each identity. We can either set the num_cells
parameter manually to the smallest group or simply provide "min"
.
In this case that would 27 cells per group. which is a bit small to get representative sample when other groups are much larger. So alternatively, we can set a larger value (150 cells) and change the optional parameter allow_lower
to TRUE
. This means that all groups larger than 150 cells will get downsampled but any smaller will simply return all cells in that ident.
# random cells can either be return to environment random_cells_150 <- Random_Cells_Downsample(seurat_object = marsh_mouse_micro, num_cells = 150, allow_lower = T) DoHeatmap(object = marsh_mouse_micro, features = micro_markers, cells = random_cells_150) # Or called within the `DoHeatmap` function. DoHeatmap(object = marsh_mouse_micro, features = micro_markers, cells = Random_Cells_Downsample(seurat_object = marsh_mouse_micro, num_cells = 150, allow_lower = T))
DoHeatmap(object = marsh_mouse_micro, features = micro_markers, cells = Random_Cells_Downsample(seurat_object = marsh_mouse_micro, num_cells = 150, allow_lower = T))
scCustomize has a few functions that improve on the default plotting options available in Seurat
The scCustomize function DimPlot_scCustom()
is a slightly modified version of Seurat::DimPlot()
with some different default settings and parameter options.
The default ggplot2 hue palette becomes very hard to distinguish between at even a moderate number of clusters for a scRNA-seq experiment. scCustomize's function DimPlot_scCustom
sets new default color palettes:
shuffle_pal = TRUE
.ggplot_default_colors = TRUE
.To best demonstrate rationale for this I'm going to use over-clustered version of the marsh_mouse_micro
object.
# Let's create over-clustered object to use as example marsh_mouse_over <- FindNeighbors(object = marsh_mouse_micro, dims = 1:15) marsh_mouse_over <- FindClusters(object = marsh_mouse_over, resolution = 0.8)
DimPlot(object = marsh_mouse_over) DimPlot_scCustom(seurat_object = marsh_mouse_over)
p1 <- DimPlot(object = marsh_mouse_over) p2 <- DimPlot_scCustom(seurat_object = marsh_mouse_over) wrap_plots(p1, p2, ncol = 2)
By default Seurat's DimPlot()
plots each group on top of the next which can make plots harder to interpret. DimPlot_scCustom
sets shuffle = TRUE
by default as I believe this setting is more often the visualization that provides the most clarity.
Here is example when plotting by donor in the human dataset to determine how well the dataset integration worked.
DimPlot(object = marsh_human_pm, group.by = "sample_id") DimPlot_scCustom(seurat_object = marsh_human_pm, group.by = "sample_id")
p1 <- DimPlot(object = marsh_human_pm, group.by = "sample_id") p2 <- DimPlot_scCustom(seurat_object = marsh_human_pm, group.by = "sample_id") wrap_plots(p1, p2, ncol = 2) + plot_annotation(tag_levels = "A")
When plotting a split plot Seurat::DimPlot()
simplifies the axes by implementing shared axes depending on the number of columns specified.
DimPlot(object = pbmc, split.by = "treatment") DimPlot(object = pbmc, split.by = "sample_id", ncol = 4)
p1 <- DimPlot(object = pbmc, split.by = "treatment") p2 <- DimPlot(object = pbmc, split.by = "sample_id", ncol = 4) wrap_plots(p1, p2, ncol = 1) + plot_annotation(tag_levels = "A")
By default when using split.by
with DimPlot_scCustom
the layout is returned with an axes for each plot to make visualization of large numbers of splits easier.
DimPlot_scCustom(seurat_object = pbmc, split.by = "treatment", num_columns = 4, repel = TRUE)
DimPlot_scCustom(seurat_object = pbmc, split.by = "treatment", num_columns = 4, repel = TRUE)
Can also return to the default Seurat method of splitting plots while maintaining all of the other modifications in DimPlot_scCustom
by supplying split_seurat = TRUE
DimPlot_scCustom(seurat_object = pbmc, split.by = "treatment", num_columns = 4, repel = TRUE, split_seurat = TRUE)
Some times when creating plots for figures it is desirable to remove the axes from the plot and simply add an axis legend. To do this with DimPlot_scCustom
simply set figure_plot = TRUE
.
DimPlot_scCustom(seurat_object = pbmc, figure_plot = TRUE)
DimPlot_scCustom(seurat_object = pbmc, figure_plot = TRUE)
Even with an optimized color palette it can still be difficult to determine the boundaries of clusters when plotting all clusters at once.
scCustomize provides easy of use function Cluster_Highlight_Plot()
to highlight a select cluster or clusters vs. the remainder of cells to determine where they lie on the plot.
NOTE: While named Cluster_Highlight_Plot
this function will simply pull from seurat_object\@active.ident slot which may or may not be cluster results depending on user settings. For creating highlight plots of meta data variables see next section on Meta_Highlight_Plot
.
Cluster_Highlight_Plot(seurat_object = marsh_mouse_over, cluster_name = "7", highlight_color = "navy", background_color = "lightgray") Cluster_Highlight_Plot(seurat_object = marsh_mouse_over, cluster_name = "8", highlight_color = "forestgreen", background_color = "lightgray")
p1 <- Cluster_Highlight_Plot(seurat_object = marsh_mouse_over, cluster_name = "7", highlight_color = "navy", background_color = "lightgray") p2 <- Cluster_Highlight_Plot(seurat_object = marsh_mouse_over, cluster_name = "8", highlight_color = "forestgreen", background_color = "lightgray") wrap_plots(p1, p2, ncol = 2)
Cluster_Highlight_Plot()
also supports the ability to plot multiple identities in the same plot.
Cluster_Highlight_Plot(seurat_object = marsh_mouse_over, cluster_name = c("7", "8"), highlight_color = c("navy", "forestgreen"))
Cluster_Highlight_Plot(seurat_object = marsh_mouse_over, cluster_name = c("7", "8"), highlight_color = c("navy", "forestgreen"))
NOTE: If no value is provided to highlight_color
then all clusters provided to cluster_name
will be plotted using single default color (navy).
scCustomize also contains an analogous function Meta_Highlight_Plot()
that allows for quick highlight plots of any valid \@meta.data variable. Meta data variables must be one of class(): "factor", "character", or "logical" to be highlighted.
Numeric variables representing things like "batch" can be converted using as.character
or as.factor
first to allow for plotting.
Meta_Highlight_Plot(seurat_object = marsh_mouse_micro, meta_data_column = "Transcription_Method", meta_data_highlight = "ENZYMATIC_NONE", highlight_color = "firebrick", background_color = "lightgray")
Meta_Highlight_Plot()
also supports plotting two or more levels from the same meta data column in the same plot, similar to plotting multiple identities with Cluster_Highlight_Plot()
Meta_Highlight_Plot(seurat_object = marsh_mouse_micro, meta_data_column = "Transcription_Method", meta_data_highlight = c("ENZYMATIC_NONE", "DOUNCE_NONE"), highlight_color = c("firebrick", "dodgerblue"), background_color = "lightgray")
Sometimes the cells that you want to highlight on a plot may not be represented by an active.ident or meta.data column. In this scenario you can use Cell_Highlight_Plot
.
NOTE: Values of cell names provided to cells_highlight
parameter must be a named list.
Let's say we want to highlight cells with expression of MS4A1 above certain threshold.
# Get cell names MS4A1 <- WhichCells(object = pbmc, expression = MS4A1 > 3) # Make into list cells <- list("MS4A1" = MS4A1) # Plot Cell_Highlight_Plot(seurat_object = pbmc, cells_highlight = cells)
# Make into list cells <- list("MS4A1" = MS4A1) # Plot Cell_Highlight_Plot(seurat_object = pbmc, cells_highlight = cells)
Cell_Highlight_Plot()
also supports plotting two or more sets of cells in the same plot, similar to plotting multiple identities with Cluster_Highlight_Plot()
/Meta_Highlight_Plot()
.
# Get cell names and make list MS4A1 <- WhichCells(object = pbmc, expression = MS4A1 > 3) GZMB <- WhichCells(object = pbmc, expression = GZMB > 3) cells <- list("MS4A1" = MS4A1, "GZMB" = GZMB) # Plot Cell_Highlight_Plot(seurat_object = pbmc, cells_highlight = cells)
# Get cell names and make list cells <- list("MS4A1" = MS4A1, "GZMB" = GZMB) # Plot Cell_Highlight_Plot(seurat_object = pbmc, cells_highlight = cells)
Sometimes can be beneficial to create layouts where there is no grouping variable and plots are simply colored by the split variable.
DimPlot_All_Samples(seurat_object = pbmc, meta_data_column = "sample_id", num_col = 3, pt.size = 0.5)
Can unique color each plot by providing a vector of colors instead of single value
DimPlot_All_Samples(seurat_object = marsh_mouse_micro, meta_data_column = "Transcription", num_col = 2, pt.size = 0.5, color = c("firebrick3", "dodgerblue3"))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.