# Functions for pairwise comparisons of two SC datasets
#' meanMarkerExpression
#'
#' This function computes each cell's mean expression of each cluster's
#' markers. Used for \code{\link{markerViolinPlot}}.
#'
#' @param seurat Seurat object, whose expression values will be used
#' @param markers Markers data frame, for the same or another Seurat object. The
#' expression of markers in each cluster in this data frame will be calculated
#' for each cluster in \code{seurat}.
#'
#' @return Data frame where the first two columns are "Cell" and "Cluster" (from
#' \code{seurat}),
#' and all remaining columns give the mean expression of markers identified
#' for each cluster in \code{markers}
#'
#' @export
#' @author Selin Jessa
#' @examples
#'
#' library(dplyr)
#'
#' # Using the same sample's marker:
#' meanMarkerExpressionPerCell(pbmc, markers_pbmc, "gene")
#'
#' # Change the name of the clusters in the markers df, as if it were
#' # from a different sample where the clusters are A, B, C, D:
#' markers2 <- mutate(markers_pbmc, cluster = recode(
#' cluster, `0` = "A", `1` = "B", `2` = "C", `3` = "D"))
#'
#' meanMarkerExpressionPerCell(pbmc, markers2, "gene")
meanMarkerExpressionPerCell <- function(seurat, markers, marker_col = "gene") {
perCluster <- function(i) {
# Keep marker genes for cluster
keep_markers <- filter(markers, cluster == i) %>% `[[`(marker_col)
# Filtered expression
filt_exp <- exp[rownames(exp) %in% keep_markers, ]
# For each cell, compute its mean expression of gene markers for the cluster
mean_exp <- colSums(filt_exp)/nrow(filt_exp)
mean_exp <- data.frame(mean = as.numeric(mean_exp))
names(mean_exp)[1] <- i
return(mean_exp)
}
exp <- as.data.frame(as.matrix(seurat@data))
# Iterate over the clusters for which we have markers
clusters <- as.character(sort(unique(markers$cluster)))
# Bind columns, each of which corresponds to a cluster
purrr::map_dfc(clusters, perCluster) %>%
# Create a column with the clusters from the Seurat object
tibble::add_column(Cluster = as.character(seurat@ident), .before = 1) %>%
tibble::add_column(Cell = colnames(exp), .before = 1)
}
#' pairwiseVln
#'
#' Make a grid of violin plots with the mean expression of each cluster, for markers
# of each cluster either from that same dataset, or from a different one. Here,
#' dataset 1 is the sample whose expression values will be plot, and dataset 2
#' is the sample whose cluster markers are used.
#'
#' @param seurat1 Seurat object for which to plot expression
#' @param markers Data frame as returned by Seurat::FindAllMarkers()
#' @param seurat2 Seurat object from which cluster markers (\code{markers}) were defined
#' @param sample_names (Optional) Character vector giving the names or ID of
#' the two samples, used for axis labels, etc.
#' Default: c(seurat1@@project.name, seurat2@@project.name)
#' @param marker_col String specifying the column in the \code{markers} data frames which
#' specifies the cluster. By default, Seurat calls this "gene" (the default here);
#' in objects produced by the lab's pipeline, it may be called "external_gene_name".
#'
#' @return A ggplot object
#' @author adapted from Alexis Blanchet-Cohen
#'
#' @aliases markerViolinPlot
#' @export
#' @examples
#' pairwiseVln(pbmc, markers_pbmc, pbmc)
pairwiseVln <- function(seurat1, markers, seurat2,
sample_names = c(seurat1@project.name, seurat2@project.name),
marker_col = "gene") {
clusters2 <- glue("{sample_names[2]} cluster {levels(seurat2@ident)}")
exp <- meanMarkerExpressionPerCell(seurat1, markers, marker_col)
gg <- exp %>%
tidyr::gather(s2_cluster, mean_expression, 3:ncol(.)) %>%
dplyr::mutate(s1_cluster = factor(Cluster, levels = levels(seurat1@ident)),
s2_cluster = glue("{sample_names[2]} cluster {s2_cluster}")) %>%
mutate(s2_cluster = factor(s2_cluster, levels = clusters2)) %>%
ggplot(aes(x = s1_cluster, y = mean_expression)) +
geom_violin(aes(fill = s2_cluster)) +
facet_wrap(~ s2_cluster) +
theme_min() +
xlab(glue("{sample_names[1]} cluster")) +
scale_fill_discrete(name = glue("{sample_names[2]} cluster")) +
theme(panel.grid.major.x = element_line(colour = "grey90"))
return(gg)
}
#' @export
markerViolinPlot <- pairwiseVln
#' percentMarkerOverlap
#'
#' @param markers1 Data frame of markers for one dataset as returned by Seurat::FindAllMarkers()
#' @param markers2 Data frame of markers for second dataset as returned by Seurat::FindAllMarkers()
#' @param mode "s1", "s2", "min", "max"
#' @param marker_col String specifying the column in the markers data frames which
#' specifies the cluster. By default, Seurat calls this "gene" (the default here);
#' in objects produced by the lab's pipeline, it may be called "external_gene_name".
#'
#' @return Data frame with three columns: "s1_cluster" giving the cluster for
#' dataset 1, "s2_cluster" giving the cluster for dataset 2, and "marker_overlap"
#' which gives the computed min percent overlap of markers between the two clusters
#' @export
#' @author Selin Jessa
#' @examples
#' percentMarkerOverlap(markers_pbmc, markers_pbmc)
percentMarkerOverlap <- function(markers1, markers2, mode = "min", marker_col = "gene") {
clust1 <- unique(markers1$cluster)
clust2 <- unique(markers2$cluster)
n_clust1 <- length(clust1)
n_clust2 <- length(clust2)
olaps_s1 <- vector("list", n_clust1 * n_clust2)
olaps_s2 <- vector("list", n_clust1 * n_clust2)
k <- 1
for (c1 in clust1) {
for (c2 in clust2) {
mk_1 <- markers1[markers1$cluster == c1, ][[marker_col]]
mk_2 <- markers2[markers2$cluster == c2, ][[marker_col]]
n_olap <- length(base::intersect(mk_1, mk_2))
olaps_s1[[k]] <- n_olap / length(mk_1)
olaps_s2[[k]] <- n_olap / length(mk_2)
k <- k + 1
}
}
df_olap <- data.frame(s1_cluster = rep(clust1, each = n_clust2),
s2_cluster = rep(clust2, times = n_clust1))
if (mode == "s1") df_olap$marker_overlap <- unlist(olaps_s1)
else if (mode == "s2") df_olap$marker_overlap <- unlist(olaps_s2)
else if (mode == "min") df_olap$marker_overlap <- pmin(unlist(olaps_s1), unlist(olaps_s2))
else if (mode == "max") df_olap$marker_overlap <- pmax(unlist(olaps_s1), unlist(olaps_s2))
return(df_olap)
}
#' Compute a heatmap of marker overlap between samples
#'
#' Generate a heatmap where rows correspond to clusters from one dataset and
#' columns correspond to the clusters from the second dataset. The value at [\emph{i, j}]
#' is overlap of markers between Cluster \emph{i} in dataset 1 and Cluster \emph{j} in dataset 2.
#' See Details for how to control how the "overlap" is computed.
#'
#' @details
#' When calculating the shared markers between clusters in two samples, we typically
#' count the number of markers in both lists, but have two options for what to use
#' as the denominator (either the number of markers in sample 1, or the number of
#' markers in sample 2). This function provides a few options for what to plot in the heatmap,
#' controlled by the `mode` argument:
#'
#' \itemize{
#' \item "s1": Plot the percentage of sample 1 markers which are also markers in sample 2
#' \item "s2": Plot the percentage of sample 2 markers which are also markers in sample 1
#' \item "min": Plot the minimum of the above percentages
#' \item "max": Plot the maximum of the above percentages
#' \item "both": Plot two heatmaps, one with the percentage of sample 1 markers which are also markers in sample 2,
#' and one the percentage of sample 2 markers which are also markers in sample 1
#' }
#'
#' @param markers1 Data frame of markers for one dataset as returned by Seurat::FindAllMarkers()
#' The clusters for this sample will be plot on the x axis.
#' @param markers2 Data frame of markers for second dataset as returned by Seurat::FindAllMarkers()
#' The clusters for this sample will be plot on the y axis.
#' @param mode String, one of "s1", "s2", "min", "max", or "both". Controls how to
#' compute the marker overlaps. Default: "min". See Details for more info.
#' @param sample_names (Optional) Character vector giving the names or ID of
#' the two samples, used for axis labels, etc. Default: c("Sample 1", "Sample 2")
#' @param marker_col String specifying the column in the markers data frames which
#' specifies the cluster. By default, Seurat calls this "gene"; in the pipeline,
#' it may be called "external_gene_name" (the default here).
#' @param palette Character vector containing a gradient palette to use.
#' Default: \code{\link{viridis::magma}}.
#' @param label_colour String specifying the colour of value of each cell
#' printed in the heatmap. Default: "white"
#'
#' @export
#' @author Selin Jessa
#'
#' @examples
#' # Compute the heatmap for one dataset (pbmc) with itself
#' heatmapPercentMarkerOverlap(markers_pbmc, markers_pbmc, marker_col = "gene")
heatmapPercentMarkerOverlap <- function(markers1, markers2, mode = "min",
sample_names = c("Sample 1", "Sample 2"),
palette = NULL,
marker_col = "external_gene_name",
label_colour = "white") {
if (mode == "both") {
return(plot_grid(heatmapPercentMarkerOverlap(markers1, markers2, "s1",
sample_names = sample_names,
palette = palette, marker_col = marker_col),
heatmapPercentMarkerOverlap(markers1, markers2, "s2",
sample_names = sample_names,
palette = palette, marker_col = marker_col),
ncol = 2))
}
min_olap <- percentMarkerOverlap(markers1, markers2, mode = mode, marker_col = marker_col)
# Get nice ordering
olap_srt <- min_olap %>% arrange(desc(marker_overlap))
if (is.null(palette)) pal <- viridis::magma(100)
else pal <- palette
gg <- min_olap %>%
dplyr::mutate(cluster = factor(s1_cluster, levels = rev(unique(olap_srt$s1_cluster))),
s2_cluster = factor(s2_cluster, levels = unique(olap_srt$s2_cluster))) %>%
ggplot(aes(x = s2_cluster, y = cluster)) +
geom_raster(aes(fill = marker_overlap)) +
scale_fill_gradientn(colors = pal, limits = c(0, 1)) +
geom_text(aes(label = round(marker_overlap, 2)), colour = label_colour, size = 3) +
scale_x_discrete(position = "top") +
theme_min() +
theme(panel.border = element_blank()) +
xlab(sample_names[1]) + ylab(sample_names[2]) +
guides(fill = guide_legend(title = "Marker overlap")) +
ggtitle(case_when(
mode == "min" ~ "Minimum % marker overlap",
mode == "max" ~ "Maximum % marker overlap",
mode == "s1" ~ as.character(glue("% {sample_names[1]} markers in {sample_names[2]}")),
mode == "s2" ~ as.character(glue("% {sample_names[2]} markers in {sample_names[1]}"))))
return(gg)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.