#' Calculates The Optimum Dimensionality From An Elbow Plot.
#'
#' A function that calculates the optimum dimensionality from an elbow plot. Involves calculating
#' slopes between two consecutive points. The optimum dimensionality will be determined as the point at
#' which slopes no longer change or no visible change occurs to the slopes thereafter. In this function, a dimension
#' is determined optimal when a slope connecting that point to another is flatter than 10 times the flattest slope
#' in the plot and the stdev at that point is proximal to the lower limit.
#' The plot data must be plotted by dims (x-axis) and stdev (y-axis).
#'
#' @param plot_data An elbow plot data used for detemining dimensionality, generated by calling ElbowPlot() on a Seurat object.
#' @param cutoff The cutoff that indicates how much data/variance is covered.
#'
#' @return Returns an integer that represents the optimal dimensionality that was determined by calculation.
#'
#' @examples
#' library(Seurat)
#' determine_dimensionality(elbow_plot_data, 0.95)
#'
#' @import Seurat
#'
#' @export
determine_dimensionality <- function(plot_data, cutoff) {
sum_so_far <- 0
sum_stdev <- sum(plot_data$stdev)
dimension <- 1
# Count dimension until we reach the dimension that explains enough of the data
# that satisfies the cutoff.
if (cutoff == 1) {
dimension <- max(plot_data$dims)
} else {
while (sum_so_far < sum_stdev * cutoff) {
sum_so_far <- sum_so_far + plot_data$stdev[dimension]
dimension <- dimension + 1
}
}
plot_data$color <- "black"
plot_data$color[plot_data$dims < dimension] <- "green"
plot_data$color[dimension] <- "red"
plot(plot_data$dims, plot_data$stdev, col = plot_data$color, pch = 19,
main = "Elbow plot with determined dimensionality",
sub = "Red dot with an arrow indicates the dimensionality and the green points are all data included.")
arrows(dimension, plot_data$stdev[dimension] + 1, dimension, plot_data$stdev[dimension] + 0.5)
return(dimension)
}
#' Compares Genes Expressed In Each Cluster.
#'
#' A function that compares and compiles all the genes identified by extract_genes_expressed() into one matrix. Does
#' this by determining the gene expressions that overlap between clusters and mapping that information into
#' the matrix.
#'
#' @param seurat_object A Seurat object that has undergone the entire workflow, involving UMAP.
#' @param min_percent A float representing the percentage of cells that have to express the gene to be
#' included in the result.
#'
#' @return Returns a matrix of number of clusters by number of clusters, with each matrix cell (x,y)
#' containing the vector of genes expressed by both clusters x and y.
#'
#' @examples
#' compare_cluster_genes(PBMC)
#'
#' @export
compare_cluster_genes <- function(seurat_object, min_percent) {
# Preparing data for manipulation.
clusters <- unique(seurat_object$seurat_clusters)
num_clusters <- length(clusters)
row_col_name <- as.character(sort(clusters)) # Used for row and column of the matrix and they have to be chars.
# Prepare matrices to input the data.
# First matri is to first compile all the gene expression data for each cluster, generated
# by extract_genes_expressed.
# Second matrix is to keep the genes that were determined to overlap between clusters.
gene_expressions <- matrix(list(), nrow = num_clusters, ncol = 1, dimnames = list(row_col_name, "genes"))
gene_comparison_matrix <- matrix(list(), nrow = num_clusters, ncol = num_clusters, dimnames = list(row_col_name, row_col_name))
# Iterate through each cluster and extract genes expressed.
for (cluster_id in row.names(gene_expressions)) {
cluster_markers <- Seurat::FindMarkers(seurat_object, ident.1 = cluster_id, min.pct = min_percent)
gene_expressions[cluster_id, "genes"][[1]] <- rownames(cluster_markers)
}
# For each pair of clusters, determine the overlapping gene expressions and store them in a matrix.
for (cluster_id_x in row_col_name) {
for (cluster_id_y in row_col_name) {
if (cluster_id_x == cluster_id_y) {
gene_comparison_matrix[cluster_id_x, cluster_id_y][[1]] <- NA
}
else if (is.null(gene_comparison_matrix[cluster_id_x, cluster_id_y][[1]])) {
gene_overlaps <-
intersect(gene_expressions[cluster_id_x, "genes"][[1]], gene_expressions[cluster_id_y, "genes"][[1]])
gene_comparison_matrix[cluster_id_x, cluster_id_y][[1]] <- gene_overlaps
} else {
;
}
}
}
return(gene_comparison_matrix)
}
#' Plot A Graph Representing Gene Expression Relationships Between Clusters
#'
#' Plot a graph representing the gene expression relationships between clusters. The edge thickness depends
#' on how many genes the clusters share between each other. The more of the same genes they express, the thicker
#' stronger their relationship is.
#'
#' @param gene_comparison_matrix A matrix object of number of cluster by number of clusters cells, with each
#' matrix cell (x,y) containing the vector of genes expressed by both clusters x and y.
#'
#' @return Returns a graph of nodes and edges, where node (number represents cluster id) represents a cluster
#' and an edge between nodes represent the genes shared by those clusters.
#'
#' @examples
#' library(igraph)
#' library(utils)
#' library(graphics)
#' gene_comparison_matrix <- compare_cluster_genes(PBMC)
#' draw_cluster_gene_relations(gene_comparison_matrix)
#'
#' @import igraph utils graphics
#'
#' @export
draw_cluster_gene_relations <- function(gene_comparison_matrix) {
# Create a clone of the input matrix because we will replace each cell by counts of genes that overlap
# (ie. the length of the vector that occupied that cell).
another_matrix <- cbind(gene_comparison_matrix)
first_cluster <- head(row.names(gene_comparison_matrix), 1)
last_cluster <- tail(row.names(gene_comparison_matrix), 1)
# For every pair of clusters, replace the gene expression vectors with its size/count, so that
# we can use it for edge width.
for (cluster_x in first_cluster:last_cluster) {
for (cluster_y in first_cluster:last_cluster) {
if (length(another_matrix[as.character(cluster_x), as.character(cluster_y)][[1]]) == 1 &&
is.na(another_matrix[as.character(cluster_x), as.character(cluster_y)][[1]])) {
another_matrix[as.character(cluster_x), as.character(cluster_y)][[1]] <- 0
} else {
another_matrix[as.character(cluster_x), as.character(cluster_y)][[1]] <-
length(another_matrix[as.character(cluster_x), as.character(cluster_y)][[1]])
}
}
}
color_gradient <- colorRamp(c('blue', 'cyan', 'yellow', 'red'))
graph <- igraph::graph_from_adjacency_matrix(another_matrix, mode="undirected", weighted = TRUE)
# Convert weight such that it is weight/max(weight) so that we can use it to convert into rgb color.
E(graph)$weight <- E(graph)$weight / max(E(graph)$weight)
# Set the graph edge colors in a gradient to mimick a heatmap.
E(graph)$color <- apply(color_gradient(E(graph)$weight), 1, function(x) rgb(x[1]/255,x[2]/255,x[3]/255))
# Adapted from:
# ahmohamed. (February 14 2015). How to scale edge colors in igraph?. *Stackoverflow*.
# Retrieved from: https://stackoverflow.com/questions/28366329/how-to-scale-edge-colors-in-igraph
# Set this layout to have a good balance between the graph and legend.
layout(matrix(1:2,ncol=2), width = c(2,1),height = c(1,1))
igraph::plot.igraph(graph, main = 'Graph representing gene expression
relationship between clusters')
# Set these palettes for plotting the legend, additionally. This is done separately from the
# igraph plot.
color_palette <- colorRampPalette(c('red', 'yellow', 'cyan', 'blue'))
legend <- as.raster(matrix(color_palette(20), ncol = 1))
plot(c(0,5),c(0,1),type = 'n', axes = F,xlab = '', ylab = '', main = 'Commonness of genes',)
text(x=1.5, y = seq(0,1,l=5), labels = seq(0,1,l=5))
rasterImage(legend, 0, 0, 1,1)
# Above chunk is adapted from the following:
# mnel. (November 13 2012). Gradient legend in base. Stackoverflow.
# Retrieved from: https://stackoverflow.com/questions/13355176/gradient-legend-in-base
# Reset layout because otherwise the subsequent plots are affected too.
par(mfrow=c(1,1))
return(graph)
}
#' Plot A Bar Graph Representing Gene Expression Relationships Between Clusters with Numerical Context.
#'
#' Plot a bar graph representing the gene expression relationships between clusters. Each bar represents
#' how much genes are shared in the pair with respect to other pairs. Because the relationships are made
#' relative to each other, the cluster pair with the most genes shared will be indicated as 1.
#'
#' @param gene_expression_graph Graph output from draw_cluster_relations to utilize the edge weights
#'
#' @return Returns a bar graph indicating which cluster pairs share the more genes with respect to
#' the pair with the most genes shared.
#'
#' @examples
#' library(igraph)
#' library(utils)
#' library(graphics)
#' graph <- draw_cluster_gene_relations(gene_comparison_matrix)
#' plot_cluster_relations(graph)
#' @import igraph utils graphics
#'
#' @export
plot_cluster_relations <- function(gene_expression_graph) {
i <- 1
j <- 2
x_labels <- c()
vertices <- V(gene_expression_graph)$name
# This loop is working to extract all possible cluster pairs to use it for the x-asix label.
while (i <= length(vertices)) {
while (j <= length(vertices)) {
x_labels <- c(x_labels, paste(vertices[i], vertices[j], sep = ','))
j <- j + 1
}
i <- i + 1
j <- i + 1
}
height <- E(gene_expression_graph)$weight
names(height) <- x_labels
return(barplot(height, las = 2, xlab = 'Pairs of cluster ids',
ylab = 'Relative gene expression similarity',
main = 'Relative comparison of gene expression similarity between clusters',
sub = '*It is relative to the most similar cluster pair. 1.0 = the most similar cluster, but not 100% similarity'))
}
#' Combine the functions in seuratter.R to run the workflow for generating the graph.
#'
#' Work as a controller integrating extract_genes_expressed, compare_cluster_genes, and
#' draw_cluster_gene_relations.
#'
#' @param seurat_object A Seurat object that has undergone the entire workflow, involving UMAP.
#'
#' @return Returns a vector containing all the graphical outputs generated
#' by seuratter (except determine_dimensionality)
#'
#' @examples
#' library(igraph)
#' library(utils)
#' library(graphics)
#' run_seuratter(PBMC)
#'
#' @import igraph utils graphics
#'
#' @export
run_seuratter <- function(seurat_object) {
gene_comparison_matrix <- compare_cluster_genes(PBMC, 0.25)
graph <- draw_cluster_gene_relations(gene_comparison_matrix)
bar_graph <- plot_cluster_relations(graph)
return(c(gene_comparison_matrix, graph, bar_graph))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.