#' Plot geneset scores as summary plot
#'
#' @description Geneset scores are a score calculated for each single cell derived from \
#' more than one gene. This function plots geneset scores grouped with a boxplot overlaying a violin\
#' overlaying a jitter of the raw data.
#'
#' When using method 'totals', the sum of the size-factor corrected, log-normalized gene \
#' expression for a give set of genes is performed. When using method 'corrected', single \
#' cell scores for a give gene set that have been "corrected" using 100X genes with similar \
#' expression levels.
#' @param cds Input cell_data_set object.
#' @param marker_set Vector of genes in the gene_metadata DataFrame (fData) that can be found in the column 'fData_col'
#' @param name Name given to the geneset
#' @param fData_col Character string denoting the gene_metadata DataFrame (fData) column that contains the genes in marker_set1. Default = 'gene_short_name'
#' @param method 'totals' or 'corrected'. See estimate_score and estimate_corrected_score for more information
#' @return Plot
#' @import ggplot2 monocle3
#' @export
#' @references Puram, S. V. et al. Single-Cell Transcriptomic Analysis of Primary and
#' Metastatic Tumor Ecosystems in Head and Neck Cancer. Cell 171, 1611.e1–1611.e24 (2017).
plot_grouped_geneset<-function(cds, marker_set, name, by, fData_col= "gene_short_name", scale="width", facet=NULL, adjust=1.4, size=0.05, alpha=0.1,
method="totals", overlay_violinandbox=T, box_width=0.3, rotate_x=T, jitter=T, return_values=F){
if(method=="totals") pData(cds)[[name]]<-estimate_score(cds, marker_set, fData_col= fData_col)
if(method=="corrected") pData(cds)[[name]]<-estimate_corrected_score(cds, marker_set, fData_col= fData_col)
scores<-data.frame(pData(cds)[[name]], as.factor(pData(cds)[[by]]), stringsAsFactors = F)
if(!is.null(facet)){
scores<-cbind(scores, pData(cds)[[facet]])
colnames(scores)<-c(name, by, facet)
}else{
colnames(scores)<-c(name, by)
}
g<- ggplot(scores, aes_string(x=by, y=name, fill=by))
if(jitter)g<-g + geom_jitter(size=size, alpha=alpha)
if(overlay_violinandbox){
g<-g+geom_violin(scale="width")+geom_boxplot(width=box_width, fill="white", outlier.size = 0)
}
if(!is.null(facet)){
g<-g+facet_wrap(as.formula(paste("~", facet)), scales = "free")
}
if(rotate_x) {
g<-g+theme(axis.text.x=element_text(angle=90, hjust=0.95,vjust=0.2))
}
if(return_values)list(plot=g, scores=scores) else(g)
}
#' Plot geneset scores as cells
#'
#' @description Geneset scores are a score calculated for each single cell derived from \
#' more than one gene. This function plots geneset scores using monocle3's 'plot_genes' function.
#'
#' When using method 'totals', the sum of the size-factor corrected, log-normalized gene \
#' expression for a give set of genes is performed. When using method 'corrected', single \
#' cell scores for a give gene set that have been "corrected" using 100X genes with similar \
#' expression levels.
#' @param cds Input cell_data_set object.
#' @param marker_set Vector of genes in the gene_metadata DataFrame (fData) that can be found in the column 'fData_col'
#' @param name Name given to the geneset
#' @param fData_col Character string denoting the gene_metadata DataFrame (fData) column that contains the genes in marker_set1. Default = 'gene_short_name'
#' @return Plot
#' @importFrom Matrix colSums
#' @importFrom Matrix t
#' @export
#' @references Puram, S. V. et al. Single-Cell Transcriptomic Analysis of Primary and
#' Metastatic Tumor Ecosystems in Head and Neck Cancer. Cell 171, 1611.e1–1611.e24 (2017).
plot_geneset<-function(cds, marker_set, name, fData_col="gene_short_name", method=c("totals", "corrected"), reduction_method="UMAP"){
assertthat::assert_that(
tryCatch(expr = ifelse(match.arg(method) == "",TRUE, TRUE),
error = function(e) FALSE),
msg = "method must be one of 'totals' or 'corrected'")
method <- match.arg(method)
if(method=="totals") pData(cds)[[name]]<-estimate_score(cds, marker_set, fData_col=fData_col)
if(method=="corrected") pData(cds)[[name]]<-estimate_corrected_score(cds, marker_set, fData_col=fData_col)
nc<-nchar(name)
if(nc>50){fontsize<-10}else{fontsize=14}
switch(method, totals={loca="log(sums)"},
corrected={loca="log(corr.)"})
plot_cells(cds, color_cells_by = name, label_cell_groups = F, cell_size = 0.5, reduction_method = reduction_method)+
#theme(legend.position="top", legend.title = element_blank())+
theme(legend.position="top")+
#ggtitle(paste0(name, ": ", loca))+
ggtitle(name)+
theme(plot.title = element_text(size = fontsize, face = "bold"), legend.text = element_text(size=9, angle = 90, vjust=0.5, hjust=0.3))+
labs(color = loca)+
scale_color_gradientn(colors=c( "darkblue","skyblue", "white", "red", "darkred"))
}
monocle_theme_opts <- function()
{
theme(strip.background = element_rect(colour = 'white', fill = 'white')) +
theme(panel.border = element_blank()) +
theme(axis.line.x = element_line(size=0.25, color="black")) +
theme(axis.line.y = element_line(size=0.25, color="black")) +
theme(panel.grid.minor.x = element_blank(),
panel.grid.minor.y = element_blank()) +
theme(panel.grid.major.x = element_blank(),
panel.grid.major.y = element_blank()) +
theme(panel.background = element_rect(fill='white')) +
theme(legend.key=element_blank())
}
#' Plots the cells along with their trajectories.
#'
#' @param cds cell_data_set for the experiment
#' @param x the column of reducedDims(cds) to plot on the horizontal axis
#' @param y the column of reducedDims(cds) to plot on the vertical axis
#' @param cell_size The size of the point for each cell
#' @param reduction_method The lower dimensional space in which to plot cells.
#' Must be one of "UMAP", "tSNE", "PCA", "spRing", "trimap", and "LSI".
#' @param cluster_reduction_method The dimensional space from which to plot clusters.
#' Must be one of "UMAP", "tSNE", "PCA", "LSI".
#' @param color_cells_by What to use for coloring the cells. Must be either the
#' name of a column of colData(cds), or one of "clusters", "partitions", or
#' "pseudotime".
#' @param group_cells_by How to group cells when labeling them. Must be either
#' the name of a column of colData(cds), or one of "clusters" or "partitions".
#' If a column in colData(cds), must be a categorical variable.
#' @param genes Facet the plot, showing the expression of each gene in a facet
#' panel. Must be either a list of gene ids (or short names), or a dataframe
#' with two columns that groups the genes into modules that will be
#' aggregated prior to plotting. If the latter, the first column must be gene
#' ids, and the second must the group for each gene.
#' @param show_trajectory_graph Whether to render the principal graph for the
#' trajectory. Requires that learn_graph() has been called on cds.
#' @param trajectory_graph_color The color to be used for plotting the
#' trajectory graph.
#' @param trajectory_graph_segment_size The size of the line segments used for
#' plotting the trajectory graph.
#' @param norm_method How to normalize gene expression scores prior to plotting
#' them. Must be one of "log" or "size_only".
#' @param label_cell_groups Whether to label cells in each group (as specified
#' by group_cells_by) according to the most frequently occurring label(s) (as
#' specified by color_cells_by) in the group. If false, plot_cells() simply
#' adds a traditional color legend.
#' @param label_groups_by_cluster Instead of labeling each cluster of cells,
#' place each label once, at the centroid of all cells carrying that label.
#' @param group_label_size Font size to be used for cell group labels.
#' @param labels_per_group How many labels to plot for each group of cells.
#' Defaults to 1, which plots only the most frequent label per group.
#' @param label_branch_points Whether to plot a label for each branch point in
#' the principal graph.
#' @param label_roots Whether to plot a label for each root in the principal
#' graph.
#' @param label_leaves Whether to plot a label for each leaf node in the
#' principal graph.
#' @param graph_label_size How large to make the branch, root, and leaf labels.
#' @param alpha Alpha for the cells. Useful for reducing overplotting.
#' @param min_expr Minimum expression threshold for plotting genes
#' @param rasterize Whether to plot cells as a rastered bitmap. Requires the
#' ggrastr package.
#' @importFrom ggplot2 ggplot
#' @import monocle3
#' @return a ggplot2 plot object
#' @export
#' @examples
#' \dontrun{
#' lung <- load_A549()
#' plot_cells(lung)
#' plot_cells(lung, color_cells_by="log_dose")
#' plot_cells(lung, markers="GDF15")
#' }
#' @references this function differs from that found in monocle3 as it allows for use of spRing \
#' dimensionality reduction.
plot_cells_m3addon <- function(cds,
x=1,
y=2,
reduction_method = c("UMAP", "tSNE", "PCA", "LSI", "spRing", "trimap"),
color_cells_by="cluster",
cluster_reduction_method = c("UMAP", "tSNE", "PCA", "LSI", "spRing", "trimap"),
group_cells_by=c("cluster", "partition"),
genes=NULL,
show_trajectory_graph=TRUE,
trajectory_graph_color="grey28",
trajectory_graph_segment_size=0.75,
norm_method = c("log", "size_only"),
label_cell_groups = TRUE,
label_groups_by_cluster=TRUE,
group_label_size=2,
labels_per_group=1,
label_branch_points=TRUE,
label_roots=TRUE,
label_leaves=TRUE,
graph_label_size=2,
cell_size=0.35,
alpha = 1,
min_expr=0.1,
rasterize=FALSE,
legend_title_is_marker=TRUE) {
reduction_method <- match.arg(reduction_method)
cluster_reduction_method <- match.arg(cluster_reduction_method)
assertthat::assert_that(methods::is(cds, "cell_data_set"))
assertthat::assert_that(!is.null(reducedDims(cds)[[reduction_method]]),
msg = paste("No dimensionality reduction for",
reduction_method, "calculated.",
"Please run reduce_dimensions with",
"reduction_method =", reduction_method,
"before attempting to plot."))
# assertthat::assert_that(!is.null(clusters(cds, cluster_reduction_method)),
# msg = paste("No dimensionality reduction for",
# cluster_reduction_method, "calculated.",
# "Please run cluster_cells with",
# "reduction_method =", reduction_method,
# "before attempting to plot."))
low_dim_coords <- reducedDims(cds)[[reduction_method]]
assertthat::assert_that(ncol(low_dim_coords) >=max(x,y),
msg = paste("x and/or y is too large. x and y must",
"be dimensions in reduced dimension",
"space."))
if(!is.null(color_cells_by)) {
assertthat::assert_that(color_cells_by %in% c("cluster", "partition",
"pseudotime") |
color_cells_by %in% names(colData(cds)),
msg = paste("color_cells_by must one of",
"'cluster', 'partition', 'pseudotime,",
"or a column in the colData table."))
if(color_cells_by == "pseudotime") {
tryCatch({pseudotime(cds, reduction_method = reduction_method)},
error = function(x) {
stop(paste("No pseudotime for", reduction_method, "calculated. Please",
"run order_cells with reduction_method =", reduction_method,
"before attempting to color by pseudotime."))})
}
}
assertthat::assert_that(!is.null(color_cells_by) || !is.null(markers),
msg = paste("Either color_cells_by or markers must",
"be NULL, cannot color by both!"))
norm_method = match.arg(norm_method)
group_cells_by=match.arg(group_cells_by)
assertthat::assert_that(!is.null(color_cells_by) || !is.null(genes),
msg = paste("Either color_cells_by or genes must be",
"NULL, cannot color by both!"))
if (show_trajectory_graph &&
is.null(principal_graph(cds)[[reduction_method]])) {
message("No trajectory to plot. Has learn_graph() been called yet?")
show_trajectory_graph = FALSE
}
gene_short_name <- NA
sample_name <- NA
#sample_state <- colData(cds)$State
data_dim_1 <- NA
data_dim_2 <- NA
if (rasterize){
plotting_func <- ggrastr::geom_point_rast
}else{
plotting_func <- ggplot2::geom_point
}
S_matrix <- reducedDims(cds)[[reduction_method]]
data_df <- data.frame(S_matrix[,c(x,y)])
colnames(data_df) <- c("data_dim_1", "data_dim_2")
data_df$sample_name <- row.names(data_df)
data_df <- as.data.frame(cbind(data_df, colData(cds)))
if (group_cells_by == "cluster"){
data_df$cell_group <-
tryCatch({clusters(cds,
reduction_method = cluster_reduction_method)[
data_df$sample_name]},
error = function(e) {NULL})
} else if (group_cells_by == "partition") {
data_df$cell_group <-
tryCatch({partitions(cds,
reduction_method = reduction_method)[
data_df$sample_name]},
error = function(e) {NULL})
} else{
stop("Error: unrecognized way of grouping cells.")
}
if (color_cells_by == "cluster"){
data_df$cell_color <-
tryCatch({clusters(cds,
reduction_method = cluster_reduction_method)[
data_df$sample_name]},
error = function(e) {NULL})
} else if (color_cells_by == "partition") {
data_df$cell_color <-
tryCatch({partitions(cds,
reduction_method = reduction_method)[
data_df$sample_name]},
error = function(e) {NULL})
} else if (color_cells_by == "pseudotime") {
data_df$cell_color <-
tryCatch({pseudotime(cds,
reduction_method = reduction_method)[
data_df$sample_name]}, error = function(e) {NULL})
} else{
data_df$cell_color <- colData(cds)[data_df$sample_name,color_cells_by]
}
## Graph info
if (show_trajectory_graph) {
ica_space_df <- t(cds@principal_graph_aux[[reduction_method]]$dp_mst) %>%
as.data.frame() %>%
dplyr::select_(prin_graph_dim_1 = x, prin_graph_dim_2 = y) %>%
dplyr::mutate(sample_name = rownames(.),
sample_state = rownames(.))
dp_mst <- cds@principal_graph[[reduction_method]]
edge_df <- dp_mst %>%
igraph::as_data_frame() %>%
dplyr::select_(source = "from", target = "to") %>%
dplyr::left_join(ica_space_df %>%
dplyr::select_(
source="sample_name",
source_prin_graph_dim_1="prin_graph_dim_1",
source_prin_graph_dim_2="prin_graph_dim_2"),
by = "source") %>%
dplyr::left_join(ica_space_df %>%
dplyr::select_(
target="sample_name",
target_prin_graph_dim_1="prin_graph_dim_1",
target_prin_graph_dim_2="prin_graph_dim_2"),
by = "target")
}
## Marker genes
markers_exprs <- NULL
expression_legend_label <- NULL
if (!is.null(genes)) {
if (!is.null(dim(genes)) && dim(genes) >= 2){
markers = unlist(genes[,1], use.names=FALSE)
} else {
markers = genes
}
markers_rowData <- as.data.frame(subset(rowData(cds),
gene_short_name %in% markers |
row.names(rowData(cds)) %in%
markers))
if (nrow(markers_rowData) == 0) {
stop("None of the provided genes were found in the cds")
}
if (nrow(markers_rowData) >= 1) {
cds_exprs <- SingleCellExperiment::counts(cds)[row.names(markers_rowData), ,drop=FALSE]
cds_exprs <- Matrix::t(Matrix::t(cds_exprs)/size_factors(cds))
if (!is.null(dim(genes)) && dim(genes) >= 2){
genes = as.data.frame(genes)
row.names(genes) = genes[,1]
genes = genes[row.names(cds_exprs),]
agg_mat = as.matrix(monocle3:::my.aggregate.Matrix(cds_exprs, as.factor(genes[,2]), fun="sum"))
agg_mat = t(scale(t(log10(agg_mat + 1))))
agg_mat[agg_mat < -2] = -2
agg_mat[agg_mat > 2] = 2
markers_exprs = agg_mat
markers_exprs <- reshape2::melt(markers_exprs)
colnames(markers_exprs)[1:2] <- c('feature_id','cell_id')
if (is.factor(genes[,2]))
markers_exprs$feature_id = factor(markers_exprs$feature_id,
levels=levels(genes[,2]))
markers_exprs$feature_label <- markers_exprs$feature_id
norm_method = "size_only"
expression_legend_label = "Expression score"
} else {
cds_exprs@x = round(cds_exprs@x)
markers_exprs = matrix(cds_exprs, nrow=nrow(markers_rowData))
colnames(markers_exprs) = colnames(SingleCellExperiment::counts(cds))
row.names(markers_exprs) = row.names(markers_rowData)
markers_exprs <- reshape2::melt(markers_exprs)
colnames(markers_exprs)[1:2] <- c('feature_id','cell_id')
markers_exprs <- merge(markers_exprs, markers_rowData,
by.x = "feature_id", by.y="row.names")
markers_exprs$feature_label <-
as.character(markers_exprs$gene_short_name)
markers_exprs$feature_label <- ifelse(is.na(markers_exprs$feature_label) | !as.character(markers_exprs$feature_label) %in% markers,
as.character(markers_exprs$feature_id),
as.character(markers_exprs$feature_label))
markers_exprs$feature_label <- factor(markers_exprs$feature_label,
levels = markers)
if (norm_method == "size_only")
expression_legend_label = "Expression"
else
expression_legend_label = "log10(Expression)"
}
}
}
if (label_cell_groups && is.null(color_cells_by) == FALSE){
if (is.null(data_df$cell_color)){
if (is.null(genes)){
message(paste(color_cells_by, "not found in colData(cds), cells will",
"not be colored"))
}
text_df = NULL
label_cell_groups = FALSE
}else{
if(is.character(data_df$cell_color) || is.factor(data_df$cell_color)) {
if (label_groups_by_cluster && is.null(data_df$cell_group) == FALSE){
text_df = data_df %>%
dplyr::group_by(cell_group) %>%
dplyr::mutate(cells_in_cluster= dplyr::n()) %>%
dplyr::group_by(cell_color, add=TRUE) %>%
dplyr::mutate(per=dplyr::n()/cells_in_cluster)
median_coord_df = text_df %>%
dplyr::summarize(fraction_of_group = dplyr::n(),
text_x = stats::median(x = data_dim_1),
text_y = stats::median(x = data_dim_2))
text_df = suppressMessages(text_df %>% dplyr::select(per) %>%
dplyr::distinct())
text_df = suppressMessages(dplyr::inner_join(text_df,
median_coord_df))
text_df = text_df %>% dplyr::group_by(cell_group) %>%
dplyr::top_n(labels_per_group, per)
} else {
text_df = data_df %>% dplyr::group_by(cell_color) %>%
dplyr::mutate(per=1)
median_coord_df = text_df %>%
dplyr::summarize(fraction_of_group = dplyr::n(),
text_x = stats::median(x = data_dim_1),
text_y = stats::median(x = data_dim_2))
text_df = suppressMessages(text_df %>% dplyr::select(per) %>%
dplyr::distinct())
text_df = suppressMessages(dplyr::inner_join(text_df,
median_coord_df))
text_df = text_df %>% dplyr::group_by(cell_color) %>%
dplyr::top_n(labels_per_group, per)
}
text_df$label = as.character(text_df %>% dplyr::pull(cell_color))
# I feel like there's probably a good reason for the bit below, but I
# hate it and I'm killing it for now.
# text_df$label <- paste0(1:nrow(text_df))
# text_df$process_label <- paste0(1:nrow(text_df), '_',
# as.character(as.matrix(text_df[, 1])))
# process_label <- text_df$process_label
# names(process_label) <- as.character(as.matrix(text_df[, 1]))
# data_df[, group_by] <-
# process_label[as.character(data_df[, group_by])]
# text_df$label = process_label
} else {
message(paste("Cells aren't colored in a way that allows them to",
"be grouped."))
text_df = NULL
label_cell_groups = FALSE
}
}
}
if (!is.null(markers_exprs) && nrow(markers_exprs) > 0){
data_df <- merge(data_df, markers_exprs, by.x="sample_name",
by.y="cell_id")
data_df$value <- with(data_df, ifelse(value >= min_expr, value, NA))
na_sub <- data_df[is.na(data_df$value),]
if (legend_title_is_marker)
expression_legend_label = as.character(markers_exprs$feature_label[1])
if(norm_method == "size_only"){
g <- ggplot(data=data_df, aes(x=data_dim_1, y=data_dim_2)) +
plotting_func(aes(data_dim_1, data_dim_2), size=I(cell_size),
stroke = I(cell_size / 2), color = "grey80",
data = na_sub) +
plotting_func(aes(color=value), size=I(cell_size),
stroke = I(cell_size / 2), na.rm = TRUE) +
viridis::scale_color_viridis(option = "viridis",
name = expression_legend_label,
na.value = "grey80", end = 0.8) +
guides(alpha = FALSE) + facet_wrap(~feature_label)
} else {
g <- ggplot(data=data_df, aes(x=data_dim_1, y=data_dim_2)) +
plotting_func(aes(data_dim_1, data_dim_2), size=I(cell_size),
stroke = I(cell_size / 2), color = "grey80",
data = na_sub) +
plotting_func(aes(color=log10(value+min_expr)),
size=I(cell_size), stroke = I(cell_size / 2),
na.rm = TRUE) +
viridis::scale_color_viridis(option = "viridis",
name = expression_legend_label,
na.value = "grey80", end = 0.8) +
guides(alpha = FALSE) + facet_wrap(~feature_label)
}
} else {
g <- ggplot(data=data_df, aes(x=data_dim_1, y=data_dim_2))
# We don't want to force users to call order_cells before even being able
# to look at the trajectory, so check whether it's null and if so, just
# don't color the cells
if(color_cells_by %in% c("cluster", "partition")){
if (is.null(data_df$cell_color)){
g <- g + geom_point(color=I("gray"), size=I(cell_size), stroke = I(cell_size / 2), na.rm = TRUE,
alpha = I(alpha))
message(paste("cluster_cells() has not been called yet, can't",
"color cells by cluster"))
} else{
g <- g + geom_point(aes(color = cell_color), size=I(cell_size), stroke = I(cell_size / 2),
na.rm = TRUE, alpha = alpha)
}
g <- g + guides(color = guide_legend(title = color_cells_by,
override.aes = list(size = 4)))
} else if (class(data_df$cell_color) == "numeric"){
g <- g + geom_point(aes(color = cell_color), size=I(cell_size), stroke = I(cell_size / 2),
na.rm = TRUE, alpha = alpha)
g <- g + viridis::scale_color_viridis(name = color_cells_by, option="C")
} else {
g <- g + geom_point(aes(color = cell_color), size=I(cell_size), stroke = I(cell_size / 2),
na.rm = TRUE, alpha = alpha)
g <- g + guides(color = guide_legend(title = color_cells_by,
override.aes = list(size = 4)))
}
}
if (show_trajectory_graph){
g <- g + geom_segment(aes_string(x="source_prin_graph_dim_1",
y="source_prin_graph_dim_2",
xend="target_prin_graph_dim_1",
yend="target_prin_graph_dim_2"),
size=trajectory_graph_segment_size,
color=I(trajectory_graph_color),
linetype="solid",
na.rm=TRUE,
data=edge_df)
if (label_branch_points){
mst_branch_nodes <- monocle3:::branch_nodes(cds)
branch_point_df <- ica_space_df %>%
dplyr::slice(match(names(mst_branch_nodes), sample_name)) %>%
dplyr::mutate(branch_point_idx = seq_len(dplyr::n()))
g <- g +
geom_point(aes_string(x="prin_graph_dim_1", y="prin_graph_dim_2"),
shape = 21, stroke=I(trajectory_graph_segment_size),
color="white",
fill="black",
size=I(graph_label_size * 1.5),
na.rm=TRUE, branch_point_df) +
geom_text(aes_string(x="prin_graph_dim_1", y="prin_graph_dim_2",
label="branch_point_idx"),
size=I(graph_label_size), color="white", na.rm=TRUE,
branch_point_df)
}
if (label_leaves){
mst_leaf_nodes <- monocle3:::leaf_nodes(cds)
leaf_df <- ica_space_df %>%
dplyr::slice(match(names(mst_leaf_nodes), sample_name)) %>%
dplyr::mutate(leaf_idx = seq_len(dplyr::n()))
g <- g +
geom_point(aes_string(x="prin_graph_dim_1", y="prin_graph_dim_2"),
shape = 21, stroke=I(trajectory_graph_segment_size),
color="black",
fill="lightgray",
size=I(graph_label_size * 1.5),
na.rm=TRUE,
leaf_df) +
geom_text(aes_string(x="prin_graph_dim_1", y="prin_graph_dim_2",
label="leaf_idx"),
size=I(graph_label_size), color="black", na.rm=TRUE, leaf_df)
}
if (label_roots){
mst_root_nodes <- monocle3:::root_nodes(cds)
root_df <- ica_space_df %>%
dplyr::slice(match(names(mst_root_nodes), sample_name)) %>%
dplyr::mutate(root_idx = seq_len(dplyr::n()))
g <- g +
geom_point(aes_string(x="prin_graph_dim_1", y="prin_graph_dim_2"),
shape = 21, stroke=I(trajectory_graph_segment_size),
color="black",
fill="white",
size=I(graph_label_size * 1.5),
na.rm=TRUE,
root_df) +
geom_text(aes_string(x="prin_graph_dim_1", y="prin_graph_dim_2",
label="root_idx"),
size=I(graph_label_size), color="black", na.rm=TRUE, root_df)
}
}
if(label_cell_groups) {
g <- g + ggrepel::geom_text_repel(data = text_df,
mapping = aes_string(x = "text_x",
y = "text_y",
label = "label"),
size=I(group_label_size))
# If we're coloring by gene expression, don't hide the legend
if (is.null(markers_exprs))
g <- g + theme(legend.position="none")
}
g <- g +
#scale_color_brewer(palette="Set1") +
monocle3:::monocle_theme_opts() +
xlab(paste(reduction_method, x)) +
ylab(paste(reduction_method, y)) +
#guides(color = guide_legend(label.position = "top")) +
theme(legend.key = element_blank()) +
theme(panel.background = element_rect(fill='white'))
g
}
#' GSEA and plot
#' @description Performs GSEA (using fgsea) and returns a GSEA-style enrichment plot. Optionally create an \
#' enrichment plot with multiple layers that correspong to enrichnment of a given "pathway" in a list of ranked genes \
#' such that each entry in a list corresponds to a different enrichment plot. Alternatively, one may supply a list of \
#' pathways to overlay the enrichment of multiple pathways on one ranked list. Can't do both though....
#' @param pathway = vector (or list) of genes. Names of list will define plot coloring.
#' @param stats = vector (or list) of ranked gene stats (usually fold change or SNR) with names \
#' that contain the gene name. Names of list will define plot coloring.
#' @param rug = whether to make a rug plot(s).
#' @param dot_enhance character string denoting a color that enhances the dot appearance \
#' with another color
#' @import ggplot2
#' @importFrom fgsea calcGseaStat
#' @param all_the_rest_of_them Should be self explanatory
#' @return Performs GSEA of "pathway" genes on stats'
#' @references fgsea package
#' @export
# enrichmentPlot<-function (pathway, stats,
# gseaParam = 1,
# segment=F, rug=T,
# rug_color="black", segment_color="black",
# dot_color="green", dot_enhance=NULL,
# dot_enhance_size=2, dot_shape=21,
# dot_enhance_alpha=0.7, dot_size=1,
# return_data=FALSE,
# print_plot=FALSE,
# return_plot=TRUE)
# {
# rnk <- rank(-stats)
# ord <- order(rnk)
# statsAdj <- stats[ord]
# statsAdj <- sign(statsAdj) * (abs(statsAdj)^gseaParam)
# statsAdj <- statsAdj/max(abs(statsAdj))
# pathway <- unname(as.vector(na.omit(match(pathway, names(statsAdj)))))
# pathway <- sort(pathway)
# gseaRes <- calcGseaStat(statsAdj, selectedStats = pathway,
# returnAllExtremes = TRUE, returnLeadingEdge = TRUE)
# #bottoms <- gseaRes$bottoms
# tops <- gseaRes$tops
# n <- length(statsAdj)
# xs <- as.vector(pathway)
# ys <- as.vector(rbind(tops))
# le <- c(rep(1, length(xs)))
# le[xs %in% gseaRes$le]<-5
# le_bool<-le==5
# toPlot <- data.frame(x = c(0, xs, n + 1), y = c(0, ys, 0), le=c(0,le,0))
# diff <- (max(ys) - min(ys))/6
# df_out<-data.frame(Rank = c(xs), ES = c(ys), LE=le_bool, row.names = names(tops))
# x = y = NULL
# g <- ggplot(toPlot, aes(x = x, y = y)) + geom_point(color = dot_color,
# size = dot_size) + geom_hline(yintercept = max(ys), colour = "black",
# linetype = "dashed") + geom_hline(yintercept = min(ys),
# colour = "black", linetype = "dashed") + geom_hline(yintercept = 0,
# colour = "black") + geom_line(color = segment_color) + theme_bw()
# # if(rug) {g<-g+geom_segment(data = data.frame(x = pathway), mapping = aes(x = x,
# # y = min(ys)-diff/2, xend = x, yend = min(ys)-diff), size = 0.2, color=rug_color)}
# if(!is.null(dot_enhance)) {g<-g+geom_point(color = dot_enhance,
# size = dot_enhance_size, shape=dot_shape, alpha=dot_enhance_alpha)}
#
# if(rug){ g<-g+ geom_point(data = toPlot, aes(x = x,
# y = min(ys)-diff, size = le), colour = rug_color,shape = 124)}
# g<-g+theme(panel.border = element_blank(), panel.grid.minor = element_blank()) +
# labs(x = "rank", y = "enrichment score")+ theme(legend.position="none")
# if(print_plot) print(g)
# if(return_data) return(list(plot=g, gseaRes=gseaRes, df_out=df_out))
# if(return_plot) return(g)
# }
enrichmentPlot<-function (pathway, stats,
gseaParam = 1, rug=T, dot_enhance="darkgrey",
dot_enhance_size=2, dot_shape=21,
dot_enhance_alpha=0.7, dot_size=1,
return_data=FALSE,
print_plot=FALSE,
return_plot=TRUE)
{
list_amenable_args<-c("stats", "pathway")
for(arg in list_amenable_args){
if(class(get(arg))!="list") {assign(arg, list(get(arg)))}
}
if(is.null(names(stats))){names(stats)=1:length(stats)}
if(is.null(names(pathway))){names(pathway)=1:length(pathway)}
stats_list_process<-function(stats, pathway){
datalist<-lapply(1:length(stats), function(n){
rnk <- rank(-stats[[n]])
ord <- order(rnk)
statsAdj <- stats[[n]][ord]
statsAdj <- sign(statsAdj) * (abs(statsAdj)^gseaParam)
statsAdj <- statsAdj/max(abs(statsAdj), na.rm = T)
pw <- unname(as.vector(na.omit(match(pathway[[1]], names(statsAdj)))))
pw <- sort(pw)
gseaRes <- calcGseaStat(statsAdj, selectedStats = pw,
returnAllExtremes = TRUE, returnLeadingEdge = TRUE)
tops <- gseaRes$tops
i <- length(statsAdj)
xs <- as.vector(pw)
ys <- as.vector(rbind(tops))
le <- c(rep(1, length(xs)))
le[xs %in% gseaRes$le]<-5
le_bool<-le==5
toPlot <- data.frame(x = c(0, xs, i + 1), y = c(0, ys, 0), le=c(0,le,0))
#diff <- (max(ys) - min(ys))/6
df_out<-data.frame(Rank = c(xs), ES = c(ys), LE=le_bool, row.names = names(tops))
df_out$group<-names(stats)[n]
toPlot$group<-names(stats)[n]
list(df_out=df_out, toPlot=toPlot, gseaRes=gseaRes)
})
return(datalist)
}
pathway_list_process<-function(stats, pathway){
datalist<-lapply(1:length(pathway), function(n){
rnk <- rank(-stats[[1]])
ord <- order(rnk)
statsAdj <- stats[[1]][ord]
statsAdj <- sign(statsAdj) * (abs(statsAdj)^gseaParam)
statsAdj <- statsAdj/max(abs(statsAdj), na.rm = T)
pw <- unname(as.vector(na.omit(match(pathway[[n]], names(statsAdj)))))
pw <- sort(pw)
gseaRes <- calcGseaStat(statsAdj, selectedStats = pw,
returnAllExtremes = TRUE, returnLeadingEdge = TRUE)
tops <- gseaRes$tops
i <- length(statsAdj)
xs <- as.vector(pw)
ys <- as.vector(rbind(tops))
le <- c(rep(1, length(xs)))
le[xs %in% gseaRes$le]<-5
le_bool<-le==5
toPlot <- data.frame(x = c(0, xs, i + 1), y = c(0, ys, 0), le=c(0,le,0))
#diff <- (max(ys) - min(ys))/6
df_out<-data.frame(Rank = c(xs), ES = c(ys), LE=le_bool, row.names = names(tops))
df_out$group<-names(pathway)[n]
toPlot$group<-names(pathway)[n]
list(df_out=df_out, toPlot=toPlot, gseaRes=gseaRes)
})
return(datalist)
}
if(length(stats)==1 & length(pathway) > 1){
datalist<-pathway_list_process(stats, pathway)
}else{
datalist<-stats_list_process(stats, pathway)
}
plotList<-lapply(datalist, "[[", 2)
maxs<-max(sapply(plotList, function(df) max(df$y)))
mins<-min(sapply(plotList, function(df) min(df$y)))
y_rug_counter<-mins - (maxs-mins)/6
y_rug_diff<-(maxs-mins)/10
for(i in 1:length(plotList)){
plotList[[i]]$y_rug<-y_rug_counter
y_rug_counter <- y_rug_counter - y_rug_diff
}
#find allgroup_min
dataList<-lapply(datalist, "[[", 1)
gseaRes<-lapply(datalist, "[[", 3)
dataList<-lapply(dataList, function(df){
df$marker<-rownames(df)
rownames(df)<-NULL
df
})
mergedPlot<-do.call(rbind, plotList)
dataList<-do.call(rbind, dataList)
x = y = NULL
g <- ggplot(mergedPlot, aes(x = x, y = y, color=group))+
geom_point(size = dot_size)+
geom_hline(yintercept = maxs, linetype = "dashed")+
geom_hline(yintercept = mins, linetype = "dashed")+
geom_hline(yintercept = 0, colour = "black")+
geom_line()+
theme_bw()
# if(rug) {g<-g+geom_segment(data = data.frame(x = pathway), mapping = aes(x = x,
# y = min(ys)-diff/2, xend = x, yend = min(ys)-diff), size = 0.2, color=rug_color)}
if(!is.null(dot_enhance)) {g<-g+
geom_point(color = dot_enhance, size = dot_enhance_size, shape=dot_shape, alpha=dot_enhance_alpha)}
if(rug){ g<-g+ geom_point(data = mergedPlot, aes(x = x,
y = y_rug, size = le, color=group), shape = 124)}
g<-g+theme(panel.border = element_blank(), panel.grid.minor = element_blank()) +
labs(x = "rank", y = "enrichment score")+ guides( size = FALSE)
if(print_plot) print(g)
if(return_data) return(list(plot=g, gseaRes=gseaRes, df_out=dataList))
if(return_plot) return(g)
}
#' @keywords internal
compileStats<-function(gsa, gs=NULL){
#this function collapses adjusted stats from a gsa (piano package) generated using gsea or fgsea
stats<-data.frame(up=as.vector(gsa$pAdjDistinctDirUp), down=as.vector(gsa$pAdjDistinctDirDn))
if(is.null(gs)){
gs<-names(gsa$gsc)
}
stats[is.na(stats)]<-1
stats<-1-stats
dir<-colnames(stats)[max.col(stats, ties.method = "first")]
stats<-abs(stats-1)
stats$dir<-dir
stats$name<-names(gsa$gsc)
return( stats[stats$name %in% gs, ])
}
#' @export
returnFDR<-function(gsa, gs=NULL){
#this function returns a stat line describing the FDR and direction of a gsa generated using gsea or fsea
if(class(gsa)=="GSAres" & length(gs)==1){
dat<-compileStats(gsa=gsa, gs=gs)
return(paste0("FDR = ", round(dat[[dat$dir]], 6), " in the ",dat$dir, " direction"))
}
if(class(gsa)=="list"){
ru<-lapply(1:length(gsa), function(num){
dat<-compileStats(gsa=gsa[[num]], gs=gs)
lineout<-paste0(names(gsa)[num], " <= ",round(dat[[dat$dir]], 4))
})
return(paste0("FDR: ", paste0(unlist(ru), collapse="; ")))
}
if(length(gs)>1){
ru<-lapply(1:length(gs), function(num){
dat<-compileStats(gsa=gsa, gs=gs[num])
lineout<-paste0(gs[num], " <= ",round(dat[[dat$dir]], 4))
})
return(paste0("FDR: ", paste0(unlist(ru), collapse="; ")))
}
}
#' @keywords internal
monocle_theme_opts <- function()
{
theme(strip.background = element_rect(colour = 'white', fill = 'white')) +
theme(panel.border = element_blank()) +
theme(axis.line.x = element_line(size=0.25, color="black")) +
theme(axis.line.y = element_line(size=0.25, color="black")) +
theme(panel.grid.minor.x = element_blank(),
panel.grid.minor.y = element_blank()) +
theme(panel.grid.major.x = element_blank(),
panel.grid.major.y = element_blank()) +
theme(panel.background = element_rect(fill='white')) +
theme(legend.key=element_blank())
}
#' Plot rho and delta for Density Peak
#'
#' @description Plots the decision map of density clusters.
#' @param cds Input cell_data_set object.
#' @import ggplot2
#' @export
plot_rho_delta<-function (cds)
{
if (!is.null(cds@reduce_dim_aux$densityPeak)) {
df <- data.frame(rho = cds@reduce_dim_aux$densityPeak$rho, delta = cds@reduce_dim_aux$densityPeak$delta)
g <- qplot(rho, delta, data = df, alpha = I(0.5)) +
monocle3:::monocle_theme_opts() + theme(legend.position = "top",
legend.key.height = grid::unit(0.35, "in")) + theme(legend.key = element_blank()) +
theme(panel.background = element_rect(fill = "white"))
}
else {
stop("Please run Density_Peak before using this plotting function")
}
g
}
#' Plot violin plot of gene expression by group
#'
#' @description You can figure it out
#' @param cds Input cell_data_set object.
#' @import ggplot2
#' @importFrom reshape2 melt
#' @importFrom data.table as.data.table
#' @export
plot_genes_violin<- function (cds_subset, grouping = "Cluster",
min_expr = 0.1, scale_y=NULL,
cell_size = 0.75,
nrow = NULL, ncol = 1,
panel_order = NULL,
color_by = NULL,
remove_zeros=T,
plot_trend = F,
label_by_short_name = TRUE,
relative_expr = TRUE, lognorm = TRUE,
noise=FALSE, adjust=2, scale="width",
jitter=FALSE, trim=FALSE,
jitter_alpha = 0.3, round=FALSE, jitterzeros=FALSE,
jitter_width = 0.05, jitter_height = 0,
box_color="red", violin_alpha=1,
boxplot=F,
bwidth=0.3, bcolor="white")
{
if (relative_expr) {
cds_exprs<-exprs(cds_subset)
if (is.null(size_factors(cds_subset))) {
stop("Error: to call this function with relative_expr=TRUE, you must call estimateSizeFactors() first")
}
cds_exprs <- Matrix::t(Matrix::t(cds_exprs)/size_factors(cds_subset))
if(round){
cds_exprs <- as.data.table(melt(round(as.matrix(cds_exprs))))
}else{
cds_exprs <- as.data.table(melt(as.matrix(cds_exprs)))}
}else {
cds_exprs <- exprs(cds_subset)
cds_exprs <- as.data.table(melt(as.matrix(cds_exprs)))
}
if (is.null(min_expr)) {
min_expr <- 1
}
colnames(cds_exprs) <- c("f_id", "Cell", "expression")
cds_exprs$expression[cds_exprs$expression < min_expr] <- min_expr
cds_pData <- as.data.table(pData(cds_subset))
cds_pData$rn<-rownames(pData(cds_subset))
cds_fData <- as.data.table(fData(cds_subset))
cds_fData$rn<-rownames(fData(cds_subset))
cds_exprs$Cell<-as.character(cds_exprs$Cell)
cds_exprs <- merge(cds_exprs, cds_fData, by.x = "f_id", by.y = "rn")
cds_exprs <- merge(cds_exprs, cds_pData, by.x = "Cell", by.y = "rn")
cds_exprs$adjusted_expression <- log10(cds_exprs$expression+1)
if (label_by_short_name == TRUE) {
if (is.null(cds_exprs$gene_short_name) == FALSE) {
cds_exprs$feature_label <- cds_exprs$gene_short_name
cds_exprs$feature_label[is.na(cds_exprs$feature_label)] <- cds_exprs$f_id
}
else {
cds_exprs$feature_label <- cds_exprs$f_id
}
}
else {
cds_exprs$feature_label <- cds_exprs$f_id
}
if (is.null(panel_order) == FALSE) {
cds_exprs$feature_label <- factor(cds_exprs$feature_label,
levels = panel_order)
}
if(noise==TRUE){
noise <- rnorm(length(cds_exprs$adjusted_expression))/100000
cds_exprs$adjusted_expression=cds_exprs$adjusted_expression+noise
}
if(lognorm==TRUE){
if(remove_zeros){
cds_exprs$adjusted_expression[cds_exprs$adjusted_expression==0]<-NA
}
q <- ggplot(aes_string(x = grouping, y = "adjusted_expression"), data = cds_exprs)
}else{
if(remove_zeros){
cds_exprs$expression[cds_exprs$expression==0]<-NA
}
q <- ggplot(aes_string(x = grouping, y = "expression"), data = cds_exprs)
}
if (is.null(color_by) == FALSE) {
q <- q + geom_violin(aes_string(fill = color_by), adjust=adjust, scale=scale, trim=trim, alpha=violin_alpha)
}
else {
q <- q + geom_violin(adjust=adjust, scale="width")
}
if(jitter){
if(!jitterzeros){
dat<-layer_data(q)
dat$y[dat$y==0]<-NA
#P$layers <- c(geom_boxplot(), P$layers)
q <- q + geom_jitter(data = dat, aes(x=x, y=y), height = jitter_height, width = jitter_width, alpha=jitter_alpha)
if (is.null(color_by) == FALSE) {
q <- q + geom_violin(aes_string(fill = color_by), adjust=adjust, scale=scale, trim=trim, alpha=violin_alpha)
}
else {
q <- q + geom_violin(adjust=adjust, scale="width")
}
}else{
q <- q + geom_jitter(height = jitter_height, width = jitter_width, alpha=jitter_alpha)
if (is.null(color_by) == FALSE) {
q <- q + geom_violin(aes_string(fill = color_by), adjust=adjust, scale=scale, trim=trim, alpha=violin_alpha)
}
else {
q <- q + geom_violin(adjust=adjust, scale="width")
}
}
}
if (plot_trend == TRUE) {
q <- q + stat_sum_df("mean_cl_boot", mapping = aes(group = grouping), fill=box_color)
# q <- q + stat_summary(aes_string(x = grouping, y = "adjusted_expression"),
# color="black", size=0.5, fun.data = "mean_cl_boot", geom="hist")
if(lognorm==TRUE){
q <- q + stat_sum_df("mean_cl_boot", mapping = aes(group = grouping), geom="line", file=box_color)
# q <- q + stat_summary(aes_string(x = grouping, y = "adjusted_expression"),
# color="black", fun.data = "mean_cl_boot",
# size = 0.5, geom = "line")
}else{
q <- q + stat_sum_df("mean_cl_boot", mapping = aes(group = grouping), geom="line", fill=box_color)
}
}
q <- q + facet_wrap(~feature_label, nrow = nrow,
ncol = ncol, scales = "free_y")
if (min_expr < 1) {
q <- q + expand_limits(y = c(min_expr, 1))
}
if(lognorm==TRUE){
q <- q + ylab("Log Expression") + xlab(grouping)
}else{
q <- q + ylab("Expression") + xlab(grouping)
}
q <- q + monocle3:::monocle_theme_opts()
if(boxplot){
q<-q + geom_boxplot(width=bwidth, fill=bcolor)
}
if(!is.null(scale_y)){
q<-q + ylim(scale_y)
}
q
}
#' Plot heatmap of gene expression by group
#'
#' @description You can figure it out
#' @param cds Input cell_data_set object.
#' @import ggplot2
#' @importFrom reshape2 melt
#' @importFrom data.table as.data.table
#' @importFrom dplyr group_by
#' @export
plot_heatmap <- function (cds, markers, sample_cell_num = NULL, group_by = "Cluster",
show_rownames = TRUE, gene_name_size = 8, scale_max = 3, col_low = "cyan",
col_mid = "black", col_high = "red",
scale_min = -3, minimal_cluster_fraction = NULL, return_heatmap = FALSE, cluster_row = T, cluster_col = F,
verbose = FALSE, method="ggplot", ...)
{
markers <- rev(as.character(markers))
if (!is.null(minimal_cluster_fraction)) {
cluster_cell_num <- pData(cds) %>% dplyr::mutate(index = 1:ncol(cds)) %>%
dplyr::group_by(group_by) %>% dplyr::summarise(n = n())
valid_clusters <- as.numeric(as.matrix(cluster_cell_num[which(cluster_cell_num$n/max(cluster_cell_num$n) >
minimal_cluster_fraction), 1]))
cds <- cds[, pData(cds)[, group_by] %in% valid_clusters]
}
if (!is.null(sample_cell_num)) {
if (verbose) {
message(paste0("Downsampling ", sample_cell_num,
" for plotting efficiency ..."))
}
cds <- cds[, sample(1:ncol(cds), sample_cell_num)]
}
if (ncol(cds) > 50000 & is.null(sample_cell_num)) {
if (verbose) {
message(paste0("Cell number is larger 50000. Downsampling 1000 cells for plotting efficiency ..."))
}
sample_cell_num <- 1000
cds <- cds[, sample(1:ncol(cds), sample_cell_num)]
}
gene_ids <- row.names(cds)[match(markers, fData(cds)$gene_short_name)]
norm_mat <- monocle3:::normalize_expr_data(cds[gene_ids,], norm_method = "log")
norm_mat <- norm_mat[gene_ids, ]
norm_mat <- Matrix::t(scale(Matrix::t(norm_mat), center = TRUE))
norm_mat <- norm_mat[is.na(row.names(norm_mat)) == FALSE,
]
norm_mat[is.nan(norm_mat)] = 0
norm_mat[norm_mat > scale_max] <- scale_max
norm_mat[norm_mat < scale_min] <- scale_min
cell_index <- data.table::as.data.table(pData(cds)) %>% dplyr::mutate(index = 1:ncol(cds)) %>%
dplyr::arrange((!!as.symbol(group_by)))
norm_mat <- norm_mat[, cell_index$index]
#dim(norm_mat)
if(cluster_row){
row_ord <- hclust( dist(norm_mat, method = "euclidean"), method = "ward.D" )$order
norm_mat<-norm_mat[row_ord,]
}
if(cluster_col){
col_ord <- hclust( dist(t(norm_mat), method = "euclidean"), method = "ward.D" )$order
}
if(method=="return_matrix"){
return(norm_mat)
}
if(method=="ggplot"){
mlt_norm_mat <- reshape2::melt(norm_mat, stringsAsFactors=F)
#dim(mlt_norm_mat)
colnames(mlt_norm_mat) <- c("Gene", "Cell", "Expression")
mlt_norm_mat$Cell<-as.character(mlt_norm_mat$Cell)
mlt_norm_mat[[group_by]]<-cell_index[[group_by]][match(mlt_norm_mat$Cell,as.character(cell_index$cell))]
mlt_norm_mat$Gene <- fData(cds)[as.character(mlt_norm_mat$Gene), "gene_short_name"]
#mlt_norm_mat$Cluster <- as.character(pData(cds)[as.character(mlt_norm_mat$Cell),
# group_by])
mlt_norm_mat <- mlt_norm_mat %>% dplyr::mutate(Cell = factor(Cell, levels = colnames(norm_mat)))
# if(cluster_row){
# mlt_norm_mat$Gene <- factor(mlt_norm_mat$Gene, levels=levels(mlt_norm_mat$Gene)[row_ord])
# }
# if(cluster_col){
# mlt_norm_mat$Cell <- factor(mlt_norm_mat$Cell, levels=levels(mlt_norm_mat$Cell)[col_ord])
# }
g <- ggplot(data = mlt_norm_mat, mapping = aes(x = Cell,
y = Gene, fill = Expression)) + geom_tile() + scale_fill_gradient2(high=col_high, mid=col_mid, low=col_low) + theme(axis.title.x = element_blank(),
axis.title.y = element_blank(), axis.text.x = element_blank(),
axis.text.y = element_text(size = gene_name_size), axis.ticks.x = element_blank(),
axis.line = element_blank(), axis.ticks.y = element_blank())
g <- g + facet_grid(facets = as.formula(~group_by), drop = TRUE, space = "free",
scales = "free") + scale_x_discrete(expand = c(0, 0),
drop = TRUE)
g <- g + theme(strip.background = element_blank(), panel.spacing = unit(x = 0.1,
units = "lines")) + scale_y_discrete(position = "right")
if (!show_rownames) {
g <- g + theme(axis.text.y = element_blank())
}
if (return_heatmap) {
return(list(norm_mat = norm_mat, g = g))
}
else {
return(g)
}
}
if(method=="pheatmap"){
}
}
generate_smooth_curves<-function (cds, new_data, trend_formula = "~sm.ns(Pseudotime, df = 3)",
relative_expr = T, response_type = "response", cores = 1)
{
expressionFamily <- cds@expressionFamily
if (cores > 1) {
expression_curve_matrix <- mcesApply(cds, 1, function(x,
trend_formula, expressionFamily, relative_expr, new_data,
fit_model_helper, responseMatrix, calculate_NB_dispersion_hint,
calculate_QP_dispersion_hint) {
environment(fit_model_helper) <- environment()
environment(responseMatrix) <- environment()
model_fits <- fit_model_helper(x, modelFormulaStr = trend_formula,
expressionFamily = expressionFamily, relative_expr = relative_expr,
disp_func = cds@dispFitInfo[["blind"]]$disp_func)
if (is.null(model_fits))
expression_curve <- as.data.frame(matrix(rep(NA,
nrow(new_data)), nrow = 1))
else expression_curve <- as.data.frame(responseMatrix(list(model_fits),
newdata = new_data, response_type = response_type))
colnames(expression_curve) <- row.names(new_data)
expression_curve
}, required_packages = c("BiocGenerics", "Biobase", "VGAM",
"plyr"), cores = cores, trend_formula = trend_formula,
expressionFamily = expressionFamily, relative_expr = relative_expr,
new_data = new_data, fit_model_helper = fit_model_helper,
responseMatrix = responseMatrix, calculate_NB_dispersion_hint = calculate_NB_dispersion_hint,
calculate_QP_dispersion_hint = calculate_QP_dispersion_hint)
expression_curve_matrix <- as.matrix(do.call(rbind, expression_curve_matrix))
return(expression_curve_matrix)
}
else {
expression_curve_matrix <- smartEsApply(cds, 1, function(x,
trend_formula, expressionFamily, relative_expr, new_data) {
environment(fit_model_helper) <- environment()
environment(responseMatrix) <- environment()
model_fits <- fit_model_helper(x, modelFormulaStr = trend_formula,
expressionFamily = expressionFamily, relative_expr = relative_expr,
disp_func = cds@dispFitInfo[["blind"]]$disp_func)
if (is.null(model_fits))
expression_curve <- as.data.frame(matrix(rep(NA,
nrow(new_data)), nrow = 1))
else expression_curve <- as.data.frame(responseMatrix(list(model_fits),
new_data, response_type = response_type))
colnames(expression_curve) <- row.names(new_data)
expression_curve
}, convert_to_dense = TRUE, trend_formula = trend_formula,
expressionFamily = expressionFamily, relative_expr = relative_expr,
new_data = new_data)
expression_curve_matrix <- as.matrix(do.call(rbind, expression_curve_matrix))
row.names(expression_curve_matrix) <- row.names(fData(cds))
return(expression_curve_matrix)
}
}
#' Plots a pseudotime-ordered, row-centered heatmap
#'
#' @description The function plot_pseudotime_heatmap takes a CellDataSet object
#' (usually containing a only subset of significant genes) and generates smooth expression
#' curves much like plot_genes_in_pseudotime.
#' Then, it clusters these genes and plots them using the pheatmap package.
#' This allows you to visualize modules of genes that co-vary across pseudotime.
#'
#' @param cds_subset CellDataSet for the experiment
#' @param cluster_rows Whether to cluster the rows of the heatmap.
#' @param hclust_method The method used by pheatmap to perform hirearchical clustering of the rows.
#' @param num_clusters Number of clusters for the heatmap of branch genes
#' @param hmcols The color scheme for drawing the heatmap.
#' @param add_annotation_row Additional annotations to show for each row in the heatmap. Must be a dataframe with one row for each row in the fData table of cds_subset, with matching IDs.
#' @param add_annotation_col Additional annotations to show for each column in the heatmap. Must be a dataframe with one row for each cell in the pData table of cds_subset, with matching IDs.
#' @param show_rownames Whether to show the names for each row in the table.
#' @param use_gene_short_name Whether to use the short names for each row. If FALSE, uses row IDs from the fData table.
#' @param scale_max The maximum value (in standard deviations) to show in the heatmap. Values larger than this are set to the max.
#' @param scale_min The minimum value (in standard deviations) to show in the heatmap. Values smaller than this are set to the min.
#' @param norm_method Determines how to transform expression values prior to rendering
#' @param trend_formula A formula string specifying the model used in fitting the spline curve for each gene/feature.
#' @param return_heatmap Whether to return the pheatmap object to the user.
#' @param cores Number of cores to use when smoothing the expression curves shown in the heatmap.
#' @return A list of heatmap_matrix (expression matrix for the branch committment), ph (pheatmap heatmap object),
#' annotation_row (annotation data.frame for the row), annotation_col (annotation data.frame for the column).
#' @import pheatmap
#' @importFrom stats sd as.dist cor cutree
#' @export
#'
plot_pseudotime_heatmap <- function(cds_subset,
cluster_rows = TRUE,
hclust_method = "ward.D2",
num_clusters = 6,
hmcols = NULL,
add_annotation_row = NULL,
add_annotation_col = NULL,
show_rownames = FALSE,
use_gene_short_name = TRUE,
norm_method = c("log", "vstExprs"),
scale_max=3,
scale_min=-3,
trend_formula = '~sm.ns(Pseudotime, df=3)',
return_heatmap=FALSE,
cores=1){
num_clusters <- min(num_clusters, nrow(cds_subset))
pseudocount <- 1
newdata <- data.frame(Pseudotime = seq(min(cds_subset@principal_graph_aux$UMAP$pseudotime), max(cds_subset@principal_graph_aux$UMAP$pseudotime),length.out = 100))
m <- genSmoothCurves(cds_subset, cores=cores, trend_formula = trend_formula,
relative_expr = T, new_data = newdata)
#remove genes with no expression in any condition
m=m[!apply(m,1,sum)==0,]
norm_method <- match.arg(norm_method)
# FIXME: this needs to check that vst values can even be computed. (They can only be if we're using NB as the expressionFamily)
if(norm_method == 'vstExprs' && is.null(cds_subset@dispFitInfo[["blind"]]$disp_func) == FALSE) {
m = vstExprs(cds_subset, expr_matrix=m)
}
else if(norm_method == 'log') {
m = log10(m+pseudocount)
}
# Row-center the data.
m=m[!apply(m,1,sd)==0,]
m=Matrix::t(scale(Matrix::t(m),center=TRUE))
m=m[is.na(row.names(m)) == FALSE,]
m[is.nan(m)] = 0
m[m>scale_max] = scale_max
m[m<scale_min] = scale_min
heatmap_matrix <- m
row_dist <- as.dist((1 - cor(Matrix::t(heatmap_matrix)))/2)
row_dist[is.na(row_dist)] <- 1
if(is.null(hmcols)) {
bks <- seq(-3.1,3.1, by = 0.1)
hmcols <- blue2green2red(length(bks) - 1)
}
else {
bks <- seq(-3.1,3.1, length.out = length(hmcols))
}
ph <- pheatmap(heatmap_matrix,
useRaster = T,
cluster_cols=FALSE,
cluster_rows=cluster_rows,
show_rownames=F,
show_colnames=F,
clustering_distance_rows=row_dist,
clustering_method = hclust_method,
cutree_rows=num_clusters,
silent=TRUE,
filename=NA,
breaks=bks,
border_color = NA,
color=hmcols)
if(cluster_rows) {
annotation_row <- data.frame(Cluster=factor(cutree(ph$tree_row, num_clusters)))
} else {
annotation_row <- NULL
}
if(!is.null(add_annotation_row)) {
old_colnames_length <- ncol(annotation_row)
annotation_row <- cbind(annotation_row, add_annotation_row[row.names(annotation_row), ])
colnames(annotation_row)[(old_colnames_length+1):ncol(annotation_row)] <- colnames(add_annotation_row)
# annotation_row$bif_time <- add_annotation_row[as.character(fData(absolute_cds[row.names(annotation_row), ])$gene_short_name), 1]
}
if(!is.null(add_annotation_col)) {
if(nrow(add_annotation_col) != 100) {
stop('add_annotation_col should have only 100 rows (check genSmoothCurves before you supply the annotation data)!')
}
annotation_col <- add_annotation_col
} else {
annotation_col <- NA
}
if (use_gene_short_name == TRUE) {
if (is.null(fData(cds_subset)$gene_short_name) == FALSE) {
feature_label <- as.character(fData(cds_subset)[row.names(heatmap_matrix), 'gene_short_name'])
feature_label[is.na(feature_label)] <- row.names(heatmap_matrix)
row_ann_labels <- as.character(fData(cds_subset)[row.names(annotation_row), 'gene_short_name'])
row_ann_labels[is.na(row_ann_labels)] <- row.names(annotation_row)
}
else {
feature_label <- row.names(heatmap_matrix)
row_ann_labels <- row.names(annotation_row)
}
}
else {
feature_label <- row.names(heatmap_matrix)
if(!is.null(annotation_row))
row_ann_labels <- row.names(annotation_row)
}
row.names(heatmap_matrix) <- feature_label
if(!is.null(annotation_row))
row.names(annotation_row) <- row_ann_labels
colnames(heatmap_matrix) <- c(1:ncol(heatmap_matrix))
ph_res <- pheatmap(heatmap_matrix[, ], #ph$tree_row$order
useRaster = T,
cluster_cols = FALSE,
cluster_rows = cluster_rows,
show_rownames=show_rownames,
show_colnames=F,
#scale="row",
clustering_distance_rows=row_dist, #row_dist
clustering_method = hclust_method, #ward.D2
cutree_rows=num_clusters,
# cutree_cols = 2,
annotation_row=annotation_row,
annotation_col=annotation_col,
treeheight_row = 20,
breaks=bks,
fontsize = 6,
color=hmcols,
border_color = NA,
silent=TRUE,
filename=NA
)
grid::grid.rect(gp=grid::gpar("fill", col=NA))
grid::grid.draw(ph_res$gtable)
if (return_heatmap){
return(ph_res)
}
}
plot_cells_3d<-function (cds,
dims = c(1, 2, 3),
reduction_method = c("UMAP", "tSNE", "PCA", "LSI", "spRing", "trimap"),
color_cells_by="cluster",
genes = NULL, show_trajectory_graph = TRUE, trajectory_graph_color = "black",
trajectory_graph_segment_size = 5, norm_method = c("log",
"size_only"), color_palette = NULL, color_scale = "Viridis",
cell_size = 25, alpha = 1, min_expr = 0.1)
{
reduction_method <- match.arg(reduction_method)
assertthat::assert_that(methods::is(cds, "cell_data_set"))
assertthat::assert_that(!is.null(reducedDims(cds)[[reduction_method]]),
msg = paste("No dimensionality reduction for", reduction_method,
"calculated.", "Please run reduce_dimensions with",
"reduction_method =", reduction_method, "before attempting to plot."))
low_dim_coords <- reducedDims(cds)[[reduction_method]]
if (!is.null(color_cells_by)) {
assertthat::assert_that(color_cells_by %in% c("cluster",
"partition", "pseudotime") | color_cells_by %in%
names(colData(cds)), msg = paste("color_cells_by must be a column in",
"the colData table."))
}
assertthat::assert_that(!is.null(color_cells_by) || !is.null(markers),
msg = paste("Either color_cells_by or markers must",
"be NULL, cannot color by both!"))
norm_method = match.arg(norm_method)
if (show_trajectory_graph && is.null(principal_graph(cds)[[reduction_method]])) {
message("No trajectory to plot. Has learn_graph() been called yet?")
show_trajectory_graph = FALSE
}
gene_short_name <- NA
sample_name <- NA
x <- dims[[1]]
y <- dims[[2]]
z <- dims[[3]]
S_matrix <- reducedDims(cds)[[reduction_method]]
data_df <- data.frame(S_matrix[, c(dims)])
colnames(data_df) <- c("data_dim_1", "data_dim_2", "data_dim_3")
data_df$sample_name <- row.names(data_df)
data_df <- as.data.frame(cbind(data_df, colData(cds)))
if (color_cells_by == "cluster") {
data_df$cell_color <- tryCatch({
clusters(cds, reduction_method = reduction_method)[data_df$sample_name]
}, error = function(e) {
NULL
})
}
else if (color_cells_by == "partition") {
data_df$cell_color <- tryCatch({
partitions(cds, reduction_method = reduction_method)[data_df$sample_name]
}, error = function(e) {
NULL
})
}
else if (color_cells_by == "pseudotime") {
data_df$cell_color <- tryCatch({
pseudotime(cds, reduction_method = reduction_method)[data_df$sample_name]
}, error = function(e) {
NULL
})
}
else {
data_df$cell_color <- colData(cds)[data_df$sample_name,
color_cells_by]
}
markers_exprs <- NULL
if (!is.null(genes)) {
if ((is.null(dim(genes)) == FALSE) && dim(genes) >= 2) {
markers <- unlist(genes[, 1], use.names = FALSE)
}
else {
markers <- genes
}
markers_rowData <- as.data.frame(subset(rowData(cds),
gene_short_name %in% markers | row.names(rowData(cds)) %in%
markers))
if (nrow(markers_rowData) >= 1) {
cds_exprs <- SingleCellExperiment::counts(cds)[row.names(markers_rowData),
, drop = FALSE]
cds_exprs <- Matrix::t(Matrix::t(cds_exprs)/size_factors(cds))
if ((is.null(dim(genes)) == FALSE) && dim(genes) >=
2) {
genes <- as.data.frame(genes)
row.names(genes) <- genes[, 1]
genes <- genes[row.names(cds_exprs), ]
agg_mat <- as.matrix(my.aggregate.Matrix(cds_exprs,
as.factor(genes[, 2]), fun = "sum"))
agg_mat <- t(scale(t(log10(agg_mat + 1))))
agg_mat[agg_mat < -2] <- -2
agg_mat[agg_mat > 2] <- 2
markers_exprs <- agg_mat
markers_exprs <- reshape2::melt(markers_exprs)
colnames(markers_exprs)[1:2] <- c("feature_id",
"cell_id")
markers_exprs$feature_label <- markers_exprs$feature_id
}
else {
cds_exprs@x <- round(10000 * cds_exprs@x)/10000
markers_exprs <- matrix(cds_exprs, nrow = nrow(markers_rowData))
colnames(markers_exprs) <- colnames(SingleCellExperiment::counts(cds))
row.names(markers_exprs) <- row.names(markers_rowData)
markers_exprs <- reshape2::melt(markers_exprs)
colnames(markers_exprs)[1:2] <- c("feature_id",
"cell_id")
markers_exprs <- merge(markers_exprs, markers_rowData,
by.x = "feature_id", by.y = "row.names")
markers_exprs$feature_label <- as.character(markers_exprs$gene_short_name)
markers_exprs$feature_label[is.na(markers_exprs$feature_label)] <- markers_exprs$feature_id
markers_exprs$feature_label <- factor(markers_exprs$feature_label,
levels = markers)
}
}
}
if (is.null(markers_exprs) == FALSE && nrow(markers_exprs) >
0) {
data_df <- merge(data_df, markers_exprs, by.x = "sample_name",
by.y = "cell_id")
data_df$expression <- with(data_df, ifelse(value >= min_expr,
value, NA))
sub1 <- data_df[!is.na(data_df$expression), ]
sub2 <- data_df[is.na(data_df$expression), ]
if (norm_method == "size_only") {
p <- plotly::plot_ly(sub1) %>% plotly::add_trace(x = ~data_dim_1,
y = ~data_dim_2, z = ~data_dim_3, type = "scatter3d",
size = I(cell_size), alpha = I(alpha), mode = "markers",
marker = list(colorbar = list(title = "Expression",
len = 0.5), color = ~expression, colors = color_scale,
line = list(width = 1, color = ~expression,
colorscale = color_scale), colorscale = color_scale)) %>%
plotly::add_markers(x = sub2$data_dim_1, y = sub2$data_dim_2,
z = sub2$data_dim_3, color = I("lightgrey"),
size = I(cell_size), marker = list(opacity = 0.4),
showlegend = FALSE)
}
else {
sub1$log10_expression <- log10(sub1$expression +
min_expr)
p <- plotly::plot_ly(sub1) %>% plotly::add_trace(x = ~data_dim_1,
y = ~data_dim_2, z = ~data_dim_3, type = "scatter3d",
size = I(cell_size), alpha = I(alpha), mode = "markers",
marker = list(colorbar = list(title = "Log10\nExpression",
len = 0.5), color = ~log10_expression, colors = color_scale,
line = list(width = 1, color = ~log10_expression,
colorscale = color_scale), colorscale = color_scale)) %>%
plotly::add_markers(x = sub2$data_dim_1, y = sub2$data_dim_2,
z = sub2$data_dim_3, color = I("lightgrey"),
size = I(cell_size), marker = list(opacity = 0.4),
showlegend = FALSE)
}
}
else {
if (color_cells_by %in% c("cluster", "partition")) {
if (is.null(data_df$cell_color)) {
p <- plotly::plot_ly(data_df, x = ~data_dim_1,
y = ~data_dim_2, z = ~data_dim_3, type = "scatter3d",
size = I(cell_size), color = I("gray"), mode = "markers",
alpha = I(alpha))
message(paste("cluster_cells() has not been called yet, can't color",
"cells by cluster or partition"))
}
else {
if (is.null(color_palette)) {
N <- length(unique(data_df$cell_color))
color_palette <- RColorBrewer::brewer.pal(N,
"Set2")
}
p <- plotly::plot_ly(data_df, x = ~data_dim_1,
y = ~data_dim_2, z = ~data_dim_3, type = "scatter3d",
size = I(cell_size), color = ~cell_color, colors = color_palette,
mode = "markers", alpha = I(alpha))
}
}
else if (class(data_df$cell_color) == "numeric") {
p <- plotly::plot_ly(data_df) %>% plotly::add_trace(x = ~data_dim_1,
y = ~data_dim_2, z = ~data_dim_3, type = "scatter3d",
size = I(cell_size), alpha = I(alpha), mode = "markers",
marker = list(colorbar = list(title = color_cells_by,
len = 0.5), color = ~cell_color, colors = color_scale,
line = list(width = 1, color = ~cell_color,
colorscale = color_scale), colorscale = color_scale))
}
else {
if (is.null(color_palette)) {
N <- length(unique(data_df$cell_color))
color_palette <- RColorBrewer::brewer.pal(N,
"Set2")
}
p <- plotly::plot_ly(data_df, x = ~data_dim_1, y = ~data_dim_2,
z = ~data_dim_3, type = "scatter3d", size = I(cell_size),
color = ~cell_color, colors = color_palette,
mode = "markers", alpha = I(alpha))
}
}
p <- p %>% plotly::layout(scene = list(xaxis = list(title = paste("Component",
x)), yaxis = list(title = paste("Component", y)), zaxis = list(title = paste("Component",
z))))
if (show_trajectory_graph) {
ica_space_df <- t(cds@principal_graph_aux[[reduction_method]]$dp_mst) %>%
as.data.frame() %>% dplyr::select_(prin_graph_dim_1 = x,
prin_graph_dim_2 = y, prin_graph_dim_3 = z) %>% dplyr::mutate(sample_name = rownames(.),
sample_state = rownames(.))
dp_mst <- cds@principal_graph[[reduction_method]]
edge_df <- dp_mst %>% igraph::as_data_frame() %>% dplyr::select_(source = "from",
target = "to") %>% dplyr::left_join(ica_space_df %>%
dplyr::select_(source = "sample_name", source_prin_graph_dim_1 = "prin_graph_dim_1",
source_prin_graph_dim_2 = "prin_graph_dim_2",
source_prin_graph_dim_3 = "prin_graph_dim_3"),
by = "source") %>% dplyr::left_join(ica_space_df %>%
dplyr::select_(target = "sample_name", target_prin_graph_dim_1 = "prin_graph_dim_1",
target_prin_graph_dim_2 = "prin_graph_dim_2",
target_prin_graph_dim_3 = "prin_graph_dim_3"),
by = "target")
for (i in 1:nrow(edge_df)) {
p <- p %>% plotly::add_trace(x = as.vector(t(edge_df[i,
c("source_prin_graph_dim_1", "target_prin_graph_dim_1")])),
y = as.vector(t(edge_df[i, c("source_prin_graph_dim_2",
"target_prin_graph_dim_2")])), z = as.vector(t(edge_df[i,
c("source_prin_graph_dim_3", "target_prin_graph_dim_3")])),
color = trajectory_graph_color, line = list(color = I(trajectory_graph_color),
width = trajectory_graph_segment_size), mode = "lines",
type = "scatter3d", showlegend = FALSE)
}
}
p
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.