#' @title Cluster proportion bar plot
#' @description Plot the percentage/absolute cell count of each cluster in each sample
#' @param origin factor/vector of sample (one value per cell); must have the same length as \code{cluster}
#' @param cluster factor/vector of cluster (one value per cell)
#' @param rev If TRUE, plot the proportion of sample in each cluster, Default: FALSE
#' @param normalize Normalize sample size to 100, Default: rev
#' @param percent If FALSE, plot absolute cell number instead of percentage, Default: TRUE
#' @param plot Generate plot (TRUE) or matrix (FALSE), Default: TRUE
#' @param flip If plot bars horizontally, Default: TRUE
#' @param reverse_order If TRUE, will reverse the default stacking order. Default: TRUE
#' @param width Width of bars, Default: 0.9
#' @param stack If TRUE, plot stacked bars, Default: TRUE
#' @param cols Colors to use for plotting. Default: "light"
#' @param border Border color, Default: "white"
#' @return ggplot object (\code{plot = TRUE}) or a matrix (\code{plot = FALSE}).
#'   The matrix has one row per cluster and one column per sample, or the
#'   transpose when \code{rev = TRUE}. It stays a matrix even when it has a
#'   single row.
#' @details See example
#' @examples
#' ClusterDistrBar(origin = pbmc$orig.ident, cluster = pbmc$cluster)
#'
#' # absolute cell count
#' ClusterDistrBar(origin = pbmc$orig.ident, cluster = pbmc$cluster, percent = FALSE)
#'
#' # reverse x and y axis, normalized by sample size
#' ClusterDistrBar(origin = pbmc$orig.ident, cluster = pbmc$cluster, rev = TRUE, normalize = TRUE)
#'
#' # reverse x and y axis, not normalized by sample size
#' ClusterDistrBar(origin = pbmc$orig.ident, cluster = pbmc$cluster, rev = TRUE, normalize = FALSE)
#'
#' # vertical bar plot, and keep the default stacking order
#' ClusterDistrBar(origin = pbmc$orig.ident, cluster = pbmc$cluster, flip = FALSE, reverse_order = FALSE)
#'
#' # not stacking bars
#' ClusterDistrBar(origin = pbmc$orig.ident, cluster = pbmc$cluster, flip = FALSE, stack = FALSE)
#'
#' # export matrix
#' ClusterDistrBar(origin = pbmc$orig.ident, cluster = pbmc$cluster, plot = FALSE)
#' @rdname ClusterDistrBar
#' @export
ClusterDistrBar <- function(
    origin,
    cluster,
    rev = FALSE,
    normalize = rev,
    percent = TRUE,
    plot = TRUE,
    flip = TRUE,
    reverse_order = TRUE,
    width = 0.9,
    stack = TRUE,
    cols = "light",
    border = "white"
){
  # Attaching these packages is a long-standing side effect of this function;
  # it is kept so that existing callers see the same search path.
  library(dplyr)
  library(rlist)
  library(ggplot2)
  library(reshape2)

  # Check if origin and cluster have the same length
  if (length(origin) != length(cluster)) {
    stop("'origin' and 'cluster' must have the same length. Please check if they are from the same Seurat object or data frame.")
  }

  origin <- factor(origin)
  cluster <- factor(cluster)

  # Rescale every column so that it sums to 100. sweep() keeps the matrix
  # shape and dimnames, whereas apply() would collapse a one-row matrix
  # (e.g. a single cluster) into a bare vector.
  to_percent <- function(m) sweep(m * 100, 2, colSums(m), "/")

  # Count matrix: one row per cluster, one column per sample
  ToPlot <-
    split(cluster, origin) %>%
    lapply(table) %>%
    list.cbind()

  if (normalize) ToPlot <- to_percent(ToPlot)
  if (rev) ToPlot <- t(ToPlot)
  if (percent) ToPlot <- to_percent(ToPlot)
  if (!plot) return(ToPlot)

  levels_var1 <- rownames(ToPlot)
  levels_var2 <- colnames(ToPlot)

  # as.is = TRUE stops melt() from type-converting the dimnames; without it,
  # names such as "01" become the integer 1 and no longer match the levels
  # recorded above.
  ToPlot <- melt(ToPlot, as.is = TRUE)

  x.label <- if (rev) "Cluster" else "Origin"
  fill.label <- if (rev) "Origin" else "Cluster"
  y.label <- if (normalize) "Normalized cell counts" else "Cell counts"
  if (percent) y.label <- paste("Percentage of", tolower(y.label))

  ToPlot$Var1 <- factor(ToPlot$Var1, levels = levels_var1)
  # Always make the x variable a factor so numeric-looking names are drawn on
  # a discrete axis. The order is reversed when flipping so the first level
  # ends up at the top of the horizontal plot. base::rev is spelled out
  # because the `rev` argument shadows the function name.
  ToPlot$Var2 <- factor(
    ToPlot$Var2,
    levels = if (flip) base::rev(levels_var2) else levels_var2
  )

  position <- if (stack) position_stack(reverse = reverse_order) else "dodge"

  p <-
    ggplot(ToPlot, aes(x = Var2, y = value, fill = factor(Var1))) +
    geom_bar(stat = "identity", position = position, width = width, color = border) +
    theme_classic() +
    labs(x = x.label, y = y.label, fill = fill.label) +
    scale_y_continuous(expand = c(0, 0))
  if (flip) p <- p + coord_flip()

  p <- p + scale_fill_disc_auto(cols, nlevels(ToPlot$Var1))
  return(p)
}
#' @title Cluster proportion bar plot (deprecated)
#' @description deprecated function of \code{\link[SeuratExtend:ClusterDistrBar]{ClusterDistrBar()}}
#' @param meta meta data data.frame
#' @param cluster variable name of cluster
#' @param origin variable name of sample
#' @param method 'percent' or 'value'; any value other than 'percent' plots absolute counts, Default: 'value'
#' @param do.heatmap Generate plot (TRUE) or matrix (FALSE), Default: TRUE
#' @return ggplot object or matrix
#' @seealso \code{\link[SeuratExtend:ClusterDistrBar]{ClusterDistrBar()}}
#' @rdname BarOfCluster
#' @export
BarOfCluster <- function(meta, cluster, origin, method = "value", do.heatmap = TRUE) {
  warning("BarOfCluster() is deprecated. Please use ClusterDistrBar() instead.")
  # drop = TRUE makes the column come back as a plain vector for tibbles as
  # well as base data frames (tibbles default to drop = FALSE).
  ClusterDistrBar(
    origin = meta[, origin, drop = TRUE],
    cluster = meta[, cluster, drop = TRUE],
    percent = method == "percent",
    plot = do.heatmap
  )
}
#' @title Cluster proportion boxplot by condition
#' @description Plot the percentage/absolute cell count of each cluster grouped by condition
#' @param origin factor/vector of sample
#' @param cluster factor/vector of cluster
#' @param condition factor/vector of condition with the same length as origin (one value per cell). Every sample in origin must map to exactly one condition. If provided, will create boxplots instead of bar plots, Default: NULL
#' @param rev If TRUE, plot the proportion of sample in each cluster. Must be FALSE when condition is provided, Default: FALSE
#' @param normalize Normalize sample size to 100, Default: rev
#' @param percent If FALSE, plot absolute cell number instead of percentage, Default: TRUE
#' @param plot Generate plot (TRUE) or matrix (FALSE), Default: TRUE
#' @param flip If plot bars horizontally (only used when condition is NULL), Default: TRUE
#' @param reverse_order If TRUE, will reverse the default stacking order (only used when condition is NULL), Default: TRUE
#' @param stack If TRUE, plot stacked bars (only used when condition is NULL), Default: TRUE
#' @param cols Colors to use for plotting. Default: "light"
#' @param border Border color for bar plots (only used when condition is NULL), Default: "white"
#' @param violin Indicates whether to generate a violin plot (only used when condition is not NULL), Default: FALSE
#' @param box Indicates whether to depict a box plot (only used when condition is not NULL), Default: TRUE
#' @param width Width parameter. When condition is NULL, controls bar width; when condition is provided, controls box plot width. Default: 0.9
#' @param pt Indicates if points should be plotted (only used when condition is not NULL), Default: TRUE
#' @param pt.style Position adjustment. Default choices: "quasirandom", "jitter" (only used when condition is not NULL)
#' @param pt.size Point size setting (only used when condition is not NULL), Default: 1
#' @param pt.alpha Adjusts the transparency of points (only used when condition is not NULL), Default: 1
#' @param style Plot style: "fill" or "outline" (only used when condition is not NULL). Default: "outline"
#' @param ncol Number of columns for multi-panel plots. Default: NULL
#' @param nrow Number of rows for multi-panel plots. Default: NULL
#' @param stat.method Determines if pairwise statistics are added to the plot. Either "wilcox.test" or "t.test", Default: "wilcox.test"
#' @param p.adjust.method Method for adjusting p-values, Default: "none"
#' @param label Specifies label type. Options include "p.signif", "p", "p.adj", "p.format", Default: "p.signif"
#' @param hide.ns If TRUE, the 'ns' symbol is concealed when displaying significance levels, Default: TRUE
#' @param ... Additional arguments passed to VlnPlot2
#' @return ggplot object or matrix
#' @details This function extends ClusterDistrBar to allow grouping by condition, creating boxplots instead of bar plots.
#' When condition is NULL, it behaves exactly like ClusterDistrBar, supporting all its parameters
#' and returning a stacked bar plot. When condition is provided, it transforms the distribution data
#' into a boxplot format using VlnPlot2, with each box representing the distribution of a cluster's
#' proportion across samples within the same condition. This is particularly useful for comparing cluster
#' distributions between experimental conditions.
#'
#' When plot = FALSE the distribution matrix from ClusterDistrBar is returned, whether or not
#' condition is provided.
#'
#' All styling parameters from VlnPlot2 are supported, including violin/box display options, point
#' styles, colors, and statistical testing. For detailed formatting options, refer to the VlnPlot2
#' documentation. Common customizations include:
#'
#' - Changing point display: pt=TRUE/FALSE, pt.size, pt.alpha, pt.style
#'
#' - Adjusting plot style: style="fill"/"outline", violin=TRUE/FALSE
#'
#' - Adding statistical tests: stat.method="wilcox.test"/"t.test", p.adjust.method
#'
#' - Controlling layout: ncol, nrow for multi-cluster displays
#'
#' @examples
#' # Example with condition
#' ClusterDistrPlot(
#'   origin = pbmc$sample_id,
#'   cluster = pbmc$cluster,
#'   condition = pbmc$condition
#' )
#'
#' @rdname ClusterDistrPlot
#' @export
ClusterDistrPlot <- function(
    origin,
    cluster,
    condition = NULL,
    rev = FALSE,
    normalize = rev,
    percent = TRUE,
    plot = TRUE,
    flip = TRUE,
    reverse_order = TRUE,
    stack = TRUE,
    cols = "light",
    border = "white",
    violin = FALSE,
    box = TRUE,
    width = 0.9,
    pt = TRUE,
    pt.style = "quasirandom",
    pt.size = 1,
    pt.alpha = 1,
    style = "outline",
    ncol = NULL,
    nrow = NULL,
    stat.method = "wilcox.test",
    p.adjust.method = "none",
    label = "p.signif",
    hide.ns = TRUE,
    ...
){
  # Attaching these packages is an existing side effect of this function and
  # is kept unchanged for callers that rely on it.
  library(dplyr)
  library(rlist)
  library(ggplot2)
  library(reshape2)

  # Check if origin and cluster have the same length
  if (length(origin) != length(cluster)) {
    stop("'origin' and 'cluster' must have the same length. Please check if they are from the same Seurat object or data frame.")
  }

  # Check for invalid parameter combinations
  if (!is.null(condition) && rev) {
    stop("Cannot use rev=TRUE with condition parameter. When condition is provided, rev must be FALSE.")
  }

  # Without a condition, or when only the matrix is wanted, this is exactly
  # ClusterDistrBar: delegate once instead of building the matrix and then
  # calling ClusterDistrBar a second time for the plot.
  if (is.null(condition) || !plot) {
    return(ClusterDistrBar(
      origin = origin,
      cluster = cluster,
      rev = rev,
      normalize = normalize,
      percent = percent,
      plot = plot,
      flip = flip,
      reverse_order = reverse_order,
      width = width,
      stack = stack,
      cols = cols,
      border = border
    ))
  }

  # condition must be given per cell, aligned with origin
  if (length(condition) != length(origin)) {
    stop("The condition parameter must be the same length as origin or a valid mapping table")
  }

  # One row per distinct (sample, condition) pair
  cond_map <- unique(data.frame(origin = origin, condition = condition))

  # Ensure each origin is mapped to exactly one condition
  if (any(table(cond_map$origin) > 1)) {
    stop("Each origin (sample) must be associated with exactly one condition")
  }

  # Distribution matrix: clusters in rows, samples in columns
  distr_matrix <- ClusterDistrBar(
    origin = origin,
    cluster = cluster,
    rev = rev,
    normalize = normalize,
    percent = percent,
    plot = FALSE
  )

  # Look up the condition of every sample (matrix column) in one vectorised
  # step. The uniqueness check above guarantees at most one hit per sample.
  col_origins <- colnames(distr_matrix)
  idx <- match(col_origins, as.character(cond_map$origin))
  if (anyNA(idx)) {
    stop(paste("Origin", col_origins[which(is.na(idx))[1]], "not found in condition mapping"))
  }
  f <- as.character(cond_map$condition[idx])

  # Convert to factor, preserving original levels if condition was a factor
  f <- if (is.factor(condition)) factor(f, levels = levels(condition)) else factor(f)

  # y-axis label mirrors the wording used by ClusterDistrBar
  y_label <- if (normalize) "Normalized cell counts" else "Cell counts"
  if (percent) y_label <- paste("Percentage of", tolower(y_label))

  # Generate plot using VlnPlot2: each matrix row (cluster) is a feature and
  # each column (sample) is an observation grouped by its condition.
  p <- VlnPlot2(
    distr_matrix,
    f = f,
    violin = violin,
    box = box,
    style = style,
    pt.style = pt.style,
    pt = pt,
    pt.size = pt.size,
    pt.alpha = pt.alpha,
    width = width,
    stat.method = stat.method,
    p.adjust.method = p.adjust.method,
    label = label,
    hide.ns = hide.ns,
    cols = cols,
    lab_fill = "Condition",
    ncol = ncol,
    nrow = nrow,
    ...
  )

  # Update y-axis label
  p <- p + labs(y = y_label)
  return(p)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.