#' create scatterplots of deconvolution results for all algorithms
# 'and cell types in the dataset
#'
#' @param results.list list of results as returned by deconvolute
#' @param real.props matrix containing real proportions for each cell type
#' '(cell type x bulk); optional, needed only if results.list does not contain
#' 'bulk.props'
#' @param training.pheno pheno data for the single-cell training set; optional
#' @param real boolean; are the deconvoluted bulks real or artificial
#' @param celltype.order character vector of cell types
#' specifying the plotting order
#' @param algorithm.order character vector of algorithm names
#' specifying the plotting order
#' @return list containing one plot per algorithm, ordered by algorithm.order
create_scatterplots <- function(
results.list,
real.props = NULL,
training.pheno = NULL,
real = FALSE,
celltype.order = NULL,
algorithm.order = NULL
) {
# parameter checks
if (!is.list(results.list)) {
stop("results.list must be a list as returned by deconvolute()")
}
if (is.null(real.props)) {
if (!"bulk.props" %in% names(results.list)) {
stop("results.list must contain an entry bulk.props;
alternatively supply real.props")
}
real.props <- results.list$bulk.props
}
if (any(colSums(real.props) != 1)) {
real.props <- sweep(real.props, 2, colSums(real.props), "/")
}
results.list <- results.list[["results.list"]]
scatter.plots <- list()
dfs <- list()
for (n in names(results.list[[1]])) {
dfs[[n]] <- data.frame()
}
# iterate over repetitions
for (j in seq_len(length(results.list))) {
# iterate over algorithms
for (res in results.list[[j]]) {
# only cell types that are in real props and estimates
cts <- intersect(rownames(res$est.props), rownames(real.props))
# create data frame for plotting
# (containing real and estimates for each cell type)
df <- data.frame()
if (!length(cts) > 0) next
for (i in seq_len(length(cts))) {
t <- cts[i]
temp.df <- data.frame(
real = real.props[t, ],
estimate = res$est.props[t, ]
)
rownames(temp.df) <- colnames(real.props)
df <- rbind(
df,
data.frame(
temp.df,
type = t,
repetition = j,
cor = cor(real.props[t, ], res$est.props[t, ])
)
)
}
dfs[[res$name]] <- rbind(dfs[[res$name]], df)
}
}
# create plot for each algorithm
for (name in names(dfs)) {
df <- dfs[[name]]
if (nrow(df) == 0) {
scatter.plots[[name]] <- NULL
next
}
# order subplots of cell types by celltype.order
if (!is.null(celltype.order)) {
if (any(celltype.order == "overall")) {
celltype.order <- celltype.order[-which(celltype.order == "overall")]
}
df$type <- factor(df$type, levels = celltype.order)
}
# store correlation for each algorithm and repetition in matrix
cors <- matrix(0,
nrow = length(unique(df$repetition)),
ncol = length(unique(df$type))
)
rownames(cors) <- unique(as.character(df$repetition))
colnames(cors) <- unique(as.character(df$type))
for (r in unique(as.character(df$repetition))) {
for (ct in unique(as.character(df$type))) {
temp <- as.numeric(
as.character(
unique(df[which(df$repetition == r & df$type == ct), "cor"])
)
)
if (!is.null(temp) && !is.na(temp)) {
cors[as.character(r), ct] <- temp
}
}
}
# join type names with corresponding mean correlation
# across all repetitions for plot titles
df$type <- as.factor(df$type)
labs <- unique(as.character(df$type))
temp <- rep("", length(labs))
for (i in seq_len(length(labs))) {
temp[i] <- paste(
labs[i], "\nr = ",
round(mean(cors[,labs[i]]), 2),
" +/- ",
round(sd(cors[,labs[i]]), 2),
sep = ""
)
}
names(temp) <- labs
labs <- temp
# create scatter plot
scatter.plot <- ggplot(df,
aes(x = real, y = estimate, col = type, group = real)
) +
geom_boxplot(position = "dodge", varwidth = TRUE) +
geom_point(alpha = 0.4) +
facet_grid(
rows = NULL,
cols = vars(type),
labeller = labeller(type = labs)
) +
xlab("real") +
ylab("estimate") +
labs(col = 'cell type') +
theme(
legend.text = element_text(size = 20),
legend.title = element_text(size = 22),
title = element_text(size = 24),
axis.title.x = element_text(size = 22),
axis.text.x = element_text(size = 20, angle = 90),
axis.title.y = element_text(size = 22),
axis.text.y = element_text(size = 20),
strip.text.x = element_text(size = 14)
) +
ggtitle(name) +
geom_abline(slope = 1, intercept = 0, linetype = 2) +
ylim(min(df$estimate), max(df$estimate)) +
scale_x_continuous(limits = c(0, 1), n.breaks = 4)
scatter.plots[[name]] <- scatter.plot
}
# order list of plots by algorithm.order
if (!is.null(algorithm.order)) {
if (all(algorithm.order %in% names(scatter.plots)) &&
length(scatter.plots) == length(algorithm.order)) {
scatter.plots <- scatter.plots[rev(algorithm.order)]
}
}
return(scatter.plots)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.