## plot_dotplot.r: Dotplots in various contexts, currently just smc/smd
#' Make a dotplot of some categorised factors and a set of SVs (for other factors).
#'
#' This should make a quick df of the factors and surrogates and plot them.
#'
#' @param expt Experiment from which to acquire the design, counts, etc.
#' @param svest Set of surrogate variable estimations from sva/svg
#' or batch estimates.
#' @param sv Which surrogate to plot?
#' @param chosen_factor Factor to compare against.
#' @param factor_type This may be a factor or range, it is intended to plot
#' a scatterplot if it is a range, a dotplot if a factor.
#' @return surrogate variable plot as per Leek's work
#' @seealso [ggplot2]
#' @examples
#' \dontrun{
#' estimate_vs_snps <- plot_svfactor(start, surrogate_estimate, "snpcategory")
#' }
#' @export
plot_svfactor <- function(expt, svest, sv = 1, chosen_factor = "batch", factor_type = "factor") {
meta <- pData(expt)
chosen <- meta[[chosen_factor]]
names <- sampleNames(expt)
my_colors <- colors(expt)
sv_df <- data.frame(
"adjust" = svest[, sv], ## Take a single estimate from compare_estimates()
"factors" = chosen,
"samplenames" = names)
sv_melted <- reshape2::melt(sv_df, idvars = "factors")
minval <- min(sv_df[["adjust"]])
maxval <- max(sv_df[["adjust"]])
my_binwidth <- (maxval - minval) / 40
sv_plot <- ggplot(sv_melted, aes(x = .data[["factors"]], y = .data[["value"]])) +
ggplot2::geom_dotplot(binwidth = my_binwidth, binaxis = "y",
stackdir = "center", binpositions = "all",
colour = "black", fill = my_colors) +
ggplot2::xlab(glue("Experimental factor: {chosen_factor}")) +
ggplot2::ylab("1st surrogate variable estimation") +
ggplot2::geom_text(aes(x = .data[["factors"]], y = .data[["value"]],
label = .data[["samplenames"]]),
angle = 45, size = 3, vjust = 2) +
ggplot2::theme(axis.text = ggplot2::element_text(size = base_size, colour = "black")) +
ggplot2::theme_bw(base_size = base_size)
return(sv_plot)
}
#' Make a dotplot of known batches vs. SVs.
#'
#' This should make a quick df of the factors and surrogates and plot them.
#' Maybe it should be folded into plot_svfactor? Hmm, I think first I will
#' write this and see if it is better.
#'
#' @param expt Experiment from which to acquire the design, counts, etc.
#' @param svs Set of surrogate variable estimations from sva/svg
#' or batch estimates.
#' @param sv Which surrogate variable to show?
#' @param batch_column Which experimental design column to use?
#' @param factor_type This may be a factor or range, it is intended to plot
#' a scatterplot if it is a range, a dotplot if a factor.
#' @param id_column Use this column for the sample IDs.
#' @return Plot of batch vs surrogate variables as per Leek's work.
#' @seealso [sva] [ggplot2]
#' @examples
#' \dontrun{
#' estimate_vs_snps <- plot_batchsv(start, surrogate_estimate, "snpcategory")
#' }
#' @export
plot_batchsv <- function(expt, svs, sv = 1, batch_column = "batch", factor_type = "factor",
id_column = "sampleid") {
meta <- pData(expt)
chosen <- meta[[batch_column]]
names(chosen) <- sampleNames(expt)
num_batches <- length(unique(chosen))
samples <- meta[[id_column]]
if (is.null(samples)) {
samples <- rownames(meta)
}
factor_df <- data.frame(
"sample" = samples,
"factor" = as.integer(as.factor(pData(expt)[[batch_column]])),
"fill" = expt[["colors"]],
"condition" = expt[["conditions"]],
"batch" = expt[["batches"]],
"color" = "black",
"svs" = svs[, sv])
if (num_batches <= 5) {
factor_df[["shape"]] <- 20 + as.numeric(as.factor(factor_df[["batch"]]))
} else {
factor_df[["shape"]] <- 21
}
factor_df[["shape"]] <- as.factor(factor_df[["shape"]])
color_list <- as.character(factor_df[["fill"]])
names(color_list) <- as.character(factor_df[["condition"]])
sample_factor <- ggplot(factor_df,
aes(x = .data[["sample"]],
y = .data[["factor"]],
shape = .data[["batch"]],
fill = .data[["condition"]])) +
ggplot2::geom_point(size = 5,
aes(shape = .data[["batch"]],
colour = .data[["condition"]],
fill = .data[["condition"]])) +
ggplot2::geom_point(size = 5,
colour = "black",
show.legend = FALSE,
aes(shape = .data[["batch"]],
fill = .data[["condition"]])) +
ggplot2::scale_shape_manual(name = "Batch",
labels = levels(as.factor(factor_df[["batch"]])),
guide = ggplot2::guide_legend(
override.aes = list(size = 5, fill = "grey")),
values = 21:25) +
ggplot2::scale_color_manual(name = "Condition",
guide = "legend",
values = color_list) +
ggplot2::scale_fill_manual(name = "Condition",
guide = "legend",
values = color_list) +
ggplot2::theme_bw(base_size = base_size) +
ggplot2::theme(axis.text = ggplot2::element_text(size = base_size, colour = "black"),
axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5))
##, hjust = 1.5, vjust = 0.5))
factor_svs <- ggplot(factor_df,
aes(x = .data[["factor"]],
y = .data[["svs"]],
fill = .data[["condition"]],
colour = .data[["condition"]],
shape = .data[["shape"]])) +
ggplot2::geom_point(size = 5,
aes(shape = .data[["batch"]],
colour = .data[["condition"]],
fill = .data[["condition"]])) +
ggplot2::geom_point(size = 5,
colour = "black",
show.legend = FALSE,
aes(shape = .data[["batch"]],
fill = .data[["condition"]])) +
ggplot2::scale_shape_manual(name = "Batch",
labels = levels(as.factor(factor_df[["batch"]])),
guide = ggplot2::guide_legend(
override.aes = list(size = 5, fill = "grey")),
values = 21:25) +
ggplot2::scale_color_manual(name = "Condition",
guide = "legend",
values = color_list) +
ggplot2::scale_fill_manual(name = "Condition",
guide = "legend",
values = color_list) +
ggplot2::theme_bw(base_size = base_size) +
ggplot2::theme(axis.text = ggplot2::element_text(size = base_size, colour = "black"),
axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5))
##, hjust = 1.5, vjust = 0.5))
svs_sample <- ggplot(factor_df,
aes(x = .data[["sample"]],
y = .data[["svs"]],
fill = .data[["condition"]],
colour = .data[["condition"]],
shape = .data[["shape"]])) +
ggplot2::geom_point(size = 5,
aes(shape = .data[["batch"]],
colour = .data[["condition"]],
fill = .data[["condition"]])) +
ggplot2::geom_point(size = 5,
colour = "black",
show.legend = FALSE,
aes(shape = .data[["batch"]],
fill = .data[["condition"]])) +
ggplot2::scale_shape_manual(name = "Batch",
labels = levels(as.factor(factor_df[["batch"]])),
guide = ggplot2::guide_legend(
override.aes = list(size = 5, fill = "grey")),
values = 21:25) +
ggplot2::scale_color_manual(name = "Condition",
guide = "legend",
values = color_list) +
ggplot2::scale_fill_manual(name = "Condition",
guide = "legend",
values = color_list) +
ggplot2::theme_bw(base_size = base_size) +
ggplot2::theme(axis.text = ggplot2::element_text(size = base_size, colour = "black"),
axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5))
## An alternate possibility: hjust = 1.5, vjust = 0.5))
plots <- list(
"sample_factor" = sample_factor,
"factor_svs" = factor_svs,
"svs_sample" = svs_sample)
class(plots) <- "sv_plots"
return(plots)
}
#' make a dotplot of some categorised factors and a set of principle components.
#'
#' This should make a quick df of the factors and PCs and plot them.
#'
#' @param pc_df Df of principle components.
#' @param expt Expt containing counts, metadata, etc.
#' @param exp_factor Experimental factor to compare against.
#' @param component Which principal component to compare against?
#' @return Plot of principle component vs factors in the data
#' @seealso [ggplot2]
#' @examples
#' \dontrun{
#' estimate_vs_pcs <- plot_pcfactor(pcs, times)
#' }
#' @export
plot_pcfactor <- function(pc_df, expt, exp_factor = "condition", component = "PC1") {
meta <- pData(expt)
samplenames <- sampleNames(expt)
my_colors <- colors(expt)
minval <- min(pc_df[[component]])
maxval <- max(pc_df[[component]])
my_binwidth <- (maxval - minval) / 40
sv_plot <- ggplot(pc_df, aes(x = .data[[exp_factor]], y = .data[[component]])) +
ggplot2::geom_dotplot(binwidth = my_binwidth, binaxis = "y",
stackdir = "center", binpositions = "all",
colour = "black", fill = my_colors) +
ggplot2::xlab(glue("Experimental factor: {exp_factor}")) +
ggplot2::ylab("1st surrogate variable estimation") +
##ggplot2::geom_text(
## ggplot2::aes(x = .data[[exp_factor]], y = .data[[component]], label = "strains"),
## angle = 45, size = 3, vjust = 2) +
ggplot2::theme_bw(base_size = base_size) +
ggplot2::theme(axis.text = ggplot2::element_text(size = base_size, colour = "black"),
axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5))
return(sv_plot)
}
#' Make an R plot of the standard median correlation or distance among samples.
#'
#' This was written by a mix of Kwame Okrah <kokrah at gmail dot com>, Laura
#' Dillon <dillonl at umd dot edu>, and Hector Corrada Bravo <hcorrada at umd dot edu>
#' I reimplemented it using ggplot2 and tried to make it a little more flexible.
#' The general idea is to take the pairwise correlations/distances of the
#' samples, then take the medians, and plot them. This version of the plot is
#' no longer actually a dotplot, but a point plot, but who is counting?
#'
#' @param data Expt, expressionset, or data frame.
#' @param design Specify metadata if desired.
#' @param colors Color scheme if data is not an expt.
#' @param method Correlation or distance method to use.
#' @param plot_legend Include a legend on the side?
#' @param expt_names Use pretty names for the samples?
#' @param label_chars Maximum number of characters before abbreviating sample names.
#' @param plot_title Title for the graph.
#' @param dot_size How large should the glyphs be?
#' @param ... More parameters to make you happy!
#' @return ggplot of the standard median something
#' among the samples. This will also write to an
#' open device. The resulting plot measures the median correlation of
#' each sample among its peers. It notes 1.5* the interquartile range
#' among the samples and makes a horizontal line at that correlation
#' coefficient. Any sample which falls below this line is considered
#' for removal because it is much less similar to all of its peers.
#' @seealso [matrixStats] [ggplot2]
#' @examples
#' \dontrun{
#' smc_plot = hpgl_smc(expt = expt)
#' }
#' @export
plot_sm <- function(data, design = NULL, colors = NULL, method = "pearson", plot_legend = FALSE,
expt_names = NULL, label_chars = 10, plot_title = NULL, dot_size = 5,
...) {
arglist <- list(...)
chosen_palette <- "Dark2"
if (!is.null(arglist[["palette"]])) {
chosen_palette <- arglist[["palette"]]
}
if (is.null(colors)) {
colors <- grDevices::colorRampPalette(
RColorBrewer::brewer.pal(ncol(data), chosen_palette))(ncol(data))
}
colors <- as.character(colors)
properties <- NULL
if (method == "pearson" || method == "spearman" || method == "robust") {
mesg("Performing ", method, " correlation.")
properties <- hpgl_cor(data, method = method)
} else {
mesg("Performing ", method, " distance.")
properties <- as.matrix(dist(t(data), method = method))
}
prop_median <- matrixStats::rowMedians(properties)
prop_spread <- stats::quantile(prop_median, p = c(1, 3) / 4)
prop_iqr <- diff(prop_spread)
log_scale <- FALSE
outer_limit <- NULL
ylimit <- NULL
type <- "unknown"
if (method == "pearson" || method == "spearman" || method == "robust") {
outer_limit <- prop_spread[1] - (1.5 * prop_iqr)
ylimit <- c(pmin(min(prop_median), outer_limit), max(prop_median))
ylimit <- ylimit[[1]]
type <- "correlation"
} else {
log_scale <- TRUE
outer_limit <- prop_spread[2] + (1.5 * prop_iqr)
ylimit <- c(pmin(min(prop_median), outer_limit), max(prop_median))
ylimit <- ylimit[[2]]
type <- "distance"
}
sm_df <- data.frame(
"sample" = rownames(properties),
"sm" = prop_median,
"color" = colors)
if (!is.null(design)) {
if (!is.null(design[["condition"]])) {
sm_df[["condition"]] <- as.factor(design[["condition"]])
} else {
sm_df[["condition"]] <- "undefined"
}
if (is.null(design[["batch"]])) {
sm_df[["batch"]] <- "undefined"
} else {
sm_df[["batch"]] <- as.factor(design[["batch"]])
}
if (class(expt_names) == "character" && length(expt_names) == 1) {
## Then this refers to an experimental metadata column.
sm_df[["sample"]] <- design[[expt_names]]
}
} else {
sm_df[["condition"]] <- "undefined"
sm_df[["batch"]] <- "undefined"
}
color_listing <- sm_df[, c("condition", "color")]
color_listing <- unique(color_listing)
color_list <- as.character(color_listing[["color"]])
names(color_list) <- as.character(color_listing[["condition"]])
sm_df[["num"]] <- seq_len(nrow(sm_df))
num_batches <- nlevels(sm_df[["batch"]])
if (!is.null(label_chars) && is.numeric(label_chars)) {
sm_df[["sample"]] <- abbreviate(sm_df[["sample"]], minlength = label_chars)
}
legend_position <- "right"
if (isFALSE(plot_legend)) {
legend_position <- "none"
}
minval <- min(sm_df[["sm"]])
maxval <- max(sm_df[["sm"]])
my_binwidth <- (maxval - minval) / 40
if (num_batches <= 5) {
sm_plot <- ggplot(sm_df,
aes(x = .data[["num"]], y = .data[["sm"]],
shape = .data[["batch"]], fill = .data[["condition"]])) +
ggplot2::geom_hline(colour = "red", yintercept = ylimit, linewidth = 1) +
ggplot2::geom_point(size = dot_size,
aes(shape = .data[["batch"]],
colour = .data[["condition"]],
fill = .data[["condition"]])) +
ggplot2::geom_point(size = dot_size, colour = "black", show.legend = FALSE,
aes(shape = .data[["batch"]], fill = .data[["condition"]])) +
ggplot2::scale_color_manual(name = "Condition",
guide = "legend",
values = color_list) +
ggplot2::scale_fill_manual(name = "Condition",
guide = "legend",
values = color_list) +
ggplot2::scale_shape_manual(name = "Batch",
labels = levels(as.factor(sm_df[["batch"]])),
guide = ggplot2::guide_legend(
override.aes = list(size = 5, fill = "grey")),
values = 21:25) +
ggplot2::scale_x_continuous(labels = sm_df[["sample"]],
breaks = 1:nrow(sm_df),
limits = c(1, nrow(sm_df))) +
ggplot2::ylab(glue("Standard Median {method}")) +
ggplot2::xlab("Sample") +
ggplot2::ggtitle(plot_title) +
ggplot2::theme_bw(base_size = base_size) +
ggplot2::theme(axis.text = ggplot2::element_text(size = base_size, colour = "black"),
legend.position = legend_position,
axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5))
## Perhaps instead: hjust = 1.5, vjust = 0.5))
} else {
sm_plot <- ggplot(
sm_df,
aes(x = .data[["sample"]], y = .data[["sm"]],
shape = .data[["batch"]], fill = .data[["condition"]])) +
ggplot2::geom_hline(color = "red", yintercept = ylimit, linewidth = 1) +
ggplot2::geom_dotplot(binwidth = my_binwidth,
binaxis = "y",
stackdir = "center",
binpositions = "all",
colour = "black",
dotsize = 1,
aes(fill = as.factor(.data[["condition"]]))) +
ggplot2::ylab(glue("Standard Median {method}")) +
ggplot2::xlab("Sample") +
ggplot2::ggtitle(plot_title) +
ggplot2::theme_bw(base_size = base_size) +
ggplot2::theme(
legend.position = legend_position,
axis.text.x = ggplot2::element_text(size = base_size, colour = "black",
angle = 90, hjust = 1))
}
if (type == "distance") {
sm_plot <- sm_plot +
ggplot2::scale_y_continuous(labels = scales::scientific)
}
retlist <- list(
"measurement" = properties,
"medians" = prop_median,
"quantile" = prop_spread,
"plot" = sm_plot)
class(retlist) <- "standardmedian_plot"
return(retlist)
}
setGeneric("plot_sm")
## EOF
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.