######
# PLOTTING CODE
######
#' Plot replicate comparisons.
#'
#' Plots replicate comparisons for all replicates in a list of screens and outputs
#' plots to a given folder.
#'
#' @param df Reads or lfc dataframe.
#' @param screens List of screens created with \code{add_screens}.
#' @param output_folder Folder to output plots to.
#' @param negative_controls List of negative control genes to append to default list of
#' non-essential genes (default NULL).
#' @param control_label Label for negative control genes in plot (default "Negative controls").
#' @param plot_type Type of plot to output, one of "png" or "pdf" (default "png").
#' @param display_numbers Whether or not to include PCC values in heatmap (default TRUE).
#' @export
plot_lfc_qc <- function(df, screens, output_folder, negative_controls = NULL,
control_label = "Negative controls", plot_type = "png",
display_numbers = TRUE) {
# Checks for input errors
check_screen_params(df, screens)
# Gets essential gene QC metrics
auc <- essential_lfc_qc(df, screens, negative_controls = negative_controls)
auc_file <- file.path(output_folder, "essential_PR_QC.tsv")
if (!is.null(auc)) {
utils::write.table(auc, auc_file, quote = FALSE, sep = "\t",
row.names = FALSE, col.names = TRUE)
}
# Checks plot type and converts to lowercase
plot_type <- tolower(plot_type)
if (plot_type != "png" & plot_type != "pdf") {
stop("plot_type must be either png or pdf")
}
# Builds dataframe of replicate PCCs
cor_df <- NULL
# Adds nonessential labels to df and sets color scheme for replicate plots
nonessentials <- traver_nonessentials
nonessentials <- c(nonessentials, negative_controls)
nonessential_ind <- (df$gene1 %in% nonessentials & df$gene2 %in% nonessentials) |
(df$gene1 %in% nonessentials & df$gene2 == "NegControl") |
(df$gene2 %in% nonessentials & df$gene1 == "NegControl")
df$nonessentials <- "Other"
df$nonessentials[nonessential_ind] <- control_label
color_values <- c("Other" = "Gray")
color_values[control_label] <- "Blue"
# Compares replicates across all screens
for (screen in screens) {
rep_cols <- screen[["replicates"]]
if (length(rep_cols) > 1) {
pairs <- utils::combn(rep_cols, 2)
for (i in 1:ncol(pairs)) {
col1 <- pairs[1,i]
col2 <- pairs[2,i]
x_label <- paste0(col1, " log2 fold change")
y_label <- paste0(col2, " log2 fold change")
temp <- plot_samples(df, col1, col2, x_label, y_label, print_cor = TRUE,
color_col = "nonessentials", color_lab = "Guide type",
color_values = color_values)
p <- temp[[1]]
pcc <- temp[[2]]
scc <- temp[[3]]
file_name <- paste0(col1, "_vs_", col2, "_replicate_comparison.", plot_type)
suppressWarnings(ggplot2::ggsave(file.path(output_folder, file_name), width = 10, height = 7, dpi = 300))
# Stores PCC in dataframe
if (is.null(cor_df)) {
cor_df <- data.frame(rep1 = col1, rep2 = col2, pcc = pcc, scc = scc,
stringsAsFactors = FALSE)
} else {
cor_df <- rbind(cor_df, c(col1, col2, pcc, scc))
}
}
}
}
# Writes PCCs to file
cor_file <- file.path(output_folder, "replicate_cor.tsv")
utils::write.table(cor_df, cor_file, quote = FALSE, sep = "\t",
row.names = FALSE, col.names = TRUE)
# Gets sample groups
all_cols <- c()
col_groups <- c()
i <- 1
for (screen_name in names(screens)) {
screen <- screens[[screen_name]]
for (col in screen[["replicates"]]) {
all_cols <- c(all_cols, col)
col_groups[i] <- screen_name
i <- i +1
}
}
# Plots heatmap of log-scaled read counts
df <- df[,all_cols]
heatmap_file <- file.path(output_folder, paste0("lfc_heatmap.", plot_type))
plot_heatmap(df, col_groups, heatmap_file, display_numbers)
}
#' Computes essential gene recovery AUC.
#'
#' Computes area under the curve for ROC curves that measure how well each technical replicate
#' recovers signal for essential-targeting guides. Only computes AUC for guides that target
#' essential genes twice, guides that target two different essential genes, or guides that
#' target an essential gene and an intergenic region.
#'
#' @param df LFC dataframe.
#' @param screens List of screens generated with \code{add_screens}.
#' @param negative_controls List of negative control genes to append to default list of
#' non-essential genes (default NULL).
#' @return Returns a dataframe with three columns for replicate name, essential AUC relative
#' to all other genes, and essential AUC relative to a specified set of non-essentials.
essential_lfc_qc <- function(df, screens, negative_controls = NULL) {
# Loads gene standards from internal data
essentials <- traver_core_essentials
nonessentials <- traver_nonessentials
# Adds genes to nonessentials list if specified
if (!is.null(negative_controls)) {
nonessentials <- c(nonessentials, negative_controls)
}
# Gets indices of essential-targeting guides
essential_ind <- (df$gene1 %in% essentials & df$gene2 %in% essentials) |
(df$gene1 %in% essentials & df$gene2 == "NegControl") |
(df$gene2 %in% essentials & df$gene1 == "NegControl")
# Gets indices of nonessential-targeting guides
nonessential_ind <- (df$gene1 %in% nonessentials & df$gene2 %in% nonessentials) |
(df$gene1 %in% nonessentials & df$gene2 == "NegControl") |
(df$gene2 %in% nonessentials & df$gene1 == "NegControl")
# Throws warning if too few genes in standards
skip_nonessential <- FALSE
if (sum(essential_ind) < 10) {
warning(paste("too few essential-targeting guides in df, skipping all AUC computation"))
return(NULL)
}
if (sum(nonessential_ind) < 10) {
warning(paste("too few nonessential-targeting guides in df, skipping non-essential AUC computation"))
skip_nonessential <- TRUE
}
# Gets PR curves for all essential genes relative to all other genes
results <- data.frame(screen = NA,
replicate = NA,
essential_AUC_all = NA,
essential_AUC_nonessential = NA)
counter <- 1
for (screen_name in names(screens)) {
screen <- screens[[screen_name]]
for (rep in screen[["replicates"]]) {
# Gets replicate-specific indices (taking NAs into account)
temp <- df[!is.na(df[[rep]]),]
essential_ind_rep <- (temp$gene1 %in% essentials & temp$gene2 %in% essentials) |
(temp$gene1 %in% essentials & temp$gene2 == "NegControl") |
(temp$gene2 %in% essentials & temp$gene1 == "NegControl")
nonessential_ind_rep <- (temp$gene1 %in% nonessentials & temp$gene2 %in% nonessentials) |
(temp$gene1 %in% nonessentials & temp$gene2 == "NegControl") |
(temp$gene2 %in% nonessentials & temp$gene1 == "NegControl")
# Computes AUC relative to all genes
auc1 <- NA
auc2 <- NA
if (sum(essential_ind_rep) < 10) {
warning(paste("too few essential-targeting guides for replicate", rep, "skipping AUC computation"))
} else {
roc <- PRROC::roc.curve(-temp[[rep]], weights.class0 = as.numeric(essential_ind_rep), curve = TRUE)
auc1 <- roc$auc
}
# Computes AUC relative to non-essential genes
if (!skip_nonessential & sum(nonessential_ind_rep) > 10) {
essential_rownames <- rownames(temp)[essential_ind_rep]
temp <- temp[essential_ind_rep | nonessential_ind_rep,]
temp_essential_ind <- rownames(temp) %in% essential_rownames
roc <- PRROC::roc.curve(-temp[[rep]], weights.class0 = as.numeric(temp_essential_ind), curve = TRUE)
auc2 <- roc$auc
}
results[counter,] <- c(screen_name, rep, auc1, auc2)
counter <- counter + 1
}
}
return(results)
}
#' Plot read counts for a screen.
#'
#' Plots a histogram of read counts for each replicate of all screens. Also
#' plots total reads for all screens.
#'
#' @param df Reads dataframe.
#' @param screens List of screens created with \code{add_screens}.
#' @param output_folder Folder to output plots to.
#' @param log_scale If true, log-normalizes data.
#' @param pseudocount Pseudocounts to add to log-normalized data if specified (default 1).
#' @param display_numbers Whether or not to include PCC values in heatmap (default TRUE).
#' @param plot_type Type of plot to output, one of "png" or "pdf" (default "png").
#' @export
plot_reads_qc <- function(df, screens, output_folder,
log_scale = TRUE, pseudocount = 1,
display_numbers = TRUE,
plot_type = "png") {
# Checks for input errors
check_screen_params(df, screens)
# Checks plot type and converts to lowercase
plot_type <- tolower(plot_type)
if (plot_type != "png" & plot_type != "pdf") {
stop("plot_type must be either png or pdf")
}
# Plots read count histograms for all replicates of all screens and stores total reads
reads_df <- NULL
all_cols <- c()
col_groups <- c()
all_coverage <- c()
i <- 1
for (screen_name in names(screens)) {
screen <- screens[[screen_name]]
for (col in screen[["replicates"]]) {
total_reads <- sum(df[,col], na.rm = TRUE)
if (is.null(reads_df)) {
reads_df <- data.frame(rep = col, reads = total_reads,
stringsAsFactors = FALSE)
} else {
reads_df <- rbind(reads_df, c(col, total_reads))
}
all_coverage <- c(all_coverage, screen[["target_coverage"]])
all_cols <- c(all_cols, col)
p <- plot_reads(df, col, log_scale, pseudocount)
file_name <- paste0(col, "_raw_reads_histogram.", plot_type)
suppressWarnings(ggplot2::ggsave(file.path(output_folder, file_name), width = 10, height = 7, dpi = 300))
col_groups[i] <- screen_name
i <- i + 1
}
}
# Gets unique coverage breakpoints
all_coverage <- unique(all_coverage)
all_coverage <- all_coverage*nrow(df)
# Plots total reads
reads_df$reads <- as.numeric(reads_df$reads)
p <- ggplot2::ggplot(reads_df, ggplot2::aes_string(x = "rep", y = "reads")) +
ggplot2::geom_bar(stat = "identity", color = "Black", fill = "gray30")
for (coverage in all_coverage) {
p <- p + ggplot2::geom_hline(yintercept = coverage, linetype = 2, size = 1, alpha = 0.9, color = "Gray")
}
p <- p +
ggplot2::xlab("Replicate") +
ggplot2::ylab("Total reads") +
ggplot2::scale_y_continuous(labels = function(x) format(x, scientific = FALSE)) +
ggthemes::theme_tufte(base_size = 20, base_family = "sans") +
ggplot2::theme(axis.text.x = ggplot2:: element_text(angle = 45, hjust = 1))
file_name <- paste0("total_reads.", plot_type)
suppressWarnings(ggplot2::ggsave(file.path(output_folder, file_name), plot = p, width = 10, height = 7, dpi = 300))
# Log-scales read counts if specified
df <- df[,all_cols]
if (log_scale) {
for (col in colnames(df)) {
df[,col] <- log2(df[,col] + 1)
}
}
# Plots heatmap of log-scaled read counts
heatmap_file <- file.path(output_folder, paste0("reads_heatmap.", plot_type))
plot_heatmap(df, col_groups, heatmap_file, display_numbers)
}
#' Plot read counts.
#'
#' Plots a histogram of read counts for a given column.
#'
#' @param df Reads dataframe.
#' @param col Name of column to plot.
#' @param log_scale If true, log-normalizes data.
#' @param pseudocount Pseudocounts to add to log-normalized data if specified (default 1).
#' @return A ggplot object.
plot_reads <- function(df, col, log_scale = TRUE, pseudocount = 1) {
x_label <- paste(col, "log-normalized read counts")
y_label <- "Number of read counts"
if (log_scale) {
df[,col] <- log2(df[,col] + 1)
y_label <- "Number of log-normalized read counts"
}
p <- ggplot2::ggplot(df, ggplot2::aes_string(col)) +
ggplot2::geom_histogram(bins = 30) +
ggplot2::xlab(x_label) +
ggplot2::ylab(y_label) +
ggthemes::theme_tufte(base_size = 20, base_family = "sans")
return(p)
}
#' Plots sample comparisons.
#'
#' Pretty-plots comparisons between two samples in a scatterplot.
#
#' @param df Reads or lfc dataframe.
#' @param xcol Name of column containing values to plot on the x-axis.
#' @param ycol Name of column containing values to plot on the y-axis.
#' @param xlab X-axis label.
#' @param ylab Y-axis label.
#' @param color_col Name of column to color points by (optional).
#' @param color_lab Name of color legend (optional, defaults to color_col).
#' @param color_values Named list of discrete values in color_col mapped
#' to their respective colors (defaults to RColorBrewer's Set2 colors
#' with NULL).
#' @param print_cor If true, prints Pearson correlation between columns
#' (default FALSE).
#' @return A list of three elements. The first is a ggplot object, the
#' second is the correlation between xcol and ycol, and the third is
#' the Spearman correlation between xcol and ycol.
plot_samples <- function(df, xcol, ycol, xlab, ylab,
color_col = NULL, color_lab = NULL,
color_values = NULL, print_cor = FALSE) {
# Computes correlations and optionally prints Pearson correlation
pcc <- stats::cor(df[[xcol]], df[[ycol]], use = "complete.obs")
scc <- stats::cor(df[[xcol]], df[[ycol]], method = "spearman", use = "complete.obs")
if (print_cor) {
cat(paste0("Pearson correlation between ", xcol, " and ", ycol, ": ", pcc, "\n"))
}
# Makes plot
p <- NULL
if (is.null(color_col)) {
p <- ggplot2::ggplot(df, ggplot2::aes_string(x = xcol, y = ycol)) +
ggplot2::geom_abline(slope = 1, intercept = 0, color = "black", linetype = 2, size = 1) +
ggplot2::geom_point(size = 1.5, alpha = 0.7) +
ggplot2::xlab(xlab) +
ggplot2::ylab(ylab) +
ggthemes::theme_tufte(base_size = 20, base_family = "sans")
} else {
# Gets parameters for a discrete color scale
if (is.null(color_lab)) {
color_lab <- color_col
}
if (is.null(color_values)) {
n_discrete <- length(unique(df[[color_col]]))
n_color <- max(3, n_discrete)
color_values <- RColorBrewer::brewer.pal(n_color, "Set1")[1:n_discrete]
names(color_values) <- sort(unique(df[[color_col]]))
}
# Constructs basic plot
p <- ggplot2::ggplot() +
ggplot2::geom_abline(slope = 1, intercept = 0, color = "black", linetype = 2, size = 1)
# Adds layers of points one by one
for (x in names(color_values)) {
p <- p + ggplot2::geom_point(data = df[df[[color_col]] == x,], size = 1.5, alpha = 0.7,
mapping = ggplot2::aes_string(x = xcol, y = ycol, color = color_col))
}
# Finishes plot
p <- p + ggplot2::scale_color_manual(values = color_values, name = color_lab) +
ggplot2::xlab(xlab) +
ggplot2::ylab(ylab) +
ggthemes::theme_tufte(base_size = 20, base_family = "sans")
}
return(list(p, pcc, scc))
}
#' Plots Pearson correlation heatmap.
#'
#' Plots heatmap of Pearson correlations for a dataframe with labels
#' for groups of samples and a blue-yellow color scheme.
#'
#' @param df Reads or lfc dataframe
#' @param col_groups List of grouping labels for each column in df.
#' @param filename Output filename for plot.
#' @param display_numbers Whether or not to include PCC values in heatmap (default TRUE).
#' @return Writes plot to file with no return value.
plot_heatmap <- function(df, col_groups, filename, display_numbers) {
# Gets colors for different screens
screen_colors <- NA
n_colors <- length(unique(col_groups))
if (n_colors < 10) {
screen_colors <- list(group = RColorBrewer::brewer.pal(length(unique(col_groups)), "Set1"))
} else {
pal <- RColorBrewer::brewer.pal(9, "Set1")
pal <- grDevices::colorRampPalette(pal)(n_colors)
screen_colors <- list(group = pal)
}
names(screen_colors$group) <- unique(col_groups)
# Gets PCCs for heatmap
cor_mat <- data.matrix(stats::cor(df, use = "complete.obs"))
# Gets annotation for heatmap
col_groups <- data.frame("Screen" = col_groups)
rownames(col_groups) <- colnames(df)
colnames(cor_mat) <- colnames(df)
# Gets color for heatmap values
breaks <- seq(-1, 1, by = (1/150))
pal <- grDevices::colorRampPalette(c("#7fbf7b", "#f7f7f7", "#af8dc3"))(n = length(breaks))
# Plots heatmap of raw reads to file
pheatmap::pheatmap(cor_mat,
border_color = NA,
annotation_col = col_groups,
annotation_colors = screen_colors,
display_numbers = display_numbers,
color = pal,
breaks = breaks,
filename = filename,
width = 10,
height = 10)
}
#' Plots drug response for scored data.
#'
#' Pretty-plots response for data which does not use a derived null model (e.g. for directly
#' comparing drug response to DMSO response). Assumes that data was scored by
#' \code{score_conditions_vs_control} and significant effects were called by
#' \code{call_condition_hits}. Writes both a scatterplot and a volcano plot
#' to file in the specified folder.
#'
#' @param scores Dataframe of scores returned from \code{call_condition_hits}.
#' @param control_name Name of control passed to \code{call_condition_hits}.
#' @param condition_name Name of condition passed to \code{call_condition_hits}.
#' @param output_folder Folder to output plots to.
#' @param loess If true and data was loess-normalized, plots loess null model instead
#' (default TRUE).
#' @param neg_type Label for significant effects with a negative differential effect
#' passed to \code{call_condition_hits} (default "Negative").
#' @param pos_type Label for significant effects with a positive differential effect
#' passed to \code{call_condition_hits} (default "Positive").
#' @param volcano_type One of "pval" or "FDR" to specify whether to plot -log10(pval)
#' or -log2(FDR), respectively (default "FDR").
#' @param plot_type Type of plot to output, one of "png" or "pdf" (default "png").
#' @export
plot_condition_response <- function(scores, control_name, condition_name, output_folder,
loess = TRUE, neg_type = "Negative",
pos_type = "Positive", volcano_type = "FDR",
plot_type = "png") {
# Makes output folder if it doesn't exist
if (!dir.exists(output_folder)) {
dir.create(output_folder)
}
# Manually sets colors and fill for plot
diff_col <- paste0("differential_", condition_name, "_vs_", control_name)
scores <- scores[order(scores[[diff_col]]),]
response_col <- paste0("effect_type_", condition_name)
colors <- c(neg_type = "Black", "None" = "Gray", pos_type = "Black")
names(colors)[1] <- neg_type
names(colors)[3] <- pos_type
fill <- c(neg_type = "Blue", "None" = "Gray", pos_type = "Yellow")
names(fill)[1] <- neg_type
names(fill)[3] <- pos_type
# Builds basic plot
p <- ggplot2::ggplot(scores, ggplot2::aes_string(x = paste0("mean_", control_name),
y = paste0("mean_", condition_name))) +
ggplot2::geom_hline(yintercept = 0, linetype = 2, size = 1, alpha = 1, color = "Gray") +
ggplot2::geom_vline(xintercept = 0, linetype = 2, size = 1, alpha = 1, color = "Gray")
# Appends choice of null model to plot
if (loess) {
} else {
p <- p +
ggplot2::geom_abline(slope = 1, intercept = 0, size = 1.5, alpha = 0.5, color = "Black")
}
# Finishes plot
p <- p +
ggplot2::geom_point(ggplot2::aes_string(color = response_col, fill = response_col), shape = 21, alpha = 0.7) +
ggplot2::scale_color_manual(values = colors) +
ggplot2::scale_fill_manual(values = fill) +
ggplot2::xlab(paste0(control_name, " mean LFC")) +
ggplot2::ylab(paste0(condition_name, " mean LFC")) +
ggplot2::labs(fill = "Significant response") +
ggplot2::guides(color = "none", size = "none") +
ggthemes::theme_tufte(base_size = 20, base_family = "sans") +
ggplot2::theme(axis.text.x = ggplot2:: element_text(color = "Black", size = 16),
axis.text.y = ggplot2:: element_text(color = "Black", size = 16),
legend.text = ggplot2:: element_text(size = 16))
# Saves to file
file_name <- paste0(condition_name, "_vs_", control_name, "_scatter.", plot_type)
suppressWarnings(ggplot2::ggsave(file.path(output_folder, file_name), width = 10, height = 7, dpi = 300))
# Gets log-scaled p-value or FDR column for volcano plot
fdr_col <- paste0("fdr_", condition_name, "_vs_", control_name)
pval_col <- paste0("pval_", condition_name, "_vs_", control_name)
sig_col <- "volcano_y"
ylab <- ""
if (volcano_type == "FDR") {
scores[[sig_col]] <- -log2(scores[[fdr_col]])
ylab <- "-log2(FDR)"
} else if (volcano_type == "pval") {
scores[[sig_col]] <- -log10(scores[[pval_col]])
ylab <- "-log10(p)"
} else {
cat(paste("invalid volcano_type value specified, defaulting to -log2(FDR)"))
scores[[sig_col]] <- -log2(scores[[fdr_col]])
}
# Makes volcano plot
p <- ggplot2::ggplot(scores, ggplot2::aes_string(x = diff_col, y = sig_col)) +
ggplot2::geom_point(ggplot2::aes_string(color = response_col, fill = response_col), shape = 21, alpha = 0.7) +
ggplot2::scale_color_manual(values = colors) +
ggplot2::scale_fill_manual(values = fill) +
ggplot2::xlab(paste0("Differential effect")) +
ggplot2::ylab(ylab) +
ggplot2::labs(fill = "Significant response") +
ggplot2::guides(color = "none", size = "none") +
ggthemes::theme_tufte(base_size = 20, base_family = "sans") +
ggplot2::theme(axis.text.x = ggplot2:: element_text(color = "Black", size = 16),
axis.text.y = ggplot2:: element_text(color = "Black", size = 16),
legend.text = ggplot2:: element_text(size = 16))
# Saves to file
file_name <- paste0(condition_name, "_vs_", control_name, "_volcano.", plot_type)
suppressWarnings(ggplot2::ggsave(file.path(output_folder, file_name), width = 10, height = 7, dpi = 300))
}
#' Plots drug response for scored paired data.
#'
#' Pretty-plots response for data which uses a derived null model (e.g. for comparing
#' dual-gene knockout effects to a multiplicative null model derived from single-gene
#' effects). Assumes that data was scored by \code{score_combn_vs_single} and
#' significant effects were called by \code{call_combn_hits}. Writes both a scatterplot
#' and a volcano plot to file in the specified folder.
#'
#' @param scores Dataframe of scores returned from \code{call_combn_hits}.
#' @param condition_name Name of condition passed to \code{call_combn_hits}.
#' @param output_folder Folder to output plots to.
#' @param filter_names If a list of column names is given, calls points as non-significant
#' if they are significant in the provided columsn (e.g. to remove points significant
#' in control screens; default NULL).
#' @param loess If true and data was loess-normalized, plots loess null model instead
#' (default TRUE).
#' @param neg_type Label for significant effects with a negative differential effect
#' passed to \code{call_combn_hits} (default "Negative").
#' @param pos_type Label for significant effects with a positive differential effect
#' passed to \code{call_combn_hits} (default "Positive").
#' @param volcano_type One of "pval" or "FDR" to specify whether to plot -log10(pval)
#' or -log2(FDR), respectively (default "FDR").
#' @param plot_type Type of plot to output, one of "png" or "pdf" (default "png").
#' @export
plot_combn_response <- function(scores, condition_name, output_folder,
filter_names = NULL, loess = TRUE,
neg_type = "Negative", pos_type = "Positive",
volcano_type = "FDR", plot_type = "png") {
# Makes output folder if it doesn't exist
if (!dir.exists(output_folder)) {
dir.create(output_folder)
}
# If filter_names given, remove significant guides with the same effect as a control
# type of guides (e.g. DMSO) from plot
response_col <- paste0("effect_type_", condition_name)
if (!is.null(filter_names)) {
for (name in filter_names) {
control_response_col <- paste0("effect_type_", name)
scores[[response_col]][
scores[[response_col]] != "None" &
scores[[control_response_col]] == scores[[response_col]]] <-
"None"
}
}
# Manually sets colors for plot
diff_col <- paste0("differential_combn_vs_single_", condition_name)
scores <- scores[order(scores[[diff_col]]),]
colors <- c(neg_type = "Black", "None" = "Gray", pos_type = "Black")
names(colors)[1] <- neg_type
names(colors)[3] <- pos_type
fill <- c(neg_type = "Blue", "None" = "Gray", pos_type = "Yellow")
names(fill)[1] <- neg_type
names(fill)[3] <- pos_type
# Plots data
p <- ggplot2::ggplot(scores, ggplot2::aes_string(x = paste0("mean_single_", condition_name),
y = paste0("mean_combn_", condition_name))) +
ggplot2::geom_hline(yintercept = 0, linetype = 2, size = 1, alpha = 1, color = "Gray") +
ggplot2::geom_vline(xintercept = 0, linetype = 2, size = 1, alpha = 1, color = "Gray")
# Appends choice of null model to plot
if (loess) {
} else {
p <- p +
ggplot2::geom_abline(slope = 1, intercept = 0, size = 1.5, alpha = 0.5, color = "Black")
}
# Finishes plot
p <- p +
ggplot2::geom_point(ggplot2::aes_string(color = response_col, fill = response_col), shape = 21, alpha = 0.7) +
ggplot2::scale_color_manual(values = colors) +
ggplot2::scale_fill_manual(values = fill) +
ggplot2::xlab(paste0(condition_name, " mean expected single LFC")) +
ggplot2::ylab(paste0(condition_name, " mean observed combinatorial LFC")) +
ggplot2::labs(fill = "Significant response") +
ggplot2::guides(color = "none", size = "none") +
ggthemes::theme_tufte(base_size = 20, base_family = "sans") +
ggplot2::theme(axis.text.x = ggplot2:: element_text(color = "Black", size = 16),
axis.text.y = ggplot2:: element_text(color = "Black", size = 16),
legend.text = ggplot2:: element_text(size = 16))
# Saves to file
file_name <- paste0(condition_name, "_combn_scatter.", plot_type)
suppressWarnings(ggplot2::ggsave(file.path(output_folder, file_name), width = 10, height = 7, dpi = 300))
# Gets log-scaled p-value or FDR columns for volcano plot
fdr_col1 <- paste0("fdr1_combn_vs_single_", condition_name)
fdr_col2 <- paste0("fdr2_combn_vs_single_", condition_name)
pval_col1 <- paste0("pval1_combn_vs_single_", condition_name)
pval_col2 <- paste0("pval2_combn_vs_single_", condition_name)
sig_col <- "volcano_y"
ylab <- ""
if (volcano_type == "FDR") {
scores[[sig_col]] <- -log2(rowMeans(scores[,c(fdr_col1, fdr_col2)], na.rm = TRUE))
ylab <- "Mean -log2(FDR) across orientations"
} else if (volcano_type == "pval") {
scores[[sig_col]] <- -log10(rowMeans(scores[,c(fdr_col1, fdr_col2)], na.rm = TRUE))
ylab <- "Mean -log10(p) across orientations"
} else {
cat(paste("invalid volcano_type value specified, defaulting to -log2(FDR)"))
scores[[sig_col]] <- -log2(rowMeans(scores[[fdr_col1]], scores[[fdr_col2]], na.rm = TRUE))
}
# Makes volcano plot
p <- ggplot2::ggplot(scores, ggplot2::aes_string(x = diff_col, y = sig_col)) +
ggplot2::geom_point(ggplot2::aes_string(color = response_col, fill = response_col), shape = 21, alpha = 0.7) +
ggplot2::scale_color_manual(values = colors) +
ggplot2::scale_fill_manual(values = fill) +
ggplot2::xlab(paste0("Differential effect")) +
ggplot2::ylab(ylab) +
ggplot2::labs(fill = "Significant response") +
ggplot2::guides(color = "none", size = "none") +
ggthemes::theme_tufte(base_size = 20, base_family = "sans") +
ggplot2::theme(axis.text.x = ggplot2:: element_text(color = "Black", size = 16),
axis.text.y = ggplot2:: element_text(color = "Black", size = 16),
legend.text = ggplot2:: element_text(size = 16))
# Saves to file
file_name <- paste0(condition_name, "_combn_volcano.", plot_type)
suppressWarnings(ggplot2::ggsave(file.path(output_folder, file_name), width = 10, height = 7, dpi = 300))
}
#' Plot guide-level residuals for all gene pairs.
#'
#' Plots replicate comparisons for all replicates in a list of screens and outputs
#' plots to a given folder. Works for data returned from \code{call_condition_hits}.
#'
#' @param scores Dataframe of scores returned from \code{call_condition_hits}.
#' @param residuals Residuals returned with the return_residuals argument set to true
#' from \code{call_condition_hits}.
#' @param control_name Name of control passed to \code{call_condition_hits}.
#' @param condition_name Name of condition passed to \code{call_condition_hits}.
#' @param output_folder Folder to output plots to.
#' @param neg_type Label for significant effects with a negative differential effect
#' passed to \code{call_condition_hits} (default "Negative").
#' @param pos_type Label for significant effects with a positive differential effect
#' passed to \code{call_condition_hits} (default "Positive").
#' @param plot_type Type of plot to output, one of "png" or "pdf" (default "png").
#' @export
plot_condition_residuals <- function(scores, residuals, control_name,
condition_name, output_folder,
neg_type = "Negative", pos_type = "Positive",
plot_type = "png") {
# Checks plot type and converts to lowercase
plot_type <- tolower(plot_type)
if (plot_type != "png" & plot_type != "pdf") {
stop("plot_type must be either png or pdf")
}
# Makes output folder if it doesn't exist
if (!dir.exists(output_folder)) {
dir.create(output_folder)
}
# Gets top hits
response_col <- paste0("effect_type_", condition_name)
control_col <- paste0("mean_", control_name)
condition_col <- paste0("mean_", condition_name)
diff_col <- paste0("differential_", condition_name, "_vs_", control_name)
scores <- scores[scores[[response_col]] != "None",]
residuals <- residuals[residuals$n %in% as.numeric(rownames(scores)),]
residuals$lfc <- residuals[[condition_col]] - residuals[[control_col]]
# Gets ranking of top hits
neg_order <- order(scores[[diff_col]])
scores$neg_rank <- NA
scores$pos_rank <- NA
scores$neg_rank[neg_order] <- 1:nrow(scores)
scores$pos_rank[neg_order] <- nrow(scores):1
# Makes LFC plots for all top hits
for (i in unique(residuals$n)) {
# Gets data and gene names
df <- residuals[residuals$n == i,]
ind <- which(as.numeric(rownames(scores)) == i)
gene1 <- scores$gene1[ind]
gene2 <- scores$gene2[ind]
x_label <- paste0("Guides")
y_label <- paste0("Guide-level differential LFC for ", gene1, "/", gene2)
# Adds ID column for plotting
df$ID <- paste("Guide", 1:nrow(df))
# Plots data
p <- ggplot2::ggplot(df) +
ggplot2::xlab(x_label) +
ggplot2::ylab(y_label) +
ggplot2::geom_bar(ggplot2::aes_string(x = "ID", y = "lfc"), stat = "identity", color = "Black",
fill = ggplot2::alpha(c("gray30"), 1)) +
ggplot2::geom_hline(yintercept = 1, linetype = 2, size = 1, alpha = 0.75, color = "Yellow") +
ggplot2::geom_hline(yintercept = 0, linetype = 2, size = 1, alpha = 0.75, color = "Gray") +
ggplot2::geom_hline(yintercept = -1, linetype = 2, size = 1, alpha = 0.75, color = "Blue") +
ggplot2::coord_flip() +
ggthemes::theme_tufte(base_size = 20, base_family = "sans")
# Gets type and rank of effect
effect <- ""
rank <- 0
effect_type <- scores[[response_col]][ind]
if (effect_type == neg_type) {
effect <- "neg"
rank <- scores$neg_rank[ind]
} else {
effect <- "pos"
rank <- scores$pos_rank[ind]
}
# Saves to file
file_name <- paste0(effect, "_", rank, "_", gene1, "_", gene2, ".", plot_type)
suppressWarnings(ggplot2::ggsave(file.path(output_folder, file_name), width = 10, height = 7, dpi = 300))
}
}
#' Plot guide-level residuals for a given gene.
#'
#' Plots guide-level residuals for a given gene and returns the plot. Works for data
#' returned from \code{call_condition_hits}.
#'
#' @param scores Dataframe of scores returned from \code{call_condition_hits}.
#' @param residuals Residuals returned with the return_residuals argument set to true
#' from \code{call_condition_hits}.
#' @param gene Gene name for guides to plot.
#' @param control_name Name of control passed to \code{call_condition_hits}.
#' @param condition_name Name of condition passed to \code{call_condition_hits}.
#' @return A ggplot object.
#' @export
plot_condition_gene_residuals <- function(scores, residuals, gene, control_name, condition_name) {
# Checks that genes present in dataframe
if (!(gene %in% scores$gene1)) {
stop(paste("entry for gene", gene, "not in scores"))
}
# Gets guide-level residuals for the given gene
ind <- which(scores$gene1 == gene)
df <- residuals[residuals$n == ind,]
x_label <- paste0("Guides")
y_label <- paste0("Guide-level differential LFC for ", gene)
# Computes residuals
control_col <- paste0("mean_", control_name)
condition_col <- paste0("mean_", condition_name)
df$lfc <- df[[condition_col]] - df[[control_col]]
# Adds ID column for plotting
df$ID <- paste("Guide", 1:nrow(df))
# Plots data and returns plot
p <- ggplot2::ggplot(df) +
ggplot2::xlab(x_label) +
ggplot2::ylab(y_label) +
ggplot2::geom_bar(ggplot2::aes_string(x = "ID", y = "lfc"), stat = "identity", color = "Black",
fill = ggplot2::alpha(c("gray30"), 1)) +
ggplot2::geom_hline(yintercept = 1, linetype = 2, size = 1, alpha = 0.75, color = "Yellow") +
ggplot2::geom_hline(yintercept = 0, linetype = 2, size = 1, alpha = 0.75, color = "Gray") +
ggplot2::geom_hline(yintercept = -1, linetype = 2, size = 1, alpha = 0.75, color = "Blue") +
ggplot2::coord_flip() +
ggthemes::theme_tufte(base_size = 20, base_family = "sans")
return(p)
}
#' Plot guide-level LFCs for all gene pairs.
#'
#' Plots replicate comparisons for all replicates in a list of screens and outputs
#' plots to a given folder. Works for data returned from \code{call_combn_hits}.
#'
#' @param scores Dataframe of scores returned from \code{call_combn_hits}.
#' @param residuals Residuals returned with the return_residuals argument set to true
#' from \code{call_combn_hits}.
#' @param condition_name Name of condition passed to \code{call_combn_hits}.
#' @param output_folder Folder to output plots to.
#' @param neg_type Label for significant effects with a negative differential effect
#' passed to \code{call_combn_hits} (default "Negative").
#' @param pos_type Label for significant effects with a positive differential effect
#' passed to \code{call_combn_hits} (default "Positive").
#' @param plot_type Type of plot to output, one of "png" or "pdf" (default "png").
#' @export
plot_combn_residuals <- function(scores, residuals, condition_name, output_folder,
neg_type = "Negative", pos_type = "Positive",
plot_type = "png") {
# Checks plot type and converts to lowercase
plot_type <- tolower(plot_type)
if (plot_type != "png" & plot_type != "pdf") {
stop("plot_type must be either png or pdf")
}
# Makes output folder if it doesn't exist
if (!dir.exists(output_folder)) {
dir.create(output_folder)
}
# Gets top hits across both orientations
response_col <- paste0("effect_type_", condition_name)
single_col <- paste0("mean_single_", condition_name)
combn_col <- paste0("mean_combn_", condition_name)
diff_col <- paste0("differential_combn_vs_single_", condition_name)
scores <- scores[scores[[response_col]] != "None",]
residuals1 <- residuals[[1]]
residuals2 <- residuals[[2]]
residuals1 <- residuals1[residuals1$n %in% as.numeric(rownames(scores)),]
residuals1$lfc <- residuals1[[combn_col]] - residuals1[[single_col]]
residuals1$orientation <- "Orientation 1"
ignored_orientation <- all(residuals2 == 0)
if (!ignored_orientation) {
residuals2 <- residuals2[residuals2$n %in% as.numeric(rownames(scores)),]
residuals2$lfc <- residuals2[[combn_col]] - residuals2[[single_col]]
residuals2$orientation <- "Orientation 2"
}
# Gets ranking of top hits
neg_order <- order(scores[[diff_col]])
scores$neg_rank <- NA
scores$pos_rank <- NA
scores$neg_rank[neg_order] <- 1:nrow(scores)
scores$pos_rank[neg_order] <- nrow(scores):1
# Makes LFC plots for all top hits
for (i in unique(residuals1$n)) {
# Gets data
df <- NULL
if (!ignored_orientation) {
df1 <- residuals1[residuals1$n == i,]
df2 <- residuals2[residuals2$n == i,]
df1$orientation <- paste0(df1$orientation[1], " (n = ", nrow(df1), ")")
df2$orientation <- paste0(df2$orientation[1], " (n = ", nrow(df2), ")")
df <- rbind(df1, df2)
} else {
df1 <- residuals1[residuals1$n == i,]
df1$orientation <- paste0(df1$orientation[1], " (n = ", nrow(df1), ")")
df <- df1
}
# Gets gene names and sets axis labels
ind <- which(as.numeric(rownames(scores)) == i)
gene1 <- scores$gene1[ind]
gene2 <- scores$gene2[ind]
x_label <- paste0("Guides")
y_label <- paste0("Guide-level differential LFC for ", gene1, "/", gene2)
# Adds ID column for plotting
df$ID <- nrow(df):1
# Plots data
p <- ggplot2::ggplot(df) +
ggplot2::xlab(x_label) +
ggplot2::ylab(y_label) +
ggplot2::geom_bar(ggplot2::aes_string(x = "ID", y = "lfc"), stat = "identity", color = "Black",
fill = ggplot2::alpha(c("gray30"), 1)) +
ggplot2::geom_hline(yintercept = 1, linetype = 2, size = 1, alpha = 0.75, color = "Yellow") +
ggplot2::geom_hline(yintercept = 0, linetype = 2, size = 1, alpha = 0.75, color = "Gray") +
ggplot2::geom_hline(yintercept = -1, linetype = 2, size = 1, alpha = 0.75, color = "Blue") +
ggplot2::coord_flip() +
ggplot2::facet_grid(. ~ orientation) +
ggthemes::theme_tufte(base_size = 20, base_family = "sans") +
ggplot2::theme(axis.text.y = ggplot2::element_blank(),
axis.ticks.y = ggplot2::element_blank(),
panel.border = ggplot2::element_rect(fill = NA, color = "black"))
# Gets type and rank of effect
effect <- ""
rank <- 0
effect_type <- scores[[response_col]][ind]
if (effect_type == neg_type) {
effect <- "neg"
rank <- scores$neg_rank[ind]
} else {
effect <- "pos"
rank <- scores$pos_rank[ind]
}
# Saves to file
file_name <- paste0(effect, "_", rank, "_", gene1, "_", gene2, ".", plot_type)
suppressWarnings(ggplot2::ggsave(file.path(output_folder, file_name), width = 10, height = 7, dpi = 300))
}
}
#' Plot guide-level residuals for a given gene.
#'
#' Plots guide-level residuals for a given gene pair and returns the plot. Works for data
#' returned from \code{call_combn_hits}.
#'
#' @param scores Dataframe of scores returned from \code{call_combn_hits}.
#' @param residuals Residuals returned with the return_residuals argument set to true
#' from \code{call_combn_hits}.
#' @param gene1 First gene name for guides to plot.
#' @param gene2 Second gene name for guides to plot.
#' @param condition_name Name of condition passed to \code{call_combn_hits}.
#' @return A ggplot object.
#' @export
plot_combn_gene_residuals <- function(scores, residuals, gene1, gene2, condition_name) {
# Checks that genes present in dataframe
if (!((gene1 %in% scores$gene1) & (gene2 %in% scores$gene2)) &
!((gene1 %in% scores$gene2) & (gene2 %in% scores$gene1))) {
stop(paste("entry for gene", gene1, "and", gene2, "not in scores"))
}
# Splits and preps residuals dataframes
residuals1 <- residuals[[1]]
residuals2 <- residuals[[2]]
residuals1$orientation <- "Orientation 1"
residuals2$orientation <- "Orientation 2"
# Gets guide-level residuals for the given gene
ind <- max(which(scores$gene1 == gene1 & scores$gene2 == gene2),
which(scores$gene1 == gene2 & scores$gene2 == gene1))
df1 <- residuals1[residuals1$n == ind,]
df2 <- residuals2[residuals2$n == ind,]
df1$orientation <- paste0(df1$orientation[1], " (n = ", nrow(df1), ")")
df2$orientation <- paste0(df2$orientation[1], " (n = ", nrow(df2), ")")
df <- rbind(df1, df2)
x_label <- paste0("Guides")
y_label <- paste0("Guide-level differential LFC for ", gene1, "/", gene2)
# Computes residuals
single_col <- paste0("mean_single_", condition_name)
combn_col <- paste0("mean_combn_", condition_name)
df$lfc <- df[[combn_col]] - df[[single_col]]
# Adds ID column for plotting
df$ID <- paste("Guide", 1:nrow(df))
# Plots data and returns plot
p <- ggplot2::ggplot(df) +
ggplot2::xlab(x_label) +
ggplot2::ylab(y_label) +
ggplot2::geom_bar(ggplot2::aes_string(x = "ID", y = "lfc"), stat = "identity", color = "Black",
fill = ggplot2::alpha(c("gray30"), 1)) +
ggplot2::geom_hline(yintercept = 1, linetype = 2, size = 1, alpha = 0.75, color = "Yellow") +
ggplot2::geom_hline(yintercept = 0, linetype = 2, size = 1, alpha = 0.75, color = "Gray") +
ggplot2::geom_hline(yintercept = -1, linetype = 2, size = 1, alpha = 0.75, color = "Blue") +
ggplot2::coord_flip() +
ggplot2::facet_grid(. ~ orientation) +
ggthemes::theme_tufte(base_size = 20, base_family = "sans") +
ggplot2::theme(axis.text.y = ggplot2::element_blank(),
axis.ticks.y = ggplot2::element_blank(),
panel.border = ggplot2::element_rect(fill = NA, color = "black"))
return(p)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.