# ---------------------------
# generate_plots.R
# Plot reordered seed_results
# (i.e. plot by method).
# ---------------------------
# Necessary functions
# ---------------------------
# ---------------------------
# NOTE: no for loops?
# ---------------------------
# For each efficacy, for each distance, generate a plot of the clusters
#' plot_clusts
#'
#' @export
plot_clusts = function(seed_data, seed_result, dists, effs, seed, method_name, results_dir){
method_results_dir = paste(results_dir, method_name, "_clusts/", sep='')
dir.create(method_results_dir, showWarnings=FALSE)
method_results_dir = paste(method_results_dir, "seed_", seed, '/', sep='')
dir.create(method_results_dir, showWarnings=FALSE)
two_dir = paste(method_results_dir, "2d/", sep='')
dir.create(two_dir, showWarnings=FALSE)
three_dir = paste(method_results_dir, "3d/", sep='')
dir.create(three_dir, showWarnings=FALSE)
theta = 20
phi = 15
seed_groups = seed_result$effs_new_groups
for (e in 1:length(effs)){
eff_data = seed_data[seed_data[,"eff"] == effs[e],]
eff_result = seed_groups[seed_groups[,"eff"] == effs[e],]
eff_data = as.matrix(data.frame(eff_data, new_groups=eff_result[,"new_groups"]))
if (any(colnames(eff_result) == "probs")){
eff_data = cbind(eff_data, probs=eff_result[,"probs"])
}
# NOTE: jank - not arbitrary
plot_types = c("Old", "True")
# NOTE: outline by UC2 value or something? Help with 3D
for (i in 1:length(plot_types)){
main = paste(plot_types[i], "vs New Labels", "\nEfficacy:", effs[e], sep=' ')
jpg_name1 = paste(two_dir, tolower(plot_types[i]), "_clusters_",
effs[e], "_2d.jpg", sep='')
jpg_name2 = paste(three_dir, tolower(plot_types[i]), "_clusters_",
effs[e], "_3d.jpg", sep='')
dist_labels = paste("Distance:", dists, sep=' ')
names(dist_labels) = dists
color = ifelse(any(colnames(eff_data) == "probs"), "probs", "new_groups")
shape = ifelse(plot_types[i] == "True", "prot_groups", "inf_groups")
eff_data = as.data.frame(eff_data)
eff_data[,color] = as.factor(eff_data[,color])
eff_data[,shape] = as.factor(eff_data[,shape])
eff_data$dist = as.factor(eff_data$dist)
var_names = colnames(eff_data)
var_names = var_names[!(grepl("_groups", var_names))]
var_names = var_names[!(var_names %in% c("dist", "eff"))]
g1 = ggplot(eff_data, aes_string(x=var_names[1], y=var_names[2], color=color,
shape=shape)) +
geom_point() +
facet_wrap( ~ dist, ncol=3, labeller=labeller(dist=dist_labels)) +
ggtitle(main) +
xlab("UC1") +
ylab("UC2") +
xlim(0, 14) +
ylim(0, 14)
g2 = ggplot(eff_data, aes_string(x=var_names[1], y=var_names[2], z=var_names[3],
color=color, shape=shape)) +
axes_3D(theta=theta, phi=phi) +
stat_3D(theta=theta, phi=phi) +
facet_wrap( ~ dist, ncol=3, labeller=labeller(dist=dist_labels)) +
ggtitle(main) +
labs_3D(inherit.aes=FALSE, data=eff_data,
mapping=aes_string(x=var_names[1], y=var_names[2], z=var_names[3]),
theta=theta, phi=phi, labs=c("UC1", "UC2", "UC3")) +
theme_void()
suppressMessages(ggsave(jpg_name1, g1, device="jpeg", dpi=400))
suppressMessages(ggsave(jpg_name2, g2, device="jpeg", dpi=400))
}
gc()
}
}
# ---------------------------
# Requires:
# library(ggplot2)
# NOTE: na.rm before ggplot does it so ribbons appear
# for the data that does show up.
# NOTE: this code has lots of jank.
#' plot_stats
#'
#' @export
plot_stats = function(results, comp_num, dists, effs, results_dir, method_name, main, xlab, ylab, ylim, val_name, transform, line_method=NULL, alpha=0.05){
plot_type1 = ifelse(val_name == "volcano", "volcano_plot_", paste(val_name, "s_no_ribbon_", sep=''))
plot_type2 = ifelse(val_name == "volcano", "volcano_plot_avg_", paste(val_name, "s_avg_", sep=''))
jpg_name1 = paste(results_dir, plot_type1, method_name, ".jpg", sep='')
jpg_name2 = paste(results_dir, plot_type2, method_name, ".jpg", sep='')
val_type = paste(val_name, "_type", sep='')
# ---------------------------
# Get normal and permuted data
if (val_name == "volcano"){
p_value_data = results$method_results[["p_value"]]
p_value_data = p_value_data[p_value_data[,"component"] == comp_num,]
effect_size_data = results$method_results[["effect_size"]]
effect_size_data = effect_size_data[effect_size_data[,"component"] == comp_num,]
data = cbind(p_value_data, effect_size_data)
data = data[,!duplicated(colnames(data))]
p_value_data = results$permuted_method_results[["p_value"]]
p_value_data = p_value_data[p_value_data[,"component"] == comp_num,]
effect_size_data = results$permuted_method_results[["effect_size"]]
effect_size_data = effect_size_data[effect_size_data[,"component"] == comp_num,]
permuted_data = cbind(p_value_data, effect_size_data)
permuted_data = permuted_data[,!duplicated(colnames(permuted_data))]
} else {
data = results$method_results[[val_name]]
data = data[data[,"component"] == comp_num,]
permuted_data = results$permuted_method_results[[val_name]]
permuted_data = permuted_data[permuted_data[,"component"] == comp_num,]
}
# ---------------------------
# Convert everything to long format so we can plot easily
to_plot = generate_plottable_stats(data, dists, effs, method_name,
val_name, transform, line_method,
alpha, permuted=FALSE)
permuted_to_plot = generate_plottable_stats(permuted_data, dists, effs,
method_name, val_name,
transform, line_method,
alpha, permuted=TRUE)
# ---------------------------
# Set up graphics
if (val_name == "volcano"){
gopts = generate_plottable_gopts(method_name, main, permuted,
to_plot$plot1_data, dists,
effs, val_name, val_type,
facet_type="p_value_type")
} else {
gopts = generate_plottable_gopts(method_name, main, permuted,
to_plot$plot1_data, dists,
effs, val_name, val_type,
facet_type=val_type)
}
gopts$ylab = ylab
gopts$ylim = ylim
gopts$xlab = xlab
gopts$line_method = to_plot$line_method
g1 = generate_stats_g1(to_plot, permuted_to_plot, val_name, gopts)
# Average volcano, or average with ribbon
g2 = generate_stats_g2(g1, to_plot, permuted_to_plot, val_name, val_type, gopts)
suppressMessages(ggsave(jpg_name1, g1, device="jpeg", dpi=400))
suppressMessages(ggsave(jpg_name2, g2, device="jpeg", dpi=400))
gc()
}
# ---------------------------
# Generate labels, etc.
#' generate_plottable_gopts
#'
#' @export
generate_plottable_gopts = function(method_name, main, permuted, data, dists, effs, val_name, val_type, facet_type){
gopts = list()
# ggplot2 defaults
gopts$group_cols = c("#F8766D", "#00BA38", "#619CFF")
gopts$gg_main = main
if (val_name == "score"){
gopts$facet_labels = c("Infected/Protected", "Prediction/Protected")
} else {
gopts$facet_labels = c("Infected", "Prediction", "Protected")
}
names(gopts$facet_labels) = unique(data[,facet_type])
gopts$facet_rows = length(gopts$facet_labels)
gopts$facet_by = ifelse(val_name == "volcano", "p_value_type", val_type)
gopts$color_scale_name = "Subj. Groups"
if (val_name == "score"){
gopts$group_labs = c("Inf./Prot.", "Pred./Prot.")
gopts$color_scale_name = "Comparison"
} else {
gopts$group_labs = c("Inf.", "Pred.", "Prot.")
}
gopts$shape_scale_name = "Label Type"
if (val_name == "volcano"){
gopts$plot1_x_name = "effect_size"
gopts$plot1_y_name = "p_value"
gopts$plot_color = "eff"
gopts$plot_shape = "dist"
gopts$linetype_scale_name = "MHC Cutoff"
gopts$plot2_x_name = "avg_effect_size"
gopts$plot2_y_name = "avg_p_value"
gopts$plot_line_x_name = gopts$plot2_x_name
gopts$plot_line_y_name = gopts$plot2_y_name
gopts$plot_line_inherit = FALSE
gopts$xlim = c(-1.05, 1.05)
} else {
gopts$plot1_x_name = "dist"
gopts$plot1_y_name = val_name
gopts$plot_color = "eff"
gopts$plot_shape = "perm"
gopts$linetype_scale_name = ifelse(val_name == "p_value", "MHC Cutoff", " ")
gopts$dist_labs = "Original"
gopts$plot2_x_name = gopts$plot1_x_name
gopts$plot2_y_name = "avg_value"
gopts$plot_line_x_name = gopts$plot2_x_name
gopts$plot_line_y_name = gopts$plot2_y_name
gopts$plot_line_inherit = TRUE
gopts$xlim = c(0, max(dists))
}
gopts$shape_labs = c("Original", "Permuted")
gopts$shape_vals = c(17, 6)
gopts$color_labs = gopts$group_labs
gopts$color_vals = gopts$group_cols
gopts$line_labs = c("Original", "Permuted")
gopts$line_vals = c("solid", "dashed")
gopts$plot_line_color = gopts$plot_color
return(gopts)
}
# ---------------------------
# Convert all data to long format
#' generate_plottable_stats
#'
#' @export
generate_plottable_stats = function(data, dists, effs, method_name, val_name, transform, line_method, alpha, permuted){
val_type = paste(val_name, "_type", sep='')
data = as.data.frame(data)
data$eff = as.factor(data$eff)
# Get only the variable type you care about; convert wide to long.
if (val_name == "volcano"){
p_value_data = melt_val_type_data(data, "p_value", "p_value_type")
effect_size_data = melt_val_type_data(data, "effect_size", "effect_size_type")
plot1_data = cbind(p_value_data, effect_size_data)
plot1_data = plot1_data[,!duplicated(colnames(plot1_data))]
tmp_val_name = "p_value"
} else {
plot1_data = melt_val_type_data(data, val_name, val_type)
tmp_val_name = val_name
}
# NOTE: Do not affect p-values before this! It will change the cutoff.
# Make them nice for plotting afterwards.
if (!is.null(line_method) && (val_name == "volcano" || val_name == "p_value")){
if (line_method == "bonferroni"){
y_int = -log10(alpha / nrow(plot1_data))
} else if (line_method == "fdr"){
y_int = fdr_line(plot1_data[,"p_value"], alpha)
if (is.na(y_int)){
y_int = alpha / nrow(plot1_data)
warning("Defaulting to Bonferroni cutoff line...", call.=TRUE)
line_method = "bonferroni"
}
if (!is.null(transform) && transform == "-log10"){
y_int = -log10(y_int)
}
}
}
y_int = ifelse(exists("y_int"), y_int, 0)
# Just a horizontal line
y_int_frame = cbind(y_int=rep(y_int, length(effs)), eff=effs)
# Transform the data as necessary
if (!is.null(transform) && tmp_val_name == "p_value"){
if (transform == "-log10"){
plot1_data[,tmp_val_name] = -log10(plot1_data[,tmp_val_name])
plot1_data[,tmp_val_name][plot1_data[,tmp_val_name] > 5] = 5
}
}
# Get average value at each dist-eff combo.
if (val_name == "volcano"){
plot2_data = lapply(effs, function(eff, plot1_data){
eff_data = plot1_data[plot1_data[,"eff"] == eff,]
avg_p_value = get_avg_stats(dists, eff, eff_data,
"p_value", "p_value_type",
do_sd=FALSE,
include_cols=TRUE)
avg_effect_size = get_avg_stats(dists, eff, eff_data,
"effect_size",
"effect_size_type",
do_sd=FALSE,
include_cols=FALSE)
return(cbind(avg_p_value, avg_effect_size))
}, plot1_data)
} else {
plot2_data = lapply(effs, function(eff, plot1_data, val_name, val_type){
eff_data = plot1_data[plot1_data[,"eff"] == eff,]
tmp = get_avg_stats(dists, eff, eff_data, val_name,
val_type, do_sd=TRUE,
include_cols=TRUE)
tmp[,"dist"] = as.numeric(as.character(tmp[,"dist"]))
return(tmp)
}, plot1_data, val_name, val_type)
}
plot2_data = do.call(rbind, plot2_data)
if (is.null(line_method)){
line_method = "Null Model"
# Make a pretty
} else if (line_method == "fdr"){
line_method = toupper(line_method)
} else if (line_method == "bonferroni"){
line_method = "Bonferroni"
}
plot1_data = cbind(plot1_data, perm=permuted)
plot1_data[,"perm"] = as.factor(plot1_data[,"perm"])
plot2_data = cbind(plot2_data, perm=permuted)
plot2_data[,"perm"] = as.factor(plot2_data[,"perm"])
# Just makes it a little clearer, since "plot1" and "plot2" are such
# informative names.
plot_addon_data = plot2_data
if (val_name == "volcano"){
plot1_data[,"dist"] = as.factor(plot1_data[,"dist"])
plot2_data[,"dist"] = as.factor(plot2_data[,"dist"])
}
gc()
return(list(plot1_data=plot1_data, plot2_data=plot2_data,
plot_addon_data=plot_addon_data, y_int_frame=y_int_frame,
line_method=line_method))
}
# ---------------------------
# Generate the first plot object
# Either volcano plot or val_type
# plot
#' generate_stats_g1
#'
#' @export
generate_stats_g1 = function(to_plot, permuted_to_plot, val_name, gopts){
plot1_data = rbind(to_plot$plot1_data, permuted_to_plot$plot1_data)
plot_addon_data = rbind(to_plot$plot_addon_data, permuted_to_plot$plot_addon_data)
# TODO: how does this work? do you get the permuted line?
y_int_frame = rbind(to_plot$y_int_frame, permuted_to_plot$y_int_frame)
y_int_frame = as.data.frame(y_int_frame, stringsAsFactors=FALSE)
val_type = paste(val_name, "_type", sep='')
# Testing
# plot1_data = to_plot$plot1_data
# plot_addon_data = to_plot$plot_addon_data
# y_int_frame = to_plot$y_int_frame
# y_int_frame = as.data.frame(y_int_frame, stringsAsFactors=FALSE)
plot1_data$eff = as.factor(plot1_data$eff)
plot_addon_data$eff = as.factor(plot_addon_data$eff)
g1 = ggplot(plot1_data, aes_string(x=gopts$plot1_x_name,
y=gopts$plot1_y_name,
color=gopts$plot_color,
shape="perm")) +
geom_point() +
geom_line(data=plot_addon_data, inherit.aes=gopts$plot_line_inherit,
aes_string(x=gopts$plot_line_x_name,
y=gopts$plot_line_y_name,
color=gopts$plot_color,
linetype="perm")) +
# This makes a merged shape and line legend
scale_shape_manual(name=gopts$shape_scale_name,
labels=gopts$shape_labs,
values=gopts$shape_vals) +
scale_linetype_manual(name=gopts$shape_scale_name,
labels=gopts$shape_labs,
values=gopts$line_vals) +
# Separate legend for group color-coding
scale_color_hue(name="Efficacy") +
ggtitle(gopts$gg_main) +
facet_wrap(gopts$facet_by, nrow=gopts$facet_rows,
labeller=as_labeller(gopts$facet_labels)) +
xlab(gopts$xlab) +
ylab(gopts$ylab) +
xlim(gopts$xlim) +
ylim(gopts$ylim) +
geom_hline(data=y_int_frame, color="red",
mapping=aes(yintercept=y_int, size=as.factor(0.5)),
linetype="dashed") +
# Sneaky use of size to get a second legend for the linetype
# aesthetic. Thanks to:
# http://www.quantide.com/ggplot-multiple-legends-for-the-same-aesthetic/
scale_size_manual(gopts$linetype_scale_name, values=c(0.5), labels=gopts$line_method)
return(g1)
}
# ---------------------------
# Generate the second plot object
# Either avg volcano or g1 with
# geom_ribbon
# TODO: incorporate information on distance into volcano plot somehow
# TODO: legend item for fill that isn't ugly
#' generate_stats_g2
#'
#' @export
generate_stats_g2 = function(g1, to_plot, permuted_to_plot, val_name, val_type, gopts){
plot2_data = rbind(to_plot$plot2_data, permuted_to_plot$plot2_data)
plot2_data$eff = as.factor(plot2_data$eff)
plot_addon_data = rbind(to_plot$plot_addon_data, permuted_to_plot$plot_addon_data)
y_int_frame = rbind(to_plot$y_int_frame, permuted_to_plot$y_int_frame)
y_int_frame = as.data.frame(y_int_frame, stringsAsFactors=FALSE)
# # Testing
# plot2_data = to_plot$plot2_data
# plot_addon_data = to_plot$plot_addon_data
# y_int_frame = to_plot$y_int_frame
plot_addon_data$eff = as.factor(plot_addon_data$eff)
g2 = ggplot(plot2_data, aes_string(x=gopts$plot2_x_name,
y=gopts$plot2_y_name,
color=gopts$plot_color,
shape="perm")) +
geom_point() +
scale_shape_manual(name=gopts$shape_scale_name,
labels=gopts$shape_labs,
values=gopts$shape_vals) +
geom_line(data=plot_addon_data, inherit.aes=FALSE,
aes_string(x=gopts$plot_line_x_name,
y=gopts$plot_line_y_name,
color=gopts$plot_line_color,
linetype="perm")) +
scale_color_hue(name="Efficacy") +
scale_linetype_manual(name=gopts$shape_scale_name,
labels=gopts$line_labs,
values=gopts$line_vals) +
ggtitle(gopts$gg_main) +
facet_wrap(gopts$facet_by, nrow=gopts$facet_rows,
labeller=as_labeller(gopts$facet_labels)) +
xlab(gopts$xlab) +
ylab(gopts$ylab) +
xlim(gopts$xlim) +
ylim(gopts$ylim) +
# Sneaky use of size to get a second legend for the linetype
geom_hline(data=y_int_frame, color="red",
mapping=aes(yintercept=y_int, size=as.factor(0.5)),
linetype="dashed") +
scale_size_manual(name=gopts$linetype_scale_name, values=c(0.5),
labels=gopts$line_method)
if (val_name != "volcano"){
g2 = g2 + geom_ribbon(data=plot_addon_data,
aes_string(x="dist", ymin="avg_value-avg_sd",
ymax="avg_value+avg_sd",
alpha=0.6, color="eff",
fill="perm"),
inherit.aes=FALSE, show.legend=FALSE) +
# Put points back on top
geom_point(data=plot2_data) +
# Draw the hline again so it's on top of the ribbon
# Sneaky use of size to get a second legend for the linetype
geom_hline(data=y_int_frame, color="red",
mapping=aes(yintercept=y_int, size=as.factor(0.5)),
linetype="dashed")
}
return(g2)
}
#' wrap_plot_stats
#'
#' @export
wrap_plot_stats = function(dists, effs, dims, results, method_name, line_method, results_dir, permuted=FALSE){
cat("Generating plots for", method_name, '\n')
method_results_dir = paste(results_dir, method_name, '/', sep='')
dir.create(method_results_dir, showWarnings=FALSE)
pb = txtProgressBar(0, dims, style=3)
for (i in 1:dims){
var_results_dir = paste(method_results_dir, "uc", i, '/', sep='')
dir.create(var_results_dir, showWarnings=FALSE)
# ---------------------------
# 2A. Plot score versus distance for 1C, for each eff step
cat("\tPlotting scores...")
main = ""
plot_stats(results, i, dists, effs, var_results_dir, method_name, main,
"Distance (sd)", "Matthews Correlation Coefficient", c(-1.25, 1.25),
"score", NULL)
# ---------------------------
# 2B. Plot p-values for all components for old, new, true labels for each eff step
cat("\tPlotting p-values...")
main = ""
plot_stats(results, i, dists, effs, var_results_dir, method_name, main,
"Distance (sd)", "-log10(p-value)", c(-1, 7), "p_value", "-log10",
line_method=line_method)
# ---------------------------
# 2C. Plot effect_size for all components for old, new, true labels for each eff step
cat("\tPlotting effect sizes...")
main = ""
plot_stats(results, i, dists, effs, var_results_dir, method_name, main,
"Distance (sd)", "Cliff's Delta", c(-1.05, 1.05), "effect_size",
NULL)
# ---------------------------
# 2F. Plot volcanoes for all components for old, new, true labels for each eff step
cat("\tPlotting volcanoes...")
main = ""
plot_stats(results, i, dists, effs, var_results_dir, method_name, main,
"Cliff's Delta", "-log10(p-value)", c(-1, 7), "volcano",
"-log10", line_method=line_method)
setTxtProgressBar(pb, i)
}
cat('\n')
}
# ---------------------------
# WRAPPER FOR IT ALL
# ---------------------------
#' generate_plots
#'
#' @export
generate_plots = function(seed_data, method_results, method_names, dims, dists, effs, line_method, results_dir, doplot, dostats, doclusts){
# For each method, plot all seed data
if (doplot){
# ---------------------------
# 2A. Plot score vs distance for 1C, for each eff step
# 2B. Plot p-values for all components for old, new, true labels for each eff step
# 2C. Plot effect_size for all components for old, new, true labels for each eff step
# 2F. Plot volcanoes for all components for old, new, true labels for each eff step
w_p_s_expr = expression(
if (is.null(globals_list$do_permute) || !globals_list$do_permute){
wrap_plot_stats(dists, effs, dims, method_results[[m]],
method_names[m], line_method,
results_dir, permuted=FALSE)
} else {
wrap_plot_stats(dists, effs, dims, method_results[[m]],
method_names[m], line_method,
results_dir, permuted=TRUE)
}
)
if (dostats){
if (dopar){
invisible(foreach (m=1:length(methods), .inorder=FALSE) %dopar% {
eval(w_p_s_expr)
})
} else {
invisible(foreach (m=1:length(methods)) %do% {
eval(w_p_s_expr)
})
}
}
# ---------------------------
# 2D. Plot clusters
p_c_expr = expression(
if (is.null(globals_list$do_permute) || !globals_list$do_permute){
# The data is the same, but the new_groups are different,
# hence the need for seed_results.
for (m in 1:length(methods)){
plot_clusts(seed_data[[s_idx]], seed_results[[s_idx]][[m]],
dists, effs, seeds[s_idx], method_names[m],
results_dir)
}
} else {
for (m in 1:length(methods)){
plot_clusts(seed_data[[s_idx]]$effs_data,
seed_results[[s_idx]]$methods_results[[m]],
dists, effs, seeds[s_idx], method_names[m],
results_dir)
}
}
)
if (doclusts){
cat("Plotting clusters...\n")
if (globals_list$par_seeds){
invisible(foreach (s_idx=1:length(seeds), .inorder=FALSE) %dopar% {
eval(p_c_expr)
})
} else {
invisible(foreach (s_idx=1:length(seeds)) %do% {
eval(p_c_expr)
})
}
}
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.