R/forecastingplots.R

Defines functions plot_R2_verus_sweep_metric plot_viol_sweep_metric plot_curves_faceted plot_trajectories_by_diversity plot_trajectories_by_K plot_scatter_waiting_time_diversity plot_scatter_outcome_diversity plot_corr_waiting_time_versus_depth plot_corr_waiting_time_versus_start_size plot_corr_outcome_versus_period

Documented in plot_corr_outcome_versus_period plot_corr_waiting_time_versus_depth plot_corr_waiting_time_versus_start_size plot_curves_faceted plot_R2_verus_sweep_metric plot_scatter_outcome_diversity plot_scatter_waiting_time_diversity plot_trajectories_by_diversity plot_trajectories_by_K plot_viol_sweep_metric

#' Plot correlation between outcome and diversity, versus projection period, coloured by K, for each value of tumour size at measurement
#' 
#' @param df dataframe generated by get_cor_summary
#' @param col_name name of column containing correlation coefficients, with or without "Cor_" suffix (default "DriverDiversity")
#' @param output_dir folder in which to save the image file; if NA then plots are displayed on screen instead
#' @param output_filename name of output image file
#' @param file_type either "pdf" or "png" (other values default to "pdf")
#' 
#' @return a plot object
#' 
#' @export
#' @import dplyr
#' @importFrom graphics abline
#' @importFrom graphics legend
#' 
#' @examples 
#' plot_corr_outcome_versus_period(cor_summary)
plot_corr_outcome_versus_period <- function(df, col_name = "DriverDiversity", output_filename = "corr_outcome_versus_period", file_type = "png", output_dir = NA) {
  if(!is.na(output_dir)) if(substr(output_dir, nchar(output_dir), nchar(output_dir)) != "/") output_dir <- paste0(output_dir, "/")
  
  start_size_range <- unique(df$start_size)
  K_range <- unique(df$K)
  
  panel_cols <- ceiling(length(start_size_range) / 2)
  
  if(!is.na(output_filename) & !is.na(output_dir)) {
    if(file_type == "png") png(paste0(output_dir, output_filename,".png"), width = 300 * panel_cols, height = 500, res = 100)
    else pdf(paste0(output_dir, output_filename,".pdf"), width = 3 * panel_cols, height = 5)
  }
  
  if(substr(col_name, 1, 4) != "Cor_") col_name <- paste0("Cor_", col_name)
  col_name_suffix <- substr(col_name, 5, nchar(col_name))
  
  cols <- rainbow(length(K_range))
  
  par(mfrow=c(2, panel_cols))
  par(mar=c(4.5, 5, 1.5, 1))
  for(i in 1:length(start_size_range)) {
    start_size_val <- start_size_range[i]
    title <- paste("Measure at ", start_size_val, " cells", sep = "")
    for(j in 1:length(K_range)) {
      K_val <- K_range[j]
      if(j == 1) {
        plot(1, type = "n", xlim = c(0, 1), ylim = c(-1, 1),
             main = title, xlab = "projection period", ylab = paste0("correlation coefficient:\n", col_name_suffix, " vs tumour size"))
        if(i == 1) legend("bottomleft", as.character(K_range), title = "K", ncol = 3, 
                                                     xpd = TRUE, horiz = FALSE, inset = c(0, 0), bty = "n", lty = 1, col = cols, lwd = 2)
      }
      df_filtered <- filter(df, start_size == start_size_val, K == K_val)
      lines(df_filtered[[col_name]] ~ df_filtered$gap, 
            col = cols[j], lwd = 2)
      abline(h = 0, untf = FALSE, lty = 3)
    }
  }
  
  if(!is.na(output_filename) & !is.na(output_dir)) dev.off()
}

#' Plot correlation between waiting time and diversity, versus tumour size at measurement, coloured by K
#' 
#' @param df dataframe generated by get_wait_cor_summary
#' @param col_name name of column containing correlation coefficients, with or without "Cor_" suffix (default "DriverDiversity")
#' @param output_dir folder in which to save the image file; if NA then plots are displayed on screen instead
#' @param output_filename name of output image file
#' @param file_type either "pdf" or "png" (other values default to "pdf")
#' 
#' @return a plot object
#' 
#' @export
#' @import dplyr
#' @importFrom grDevices rainbow
#' @importFrom graphics legend
#' 
#' @examples 
#' plot_corr_waiting_time_versus_start_size(wait_cor_summary)
plot_corr_waiting_time_versus_start_size <- function(df, col_name = "DriverDiversity", output_filename = "corr_waiting_time_versus_start_size", file_type = "png", output_dir = NA) {
  if(!is.na(output_dir)) if(substr(output_dir, nchar(output_dir), nchar(output_dir)) != "/") output_dir <- paste0(output_dir, "/")
  
  if(!is.na(output_filename) & !is.na(output_dir)) {
    if(file_type == "png") png(paste0(output_dir, output_filename, ".png"), width = 500, height = 500, res = 100)
    else pdf(paste0(output_dir, output_filename, ".pdf"), width = 5, height = 5)
  }
  
  if(substr(col_name, 1, 4) != "Cor_") col_name <- paste0("Cor_", col_name)
  col_name_suffix <- substr(col_name, 5, nchar(col_name))
  
  start_size_range <- unique(df$start_size)
  K_range <- unique(df$K)
  
  cols <- rainbow(length(K_range))
  
  par(mfrow=c(1, 1))
  par(mar=c(4, 4, 2, 2))
  for(j in 1:length(K_range)) {
    K_val <- K_range[j]
    if(j == 1) {
      plot(1, type = "n", xlim = c(0, max(start_size_range)), ylim = c(-1, 1),
           main = "", xlab = "tumour size at measurement", ylab = paste0("correlation coefficient:\n", col_name_suffix, " vs waiting time"))
      legend("topleft", as.character(K_range), title = "K", ncol = 3, lwd = 2, 
             xpd = TRUE, horiz = FALSE, inset = c(0, 0), bty = "n", lty = 1, col = cols)
      }
    df_filtered <- filter(df, K == K_val)
    lines(df_filtered[[col_name]] ~ df_filtered$start_size, 
          col = cols[j], lwd = 2)
    abline(h = 0, untf = FALSE, lty = 3)
    }
  
  if(!is.na(output_filename) & !is.na(output_dir)) dev.off()
}

#' Plot correlations between waiting time and diversity measured at different depths, coloured by size at measurement
#' 
#' @param df dataframe generated by get_wait_cor_summary
#' @param col_prefix text that appears in all names of columns containing correlation statistics
#' @param output_dir folder in which to save the image file; if NA then plots are displayed on screen instead
#' @param output_filename name of output image file
#' @param file_type either "pdf" or "png" (other values default to "pdf")
#' 
#' @return a plot object
#' 
#' @export
#' 
#' @examples
#' library(dplyr)
#' plot_corr_waiting_time_versus_depth(filter(depth_wait_cor_summary, K == 16), 
#' "DriverDiversityFrom1SamplesAtDepth")
plot_corr_waiting_time_versus_depth <- function(df, col_prefix, output_filename = "corr_waiting_time_versus_depth", file_type = "png", output_dir = NA) {
  if(!is.na(output_dir)) if(substr(output_dir, nchar(output_dir), nchar(output_dir)) != "/") output_dir <- paste0(output_dir, "/")
  
  if(!is.na(output_filename) & !is.na(output_dir)) {
    if(file_type == "png") png(paste0(output_dir, output_filename, ".png"), width = 400, height = 350, res = 100)
    else pdf(paste0(output_dir, output_filename, ".pdf"), width = 4, height = 3.5)
  }
  
  par(mfrow=c(1,1))
  par(mar=c(4.5,5,2,1))
  col_nums <- which(grepl(col_prefix, colnames(df)))
  depth <- (0:(length(col_nums)-1)) / (length(col_nums)-1)
  sizes <- unique(df$start_size)
  cols <- rainbow(length(sizes))
  plot(as.numeric(df[which(df$start_size == sizes[1]), col_nums]) ~ depth, ylim = c(-1, 1), col = cols[1], type = "l",
       xlab = "measurement distance from periphery (%)", 
       ylab = "correlation coefficient:\ndiversity vs waiting time", xaxt = "n", lwd = 2)
  axis(1, at=0.2*0:5,labels=20*0:5)
  for(i in 2:length(sizes)) {
    lines(as.numeric(df[which(df$start_size == sizes[i]), col_nums]) ~ depth, col = cols[i], lwd = 2)
  }
  par(fig = c(0, 1, 0, 1), oma = c(0, 0, 0, 0), mar = c(0, 0, 0, 0), new = TRUE)
  plot(0, 0, type = "n", bty = "n", xaxt = "n", yaxt = "n")
  legend("topright", as.character(sizes), title = "size at measurement", xpd = TRUE, ncol = 2, 
         horiz = FALSE, inset = c(0.1, 0.1), bty = "n", lty = 1, col = cols, lwd = 2)
  
  if(!is.na(output_filename) & !is.na(output_dir)) dev.off()
}

#' Example scatter plots of outcome versus diversity
#' 
#' @param df dataframe
#' @param col_name name of column containing a diversity metric (default "DriverDiversity")
#' @param output_dir folder in which to save the image file; if NA then plots are displayed on screen instead
#' @param output_filename name of output image file
#' @param file_type either "pdf" or "png" (other values default to "pdf")
#' 
#' @return a plot object
#' 
#' @export
#' @importFrom stats lm
#' 
#' @examples
#' library(dplyr)
#' plot_scatter_outcome_diversity(filter(summary, K == 16, start_size == 500))
plot_scatter_outcome_diversity <- function(df, col_name = "DriverDiversity", output_filename = "scatter_outcome_diversity", file_type = "png", output_dir = NA) {
  if(!is.na(output_dir)) if(substr(output_dir, nchar(output_dir), nchar(output_dir)) != "/") output_dir <- paste0(output_dir, "/")
  
  gap_list <- unique(df$gap)
  
  panel_cols <- ceiling(length(gap_list) / 2)
  
  if(!is.na(output_filename) & !is.na(output_dir)) {
    if(file_type == "png") png(paste0(output_dir, output_filename, ".png"), width = 200 * panel_cols, height = 400, res = 100)
    else pdf(paste0(output_dir, output_filename, ".pdf"), width = 2 * panel_cols, height = 4)
  }
  
  par(mfrow=c(2, panel_cols))
  par(mar=c(4.5,5,2,1))
  for(gap in gap_list) {
    plot_data <- df[which(df$gap == gap), ]
    title <- paste("Forecast period ", gap, sep = "")
    if(sum(!is.na(plot_data$outcome)) == 0) plot(1, type = "n", 
                                                 main = "", xlab = col_name, ylab = "final tumour size")
    else {
      plot(plot_data$outcome ~ plot_data[[col_name]], col = "black",
         main = "", xlab = col_name, ylab = "final tumour size")
      mod1 <- lm(plot_data$outcome ~ plot_data[[col_name]])
      if(!is.na(mod1$coefficients[2])) abline(mod1, lty = 2)
    }
    title(title, line = 0.5)
  }
  
  if(!is.na(output_filename) & !is.na(output_dir)) dev.off()
}

#' Example scatter plots of waiting time versus diversity
#' 
#' @param df dataframe
#' @param col_name name of column containing a diversity metric (default "DriverDiversity")
#' @param output_dir folder in which to save the image file; if NA then plots are displayed on screen instead
#' @param output_filename name of output image file
#' @param file_type either "pdf" or "png" (other values default to "pdf")
#' 
#' @return a plot object
#' 
#' @export
#' 
#' @examples
#' library(dplyr)
#' plot_scatter_waiting_time_diversity(filter(summary, K == 16, gap == min(summary$gap)))
plot_scatter_waiting_time_diversity <- function(df, col_name = "DriverDiversity", output_filename = "scatter_waiting_time_diversity", file_type = "png", output_dir = NA) {
  if(!is.na(output_dir)) if(substr(output_dir, nchar(output_dir), nchar(output_dir)) != "/") output_dir <- paste0(output_dir, "/")
  
  start_list <- unique(df$start_size)
  
  panel_cols <- ceiling(length(start_list) / 2)
  
  if(!is.na(output_filename) & !is.na(output_dir)) {
    if(file_type == "png") png(paste0(output_dir, output_filename, ".png"), width = 200 * panel_cols, height = 400, res = 100)
    else pdf(paste0(output_dir, output_filename, ".pdf"), width = 2 * panel_cols, height = 4)
  }
  
  par(mfrow=c(2, panel_cols))
  par(mar=c(4.5,5,2,1))
  for(start_size in start_list) {
    plot_data <- df[which(df$start_size == start_size), ]
    title <- paste("Start size ", start_size, sep = "")
    if(sum(!is.na(plot_data$outcome)) == 0) plot(1, type = "n", 
                                                 main = "", xlab = col_name, ylab = "waiting time")
    else {
      plot(plot_data$outcome ~ plot_data[[col_name]], col = "black",
           main = "", xlab = col_name, ylab = "waiting time")
      mod1 <- lm(plot_data$outcome ~ plot_data[[col_name]])
      if(!is.na(mod1$coefficients[2])) abline(mod1, lty = 2)
    }
    title(title, line = 0.5)
  }
  
  if(!is.na(output_filename) & !is.na(output_dir)) dev.off()
}

#' Plot growth trajectories, coloured by K
#' 
#' @param df data frame
#' @param x_var column name of the x-variable (default "gen_adj")
#' @param output_dir folder in which to save the image file; if NA then plots are displayed on screen instead
#' @param output_filename name of output image file
#' @param file_type either "pdf" or "png" (other values default to "pdf")
#' 
#' @return a plot object
#' 
#' @export
#' @import ggplot2
#' 
#' @examples 
#' plot_trajectories_by_K(data)
#' plot_trajectories_by_K(data, x_var = "Generation")
plot_trajectories_by_K <- function(df, x_var = "gen_adj", output_filename = "trajectories_by_K", file_type = "png", output_dir = NA) {
  if(!is.na(output_dir)) if(substr(output_dir, nchar(output_dir), nchar(output_dir)) != "/") output_dir <- paste0(output_dir, "/")
  
  if(!is.na(output_filename) & !is.na(output_dir)) {
    if(file_type == "png") png(paste0(output_dir, output_filename, ".png"), width = 400, height = 300, res = 100)
    else pdf(paste0(output_dir, output_filename, ".pdf"), width = 4, height = 3)
  }
  
  q <- ggplot(df, aes(x = get(x_var), y = NumCells, colour = factor(K), group = interaction(K, seed)))
  q <- q + geom_line(alpha = 0.5) +
    scale_x_continuous(name = "relative time") +
    scale_y_continuous(name = "tumour size") +
    theme_classic() + 
    labs(colour = 'K')
  
  print(q)
  
  if(!is.na(output_filename) & !is.na(output_dir)) dev.off()
}

#' Plot growth trajectories, coloured by driver edge diversity
#' 
#' @param df data frame
#' @param output_dir folder in which to save the image file; if NA then plots are displayed on screen instead
#' @param output_filename name of output image file
#' @param file_type either "pdf" or "png" (other values default to "pdf")
#' 
#' @return a plot object
#' 
#' @export
#' @import ggplot2
#' @import RColorBrewer
#' @import dplyr
#' @importFrom gridExtra grid.arrange
#' 
#' @examples 
#' plot_trajectories_by_diversity(data)
plot_trajectories_by_diversity <- function(df, output_filename = "trajectories_by_diversity", file_type = "png", output_dir = NA) {
  if(!is.na(output_dir)) if(substr(output_dir, nchar(output_dir), nchar(output_dir)) != "/") output_dir <- paste0(output_dir, "/")
  
  if(!is.na(output_filename) & !is.na(output_dir)) {
    if(file_type == "png") png(paste0(output_dir, output_filename, ".png"), width = 1000, height = 500, res = 100)
    else pdf(paste0(output_dir, output_filename, ".pdf"), width = 10, height = 5)
  }
  
  K_list <- unique(df$K)
  
  qp1 <- qplot(
    new_time,
    NumCells,
    group = interaction(seed, K),
    data = df,
    colour = div0,
    geom = "line") + 
    scale_colour_distiller(palette = "RdYlBu", name = "diversity at periphery") +
    facet_grid(.~K, scales = "free") +
    scale_x_continuous(name = "relative time") +
    scale_y_continuous(name = "tumour size") +
    theme_classic()
  print(qp1)
  
  if(!is.na(output_filename) & !is.na(output_dir)) dev.off()
}

#' Plot relationship between two variables faceted by K (rows) and combinations of other parameters (columns)
#' 
#' @param df data frame
#' @param num_parameters number of parameters, accounting for the first set of columns in the dataframe
#' @param x_var column name of the x-variable (default "gen_adj")
#' @param y_var column name of the y-variable (default "NumCells")
#' @param log if this contains "x" (resp "y") then x-axis (resp y-axis) will be log-transformed (default NA)
#' @param xlimits limits for x-axis
#' @param ylimits limits for y-axis
#' @param line_col line colour or column name of variable to colour by
#' @param alpha line transparency level (if line_col is not specified)
#' @param xintercept position of vertical dashed line (default NA means no line)
#' @param output_dir folder in which to save the image file; if NA then plots are displayed on screen instead
#' @param output_filename name of output image file
#' @param file_type either "pdf" or "png" (other values default to "pdf")
#' 
#' @return a plot object
#' 
#' @export
#' @import ggplot2
#' @import RColorBrewer
#' @importFrom rlang sym
#' 
#' @examples 
#' plot_curves_faceted(data, 16)
#' plot_curves_faceted(data, 16, x_var = "Generation", y_var = "MeanBirthRate")
plot_curves_faceted <- function(df, num_parameters, x_var = "gen_adj", y_var= "NumCells", 
                                log = NA, xlimits = NULL, ylimits = NULL, line_col = "#00000005", alpha = 0.1, xintercept = NA, 
                                output_filename = NA, file_type = "png", output_dir = NA) {
  if(!is.na(output_dir)) if(substr(output_dir, nchar(output_dir), nchar(output_dir)) != "/") output_dir <- paste0(output_dir, "/")
  
  pars <- colnames(df)[1:num_parameters]
  
  pars <- pars[pars != "init_migration_rate"]
  
  pars_without_K <- pars[pars != "K"]
  
  df <- filter(df, !is.na(!!rlang::sym(y_var)))
  
  q <- ggplot(df, aes_string(x = x_var, y = y_var, 
                             group = paste0("interaction(", paste0(pars_without_K, collapse =  ", "), ")")))
  
  if(!is.na(xintercept)) q <- q + geom_vline(aes(xintercept = xintercept), linetype = "dashed")
  
  if(line_col %in% colnames(df)) q <- q + geom_line(aes_string(col = line_col), alpha = alpha) + 
    scale_colour_distiller(palette = "RdYlBu")
  else q <- q + geom_line(col = line_col, alpha = alpha)
  
  pars_without_K_seed <- pars_without_K[pars_without_K != "seed"]
  
  q <- q +
    facet_grid(paste0("K ~ ", paste0("interaction(", paste0(pars_without_K_seed, collapse =  ", "), ")"))) + 
    theme_classic()
  
  if(grepl("y", log)) q <- q + scale_y_continuous(name = y_var, trans = 'log10', limits = ylimits)
  else q <- q + scale_y_continuous(name = y_var, limits = ylimits)
  
  if(grepl("x", log)) q <- q + scale_x_continuous(name = x_var, trans = 'log10', limits = xlimits)
  else q <- q + scale_x_continuous(name = x_var, limits = xlimits)
  
  n_panels <- length(unique(ggplot_build(q)$data[[1]]$PANEL))
  dims <- wrap_dims(n_panels)
  
  if(!is.na(output_filename) & !is.na(output_dir)) {
    if(file_type == "png") png(paste0(output_dir, output_filename, ".png"), width = 300 * dims[2], height = 300 * dims[1], res = 100)
    else pdf(paste0(output_dir, output_filename, ".pdf"), width = 4 * dims[2], height = 3 * dims[1])
  }
  
  print(q)
  
  if(!is.na(output_filename) & !is.na(output_dir)) dev.off()
}

#' Violin plot of mean sweep metric (genotype frequency autocorrelation)
#' 
#' @param df dataframe generated by get_summary (filtered), get_cor_summary (filtered) or get_wait_cor_summary (filtered)
#' @param output_dir folder in which to save the image file; if NA then plots are displayed on screen instead
#' @param output_filename name of output image file
#' @param file_type either "pdf" or "png" (other values default to "pdf")
#' 
#' @return a plot object
#' 
#' @export
#' @import ggplot2
#' 
#' @examples
#' library(dplyr)
#' plot_viol_sweep_metric(filter(summary, gap == min(gap), start_size == min(start_size)))
#' plot_viol_sweep_metric(filter(cor_summary, gap == min(gap), start_size == min(start_size)))
#' plot_viol_sweep_metric(filter(wait_cor_summary, start_size == min(start_size)))
plot_viol_sweep_metric <- function(df, output_filename = "viol_sweep_metric", file_type = "png", output_dir = NA) {
  if(!is.na(output_dir)) if(substr(output_dir, nchar(output_dir), nchar(output_dir)) != "/") output_dir <- paste0(output_dir, "/")
  
  if(!is.na(output_filename) & !is.na(output_dir)) {
    if(file_type == "png") png(paste0(output_dir, output_filename, ".png"), width = 400, height = 400, res = 100)
    else pdf(paste0(output_dir, output_filename, ".pdf"), width = 4, height = 4)
  }
    
  p <- ggplot(df, aes(factor(K), mean_autocor))
  if(dim(df)[1] > length(unique(df$K))) p <- p + geom_violin(scale = "count", fill = "gold") + 
    geom_jitter(height = 0.01, width = 0.1, shape = 4)
  else p <- p + geom_point()
  
  p <- p + scale_x_discrete(name = "K") +
    scale_y_log10(name = "turnover metric", limits = c(1e-6, 1)) +
    theme_classic()
  
  print(p)
  
  if(!is.na(output_filename) & !is.na(output_dir)) dev.off()
}

#' Plot R-squared versus mean sweep metric
#' 
#' @param df dataframe generated by get_summary (filtered), get_cor_summary (filtered) or get_wait_cor_summary (filtered)
#' @param col_name name of column containing correlation coefficients, with or without "Cor_" suffix (default "DriverDiversity")
#' @param output_dir folder in which to save the image file; if NA then plots are displayed on screen instead
#' @param output_filename name of output image file
#' @param file_type either "pdf" or "png" (other values default to "pdf")
#' 
#' @return a plot object
#' 
#' @export
#' @import ggplot2
#' 
#' @examples
#' plot_R2_verus_sweep_metric(wait_cor_summary)
plot_R2_verus_sweep_metric <- function(df, col_name = "DriverDiversity", output_filename = "R2_verus_sweep_metric", file_type = "png", output_dir = NA) {
  if(!is.na(output_dir)) if(substr(output_dir, nchar(output_dir), nchar(output_dir)) != "/") output_dir <- paste0(output_dir, "/")
  
  if(!is.na(output_filename) & !is.na(output_dir)) {
    if(file_type == "png") png(paste0(output_dir, output_filename, ".png"), width = 800, height = 500, res = 100)
    else pdf(paste0(output_dir, output_filename, ".pdf"), width = 8, height = 5)
  }
  
  if(substr(col_name, 1, 4) != "Cor_") col_name <- paste0("Cor_", col_name)
  col_name_suffix <- substr(col_name, 5, nchar(col_name))
  
  qp2 <- ggplot(df, aes_string(quote(mean_autocor), col_name)) + 
    geom_point(aes(group = start_size, colour = as.factor(K))) + 
    scale_fill_brewer(palette="Set1") + 
    facet_wrap(~start_size) +
    scale_x_continuous(name = "mean sweep metric") +
    scale_y_continuous(name = paste0("R-squared: ", col_name_suffix, " vs. waiting time")) +
    geom_smooth(size = 1, method = lm) +
    theme_classic() + 
    labs(colour = 'K')
  
  print(qp2)
  
  if(!is.na(output_filename) & !is.na(output_dir)) dev.off()
}
robjohnnoble/demonanalysis documentation built on June 30, 2020, 12:47 a.m.