cpp/analyse_output/header.R

# To analyse the output produced by a cpp simulation

init_cols = c("#D50000", "#64DD17", "#6200EA")
pair_cols = c("#FF0481", "#FFFF00", "#00B0FF")

# Libraries needed
library(ggplot2)
library(tidyr)
library(dplyr)

# constants
WITH_FLAGS    = "with_flags"
WITHOUT_FLAGS = "without_flags"
NO_BURSTS     = "no_bursts"
HOME_DIR <- "~/SysBioProject/rcombinator/cpp/"

# Main function
get_data <- function(filename_data,
                     sim_type,
                     output_loc = "./unified_output/")
{
  file_in <- paste0(HOME_DIR, output_loc, filename_data, ".csv")
  print(paste("Loading", file_in))
  data <- read.csv(file_in)

  #########################
  # Preprocessing of data #
  #########################

  data$run <- factor(data$run)

  if("recomb_mean" %in% colnames(data))
  {
    data$recomb_mean <- factor(data$recomb_mean)
  }
  data <- data %>% mutate(time = time/(10^6))

  stats <- c("all_min", "all_q25", "all_median", "all_q75", "all_max",
             "p_all_min", "p_all_q25", "p_all_median", "p_all_q75", "p_all_max",
             "active_min", "active_q25", "active_median", "active_q75", "active_max",
             "p_active_min", "p_active_q25", "p_active_median", "p_active_q75", "p_active_max",
             "min", "q25", "median", "q75", "max",
             "p_min", "p_q25", "p_median", "p_q75", "p_max"
             )
  for(stat in stats)
  {
    if(stat %in% colnames(data))
    {
      data[, stat] <- (100*(5000-data[, stat]))/5000
    }
  }
  all_times <- unique(data$time)
  to_print_time <- rep(FALSE, length(all_times))
  for(i in 1:length(to_print_time))
  {
    if("p_all_min" %in% colnames(data))
    {
      to_print_time[i] <- data %>% filter(time == all_times[i]) %>% select(p_all_min) %>%
                          anyNA()
    }
    else if("p_min" %in% colnames(data))
    {
      to_print_time[i] <- data %>% filter(time == all_times[i]) %>% select(p_min) %>%
                          anyNA()
    }
    else
    {
      cat("ERROR")
    }
  }
  all_times <- all_times[!to_print_time]
  data <- data %>% filter(time %in% all_times)
  return(data)
}

# MPLOT gene data
no_bursts_plot <- function(data, title=NULL)
{
  cols <- c("To initial sequence"="lightskyblue1",
            "Pairwise"="indianred1")
  m_data_p <- data
  m_p <- ggplot() +
    geom_ribbon(data=m_data_p,
                aes(x=time, ymax=min, ymin=max,
                    group=run,
                    fill="To initial sequence"),
                alpha=0.6) +
    geom_ribbon(data=m_data_p,
                aes(x=time, ymax=p_min, ymin=p_max,
                    group=run,
                    fill="Pairwise"),
                alpha=0.6) +
    scale_fill_manual(name="Similarity",
                       values=cols)

  m_p <- m_p +
    labs(x="Time (in 1M years)",
         y = "Sequence similarity (%)",
         title=title)

  m_p <- m_p +
         ggplot2::theme(
           axis.title  = ggplot2::element_text(size = 18, face = "bold"),
           axis.text.y = ggplot2::element_text(size = 18, face = "bold"),
           axis.text.x = ggplot2::element_text(size = 18, face = "bold")
         ) +
         ggplot2::theme(
           legend.title = ggplot2::element_text(size = 18),
           legend.text = ggplot2::element_text(size = 18),
           legend.position = c(0.65, 0.75)
         )
  print(m_p)
  return(m_p)
}

# MPLOT inactive data
flags_no_bursts_plot_all_runs <- function(data, title=NULL)
{
  cols <- c("To initial sequence"="lightskyblue1",
            "Pairwise"="indianred1")

  m_data_p <- data
  m_data_p <- m_data_p %>% filter(num_active_seq > 1)

  m_p <- ggplot() +
    geom_ribbon(data=m_data_p,
                aes(x=time, ymax=active_min, ymin=active_max,
                    group=run,
                    fill="To initial sequence"),
                alpha=0.7) +
    geom_ribbon(data=m_data_p,
                aes(x=time, ymax=p_active_min, ymin=p_active_max,
                    group=run,
                    fill="Pairwise"),
                alpha=0.5) +
    scale_fill_manual(name="Similarity",
                       values=cols)

  m_p <- m_p + labs(x="Time (in 1M years)",
                    y = "Sequence similarity (%)",
                    title=title)

  m_p <- m_p +
         ggplot2::theme(
           axis.title  = ggplot2::element_text(size = 18, face = "bold"),
           axis.text.y = ggplot2::element_text(size = 18, face = "bold"),
           axis.text.x = ggplot2::element_text(size = 18, face = "bold")
         ) +
         ggplot2::theme(
           legend.title = ggplot2::element_text(size = 18),
           legend.text = ggplot2::element_text(size = 18),
           legend.position = c(0.65, 0.75)
         )
  print(m_p)
  return(m_p)
}

# MPLOT without comparison init
without_plot_init <- function(data, data_gene)
{
  cols <- c("Bursts without recombination"="red",
            "Bursts with recombination"="blue",
            "No bursts"="green")
  m_data_0 <- data %>%
    filter(recomb_mean == 0) %>%
    filter(num_seq > 1)

  m_data_1 <- data %>%
    filter(recomb_mean == 1) %>%
    filter(num_seq > 1)

  p <- ggplot() +
    geom_line(data=m_data_0,
              aes(x=time, y=median,
                  group=run,
                  color="Bursts without recombination")) +
    geom_line(data=m_data_1,
                aes(x=time, y=median,
                    group=run,
                    color="Bursts with recombination")) +
    geom_line(data=data_gene,
                aes(x=time, y=median,
                    group=run,
                    color="No bursts"),
              alpha=0.5) +
    scale_color_manual(name="",
                       values=cols)

  p <- p +
    labs(x="Time (in 1M years)",
         y = "Sequence similarity to initial sequence (%)")

  p <- p +
         ggplot2::theme(
           axis.title  = ggplot2::element_text(size = 18, face = "bold"),
           axis.text.y = ggplot2::element_text(size = 18, face = "bold"),
           axis.text.x = ggplot2::element_text(size = 18, face = "bold")
         ) +
         ggplot2::theme(
           legend.title = ggplot2::element_text(size = 18),
           legend.text = ggplot2::element_text(size = 18),
           legend.position = c(0.65, 0.75)
         )
  print(p)
  return(p)
}

# MPLOT without pair
without_plot_pair <- function(data, data_gene, max_time = 500)
{
  cols <- c("Bursts without recombination"="red",
            "Bursts with recombination"="blue",
            "No bursts"="green")
  data      <- data      %>% filter(time <= max_time)
  data_gene <- data_gene %>% filter(time <= max_time)

  m_data_0 <- data %>%
    filter(recomb_mean == 0) %>%
    filter(num_seq > 5)

  m_data_1 <- data %>%
    filter(recomb_mean == 1) %>%
    filter(num_seq > 5)

  p <- ggplot() +
    geom_line(data=m_data_0,
              aes(x=time, y=p_median,
                  group=run),
              color="indianred1",
              alpha=0.5)
  p <- p+
    geom_line(data=m_data_1,
              aes(x=time, y=p_median,
                  group=run),
              color="skyblue1",
              alpha=0.5)
  p <- p+
    geom_line(data=data_gene,
              aes(x=time, y=p_median,
                  group=run),
              color="lightgreen",
              alpha=0.5)

  data_mean_over_runs <- data %>%
    group_by(time, recomb_mean) %>%
    summarise(p_min    = mean(p_min),
              p_q25    = mean(p_q25),
              p_median = mean(p_median),
              p_q75    = mean(p_q75),
              p_max    = mean(p_max)
              ) %>% data.frame()

  m_data_mean_0 <- data_mean_over_runs %>%
    filter(recomb_mean == 0)

  m_data_mean_1 <- data_mean_over_runs %>%
    filter(recomb_mean == 1)

  data_gene_mean_over_runs <- data_gene %>%
    group_by(time) %>%
    summarise(p_min    = mean(p_min),
              p_q25    = mean(p_q25),
              p_median = mean(p_median),
              p_q75    = mean(p_q75),
              p_max    = mean(p_max)
              ) %>% data.frame()

  linetypes <- c("Lower quartile"="dotted",
                 "Median"="solid",
                 "Upper quartile"="dashed")

  p <- p +
    geom_line(data=m_data_mean_0,
              aes(x=time, y=p_median,
                  linetype="Median",
              color="Bursts without recombination")) +
    geom_line(data=m_data_mean_0,
              aes(x=time, y=p_q25,
                  linetype="Lower quartile",
              color="Bursts without recombination")) +
    geom_line(data=m_data_mean_0,
              aes(x=time, y=p_q75,
                  linetype="Upper quartile",
              color="Bursts without recombination")) +

    geom_line(data=m_data_mean_1,
              aes(x=time, y=p_median,
                  linetype="Median",
              color="Bursts with recombination")) +
    geom_line(data=m_data_mean_1,
              aes(x=time, y=p_q25,
                  linetype="Lower quartile",
              color="Bursts with recombination")) +
    geom_line(data=m_data_mean_1,
              aes(x=time, y=p_q75,
                  linetype="Upper quartile",
              color="Bursts with recombination")) +

    geom_line(data=data_gene,
              aes(x=time, y=p_median,
                  linetype="Median",
              color="No bursts")) +
    geom_line(data=data_gene,
              aes(x=time, y=p_q25,
                  linetype="Lower quartile",
              color="No bursts")) +
    geom_line(data=data_gene,
              aes(x=time, y=p_q75,
                  linetype="Upper quartile",
              color="No bursts")) +

    scale_color_manual(name="",
                       values=cols) +
    scale_linetype_manual(name="",
                          values=linetypes)

  p <- p +
    labs(x="Time (in 1M years)",
         y = "Pairwise sequence similarity (%)")

  p <- p +
         ggplot2::theme(
           axis.title  = ggplot2::element_text(size = 18, face = "bold"),
           axis.text.y = ggplot2::element_text(size = 18, face = "bold"),
           axis.text.x = ggplot2::element_text(size = 18, face = "bold")
         ) +
         ggplot2::theme(
           legend.title = ggplot2::element_text(size = 18),
           legend.text = ggplot2::element_text(size = 18),
           legend.position = c(0.65, 0.75)
         )

  print(p)
  return(p)
  # print(m_p)
  # return(m_p)
}

# MPLOT without comparison init
with_plot_init <- function(data, data_gene, max_time)
{
  data      <- data      %>% filter(time <= max_time)
  data_gene <- data_gene      %>% filter(time <= max_time)
  cols <- c("Bursts without recombination"="red",
            "Bursts with recombination"="blue",
            "No bursts"="green")
  m_data_0 <- data %>%
    filter(recomb_mean == 0) %>%
    filter(num_active_seq > 1)

  m_data_1 <- data %>%
    filter(recomb_mean == 1) %>%
    filter(num_active_seq > 1)

  # linetypes <- c("All"="dotted",
  #                "Active"="solid")
  p <- ggplot() +
    # geom_line(data=m_data_0,
    #           aes(x=time, y=all_median,
    #               group=run,
    #               linetype="All",
    #               color="Bursts without recombination")) +
    # geom_line(data=m_data_1,
    #             aes(x=time, y=all_median,
    #                 group=run,
    #                 linetype="All",
    #                 color="Bursts with recombination")) +

    geom_line(data=m_data_0,
              aes(x=time, y=active_median,
                  group=run,
                  # linetype="Active",
                  color="Bursts without recombination"),
              alpha = 0.9) +
    geom_line(data=m_data_1,
                aes(x=time, y=active_median,
                    group=run,
                    # linetype="Active",
                    color="Bursts with recombination"),
              alpha = 0.5) +
    geom_line(data=data_gene,
                aes(x=time, y=median,
                    group=run,
                    # linetype="Active",
                    color="No bursts"),
              alpha=0.1) +

    # scale_linetype_manual(name="",
    #                    values=linetypes) +
    scale_color_manual(name="",
                       values=cols)

  p <- p +
    labs(x="Time (in 1M years)",
         y = "Sequence similarity to initial sequence (%)")

  p <- p +
         ggplot2::theme(
           axis.title  = ggplot2::element_text(size = 18, face = "bold"),
           axis.text.y = ggplot2::element_text(size = 18, face = "bold"),
           axis.text.x = ggplot2::element_text(size = 18, face = "bold")
         ) +
         ggplot2::theme(
           legend.title = ggplot2::element_text(size = 18),
           legend.text = ggplot2::element_text(size = 18),
           legend.position = c(0.65, 0.75)
         )
  print(p)
  return(p)
}

# MPLOT with pair
with_plot_pair <- function(data, data_gene, max_time = 500, ACT_MIN = 1)
{
  data      <- data      %>% filter(time <= max_time)
  data_gene <- data_gene      %>% filter(time <= max_time)
  cols <- c("No recombination"="red",
            "Recombination mean 1"="blue",
            "Recombination mean 2"="green",
            "Recombination mean 3"="purple",
            "No bursts" = "black"
            )
  linetypes <- c("Active" = "solid",
                 "All" = "dotted")

  data_gene_mean_over_runs <- data_gene %>%
    group_by(time) %>%
    summarise(p_min    = mean(p_min),
              p_q25    = mean(p_q25),
              p_median = mean(p_median),
              p_q75    = mean(p_q75),
              p_max    = mean(p_max)
              ) %>% data.frame()

  data_mean_over_runs <- data %>%
    group_by(time, recomb_mean) %>%
    summarise(p_active_min    = mean(p_active_min),
              p_active_q25    = mean(p_active_q25),
              p_active_median = mean(p_active_median),
              p_active_q75    = mean(p_active_q75),
              p_active_max    = mean(p_active_max),
              p_all_min    = mean(p_all_min),
              p_all_q25    = mean(p_all_q25),
              p_all_median = mean(p_all_median),
              p_all_q75    = mean(p_all_q75),
              p_all_max    = mean(p_all_max),
              num_active_seq = mean(num_active_seq)
              ) %>% data.frame()

  m_data_0 <- data_mean_over_runs %>%
    filter(recomb_mean == 0) %>%
    filter(num_active_seq > ACT_MIN)

  m_data_1 <- data_mean_over_runs %>%
    filter(recomb_mean == 1) %>%
    filter(num_active_seq > ACT_MIN)

  m_data_2 <- data_mean_over_runs %>%
    filter(recomb_mean == 2) %>%
    filter(num_active_seq > ACT_MIN)

  m_data_3 <- data_mean_over_runs %>%
    filter(recomb_mean == 3) %>%
    filter(num_active_seq > ACT_MIN)

  p <- ggplot() +
    geom_line(data=m_data_0,
              aes(x=time, y=p_active_median,
              color="No recombination",
              linetype="Active"),
              alpha=1.0) +
    geom_line(data=m_data_1,
              aes(x=time, y=p_active_median,
              color="Recombination mean 1",
              linetype="Active"),
              alpha=1.0) +
    geom_line(data=m_data_2,
              aes(x=time, y=p_active_median,
              color="Recombination mean 2",
              linetype="Active"),
              alpha=1.0) +
    geom_line(data=m_data_3,
              aes(x=time, y=p_active_median,
              color="Recombination mean 3",
              linetype="Active"),
              alpha=1.0) +

    geom_line(data=m_data_0,
              aes(x=time, y=p_all_median,
              color="No recombination",
              linetype="All"),
              alpha=1.0) +
    geom_line(data=m_data_1,
              aes(x=time, y=p_all_median,
              color="Recombination mean 1",
              linetype="All"),
              alpha=0.75) +
    geom_line(data=m_data_2,
              aes(x=time, y=p_all_median,
              color="Recombination mean 2",
              linetype="All"),
              alpha=0.50) +
    geom_line(data=m_data_3,
              aes(x=time, y=p_all_median,
              color="Recombination mean 3",
              linetype="All"),
              alpha=0.25) +
    geom_line(data=data_gene_mean_over_runs,
              aes(x=time, y=p_median,
              color="No bursts",
              linetype="Active"),
              alpha=1.00) +
    scale_color_manual(name="",
                       values=cols) +
    scale_linetype_manual(name="",
                          values=linetypes)
  p <- p +
    labs(x="Time (in 1M years)",
         y = "Pairwise sequence similarity (%)")

  p <- p +
         ggplot2::theme(
           axis.title  = ggplot2::element_text(size = 18, face = "bold"),
           axis.text.y = ggplot2::element_text(size = 18, face = "bold"),
           axis.text.x = ggplot2::element_text(size = 18, face = "bold")
         ) +
         ggplot2::theme(
           legend.title = ggplot2::element_text(size = 18),
           legend.text = ggplot2::element_text(size = 18),
           legend.position = c(0.65, 0.75)
         )

  print(p)
  return(p)
  # print(m_p)
  # return(m_p)
}

#################################################
################### END #########################
#################################################
sams25/rcombinator_old documentation built on May 28, 2019, 8:40 a.m.