# To analyse the output produced by a cpp simulation
init_cols = c("#D50000", "#64DD17", "#6200EA")
pair_cols = c("#FF0481", "#FFFF00", "#00B0FF")
# Libraries needed
library(ggplot2)
library(tidyr)
library(dplyr)
# constants
WITH_FLAGS = "with_flags"
WITHOUT_FLAGS = "without_flags"
NO_BURSTS = "no_bursts"
HOME_DIR <- "~/SysBioProject/rcombinator/cpp/"
# Main function
get_data <- function(filename_data,
sim_type,
output_loc = "./unified_output/")
{
file_in <- paste0(HOME_DIR, output_loc, filename_data, ".csv")
print(paste("Loading", file_in))
data <- read.csv(file_in)
#########################
# Preprocessing of data #
#########################
data$run <- factor(data$run)
if("recomb_mean" %in% colnames(data))
{
data$recomb_mean <- factor(data$recomb_mean)
}
data <- data %>% mutate(time = time/(10^6))
stats <- c("all_min", "all_q25", "all_median", "all_q75", "all_max",
"p_all_min", "p_all_q25", "p_all_median", "p_all_q75", "p_all_max",
"active_min", "active_q25", "active_median", "active_q75", "active_max",
"p_active_min", "p_active_q25", "p_active_median", "p_active_q75", "p_active_max",
"min", "q25", "median", "q75", "max",
"p_min", "p_q25", "p_median", "p_q75", "p_max"
)
for(stat in stats)
{
if(stat %in% colnames(data))
{
data[, stat] <- (100*(5000-data[, stat]))/5000
}
}
all_times <- unique(data$time)
to_print_time <- rep(FALSE, length(all_times))
for(i in 1:length(to_print_time))
{
if("p_all_min" %in% colnames(data))
{
to_print_time[i] <- data %>% filter(time == all_times[i]) %>% select(p_all_min) %>%
anyNA()
}
else if("p_min" %in% colnames(data))
{
to_print_time[i] <- data %>% filter(time == all_times[i]) %>% select(p_min) %>%
anyNA()
}
else
{
cat("ERROR")
}
}
all_times <- all_times[!to_print_time]
data <- data %>% filter(time %in% all_times)
return(data)
}
# MPLOT gene data
no_bursts_plot <- function(data, title=NULL)
{
cols <- c("To initial sequence"="lightskyblue1",
"Pairwise"="indianred1")
m_data_p <- data
m_p <- ggplot() +
geom_ribbon(data=m_data_p,
aes(x=time, ymax=min, ymin=max,
group=run,
fill="To initial sequence"),
alpha=0.6) +
geom_ribbon(data=m_data_p,
aes(x=time, ymax=p_min, ymin=p_max,
group=run,
fill="Pairwise"),
alpha=0.6) +
scale_fill_manual(name="Similarity",
values=cols)
m_p <- m_p +
labs(x="Time (in 1M years)",
y = "Sequence similarity (%)",
title=title)
m_p <- m_p +
ggplot2::theme(
axis.title = ggplot2::element_text(size = 18, face = "bold"),
axis.text.y = ggplot2::element_text(size = 18, face = "bold"),
axis.text.x = ggplot2::element_text(size = 18, face = "bold")
) +
ggplot2::theme(
legend.title = ggplot2::element_text(size = 18),
legend.text = ggplot2::element_text(size = 18),
legend.position = c(0.65, 0.75)
)
print(m_p)
return(m_p)
}
# MPLOT inactive data
flags_no_bursts_plot_all_runs <- function(data, title=NULL)
{
cols <- c("To initial sequence"="lightskyblue1",
"Pairwise"="indianred1")
m_data_p <- data
m_data_p <- m_data_p %>% filter(num_active_seq > 1)
m_p <- ggplot() +
geom_ribbon(data=m_data_p,
aes(x=time, ymax=active_min, ymin=active_max,
group=run,
fill="To initial sequence"),
alpha=0.7) +
geom_ribbon(data=m_data_p,
aes(x=time, ymax=p_active_min, ymin=p_active_max,
group=run,
fill="Pairwise"),
alpha=0.5) +
scale_fill_manual(name="Similarity",
values=cols)
m_p <- m_p + labs(x="Time (in 1M years)",
y = "Sequence similarity (%)",
title=title)
m_p <- m_p +
ggplot2::theme(
axis.title = ggplot2::element_text(size = 18, face = "bold"),
axis.text.y = ggplot2::element_text(size = 18, face = "bold"),
axis.text.x = ggplot2::element_text(size = 18, face = "bold")
) +
ggplot2::theme(
legend.title = ggplot2::element_text(size = 18),
legend.text = ggplot2::element_text(size = 18),
legend.position = c(0.65, 0.75)
)
print(m_p)
return(m_p)
}
# MPLOT without comparison init
without_plot_init <- function(data, data_gene)
{
cols <- c("Bursts without recombination"="red",
"Bursts with recombination"="blue",
"No bursts"="green")
m_data_0 <- data %>%
filter(recomb_mean == 0) %>%
filter(num_seq > 1)
m_data_1 <- data %>%
filter(recomb_mean == 1) %>%
filter(num_seq > 1)
p <- ggplot() +
geom_line(data=m_data_0,
aes(x=time, y=median,
group=run,
color="Bursts without recombination")) +
geom_line(data=m_data_1,
aes(x=time, y=median,
group=run,
color="Bursts with recombination")) +
geom_line(data=data_gene,
aes(x=time, y=median,
group=run,
color="No bursts"),
alpha=0.5) +
scale_color_manual(name="",
values=cols)
p <- p +
labs(x="Time (in 1M years)",
y = "Sequence similarity to initial sequence (%)")
p <- p +
ggplot2::theme(
axis.title = ggplot2::element_text(size = 18, face = "bold"),
axis.text.y = ggplot2::element_text(size = 18, face = "bold"),
axis.text.x = ggplot2::element_text(size = 18, face = "bold")
) +
ggplot2::theme(
legend.title = ggplot2::element_text(size = 18),
legend.text = ggplot2::element_text(size = 18),
legend.position = c(0.65, 0.75)
)
print(p)
return(p)
}
# MPLOT without pair
without_plot_pair <- function(data, data_gene, max_time = 500)
{
cols <- c("Bursts without recombination"="red",
"Bursts with recombination"="blue",
"No bursts"="green")
data <- data %>% filter(time <= max_time)
data_gene <- data_gene %>% filter(time <= max_time)
m_data_0 <- data %>%
filter(recomb_mean == 0) %>%
filter(num_seq > 5)
m_data_1 <- data %>%
filter(recomb_mean == 1) %>%
filter(num_seq > 5)
p <- ggplot() +
geom_line(data=m_data_0,
aes(x=time, y=p_median,
group=run),
color="indianred1",
alpha=0.5)
p <- p+
geom_line(data=m_data_1,
aes(x=time, y=p_median,
group=run),
color="skyblue1",
alpha=0.5)
p <- p+
geom_line(data=data_gene,
aes(x=time, y=p_median,
group=run),
color="lightgreen",
alpha=0.5)
data_mean_over_runs <- data %>%
group_by(time, recomb_mean) %>%
summarise(p_min = mean(p_min),
p_q25 = mean(p_q25),
p_median = mean(p_median),
p_q75 = mean(p_q75),
p_max = mean(p_max)
) %>% data.frame()
m_data_mean_0 <- data_mean_over_runs %>%
filter(recomb_mean == 0)
m_data_mean_1 <- data_mean_over_runs %>%
filter(recomb_mean == 1)
data_gene_mean_over_runs <- data_gene %>%
group_by(time) %>%
summarise(p_min = mean(p_min),
p_q25 = mean(p_q25),
p_median = mean(p_median),
p_q75 = mean(p_q75),
p_max = mean(p_max)
) %>% data.frame()
linetypes <- c("Lower quartile"="dotted",
"Median"="solid",
"Upper quartile"="dashed")
p <- p +
geom_line(data=m_data_mean_0,
aes(x=time, y=p_median,
linetype="Median",
color="Bursts without recombination")) +
geom_line(data=m_data_mean_0,
aes(x=time, y=p_q25,
linetype="Lower quartile",
color="Bursts without recombination")) +
geom_line(data=m_data_mean_0,
aes(x=time, y=p_q75,
linetype="Upper quartile",
color="Bursts without recombination")) +
geom_line(data=m_data_mean_1,
aes(x=time, y=p_median,
linetype="Median",
color="Bursts with recombination")) +
geom_line(data=m_data_mean_1,
aes(x=time, y=p_q25,
linetype="Lower quartile",
color="Bursts with recombination")) +
geom_line(data=m_data_mean_1,
aes(x=time, y=p_q75,
linetype="Upper quartile",
color="Bursts with recombination")) +
geom_line(data=data_gene,
aes(x=time, y=p_median,
linetype="Median",
color="No bursts")) +
geom_line(data=data_gene,
aes(x=time, y=p_q25,
linetype="Lower quartile",
color="No bursts")) +
geom_line(data=data_gene,
aes(x=time, y=p_q75,
linetype="Upper quartile",
color="No bursts")) +
scale_color_manual(name="",
values=cols) +
scale_linetype_manual(name="",
values=linetypes)
p <- p +
labs(x="Time (in 1M years)",
y = "Pairwise sequence similarity (%)")
p <- p +
ggplot2::theme(
axis.title = ggplot2::element_text(size = 18, face = "bold"),
axis.text.y = ggplot2::element_text(size = 18, face = "bold"),
axis.text.x = ggplot2::element_text(size = 18, face = "bold")
) +
ggplot2::theme(
legend.title = ggplot2::element_text(size = 18),
legend.text = ggplot2::element_text(size = 18),
legend.position = c(0.65, 0.75)
)
print(p)
return(p)
# print(m_p)
# return(m_p)
}
# MPLOT without comparison init
with_plot_init <- function(data, data_gene, max_time)
{
data <- data %>% filter(time <= max_time)
data_gene <- data_gene %>% filter(time <= max_time)
cols <- c("Bursts without recombination"="red",
"Bursts with recombination"="blue",
"No bursts"="green")
m_data_0 <- data %>%
filter(recomb_mean == 0) %>%
filter(num_active_seq > 1)
m_data_1 <- data %>%
filter(recomb_mean == 1) %>%
filter(num_active_seq > 1)
# linetypes <- c("All"="dotted",
# "Active"="solid")
p <- ggplot() +
# geom_line(data=m_data_0,
# aes(x=time, y=all_median,
# group=run,
# linetype="All",
# color="Bursts without recombination")) +
# geom_line(data=m_data_1,
# aes(x=time, y=all_median,
# group=run,
# linetype="All",
# color="Bursts with recombination")) +
geom_line(data=m_data_0,
aes(x=time, y=active_median,
group=run,
# linetype="Active",
color="Bursts without recombination"),
alpha = 0.9) +
geom_line(data=m_data_1,
aes(x=time, y=active_median,
group=run,
# linetype="Active",
color="Bursts with recombination"),
alpha = 0.5) +
geom_line(data=data_gene,
aes(x=time, y=median,
group=run,
# linetype="Active",
color="No bursts"),
alpha=0.1) +
# scale_linetype_manual(name="",
# values=linetypes) +
scale_color_manual(name="",
values=cols)
p <- p +
labs(x="Time (in 1M years)",
y = "Sequence similarity to initial sequence (%)")
p <- p +
ggplot2::theme(
axis.title = ggplot2::element_text(size = 18, face = "bold"),
axis.text.y = ggplot2::element_text(size = 18, face = "bold"),
axis.text.x = ggplot2::element_text(size = 18, face = "bold")
) +
ggplot2::theme(
legend.title = ggplot2::element_text(size = 18),
legend.text = ggplot2::element_text(size = 18),
legend.position = c(0.65, 0.75)
)
print(p)
return(p)
}
# MPLOT with pair
with_plot_pair <- function(data, data_gene, max_time = 500, ACT_MIN = 1)
{
data <- data %>% filter(time <= max_time)
data_gene <- data_gene %>% filter(time <= max_time)
cols <- c("No recombination"="red",
"Recombination mean 1"="blue",
"Recombination mean 2"="green",
"Recombination mean 3"="purple",
"No bursts" = "black"
)
linetypes <- c("Active" = "solid",
"All" = "dotted")
data_gene_mean_over_runs <- data_gene %>%
group_by(time) %>%
summarise(p_min = mean(p_min),
p_q25 = mean(p_q25),
p_median = mean(p_median),
p_q75 = mean(p_q75),
p_max = mean(p_max)
) %>% data.frame()
data_mean_over_runs <- data %>%
group_by(time, recomb_mean) %>%
summarise(p_active_min = mean(p_active_min),
p_active_q25 = mean(p_active_q25),
p_active_median = mean(p_active_median),
p_active_q75 = mean(p_active_q75),
p_active_max = mean(p_active_max),
p_all_min = mean(p_all_min),
p_all_q25 = mean(p_all_q25),
p_all_median = mean(p_all_median),
p_all_q75 = mean(p_all_q75),
p_all_max = mean(p_all_max),
num_active_seq = mean(num_active_seq)
) %>% data.frame()
m_data_0 <- data_mean_over_runs %>%
filter(recomb_mean == 0) %>%
filter(num_active_seq > ACT_MIN)
m_data_1 <- data_mean_over_runs %>%
filter(recomb_mean == 1) %>%
filter(num_active_seq > ACT_MIN)
m_data_2 <- data_mean_over_runs %>%
filter(recomb_mean == 2) %>%
filter(num_active_seq > ACT_MIN)
m_data_3 <- data_mean_over_runs %>%
filter(recomb_mean == 3) %>%
filter(num_active_seq > ACT_MIN)
p <- ggplot() +
geom_line(data=m_data_0,
aes(x=time, y=p_active_median,
color="No recombination",
linetype="Active"),
alpha=1.0) +
geom_line(data=m_data_1,
aes(x=time, y=p_active_median,
color="Recombination mean 1",
linetype="Active"),
alpha=1.0) +
geom_line(data=m_data_2,
aes(x=time, y=p_active_median,
color="Recombination mean 2",
linetype="Active"),
alpha=1.0) +
geom_line(data=m_data_3,
aes(x=time, y=p_active_median,
color="Recombination mean 3",
linetype="Active"),
alpha=1.0) +
geom_line(data=m_data_0,
aes(x=time, y=p_all_median,
color="No recombination",
linetype="All"),
alpha=1.0) +
geom_line(data=m_data_1,
aes(x=time, y=p_all_median,
color="Recombination mean 1",
linetype="All"),
alpha=0.75) +
geom_line(data=m_data_2,
aes(x=time, y=p_all_median,
color="Recombination mean 2",
linetype="All"),
alpha=0.50) +
geom_line(data=m_data_3,
aes(x=time, y=p_all_median,
color="Recombination mean 3",
linetype="All"),
alpha=0.25) +
geom_line(data=data_gene_mean_over_runs,
aes(x=time, y=p_median,
color="No bursts",
linetype="Active"),
alpha=1.00) +
scale_color_manual(name="",
values=cols) +
scale_linetype_manual(name="",
values=linetypes)
p <- p +
labs(x="Time (in 1M years)",
y = "Pairwise sequence similarity (%)")
p <- p +
ggplot2::theme(
axis.title = ggplot2::element_text(size = 18, face = "bold"),
axis.text.y = ggplot2::element_text(size = 18, face = "bold"),
axis.text.x = ggplot2::element_text(size = 18, face = "bold")
) +
ggplot2::theme(
legend.title = ggplot2::element_text(size = 18),
legend.text = ggplot2::element_text(size = 18),
legend.position = c(0.65, 0.75)
)
print(p)
return(p)
# print(m_p)
# return(m_p)
}
#################################################
################### END #########################
#################################################
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.