suppressPackageStartupMessages(library(BPRMeth))
suppressPackageStartupMessages(library(purrr))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(cowplot))
suppressPackageStartupMessages(library(gridExtra))
suppressPackageStartupMessages(library(reshape2))
suppressPackageStartupMessages(library(grid))
suppressPackageStartupMessages(library(ROCR))
suppressPackageStartupMessages(library(scales))
suppressPackageStartupMessages(library(data.table))
suppressPackageStartupMessages(library(ggfortify))
#suppressPackageStartupMessages(library(stringi))
# suppressPackageStartupMessages(library(proxy))
suppressPackageStartupMessages(library(RColorBrewer))
# Data
# io            <- list(regions = "prom10k", M = 1611, M_deepcpg = 708, cov = 20, sd = 0.2)
io            <- list(regions = "prom5k", M = 785, M_deepcpg = 352, cov = 15, sd = 0.15)
io$script_dir <- "../"
io$dataset    <- "encode/scWGBS/"
io$sub_dir    <- "/"
io$data_dir   <- paste0("../local-data/melissa/", io$dataset, "/imputation/")
io$K          <- 3
io$basis      <- 7
io$data_prcg  <- 0.4
io$reg_prcg   <- 0.95
io$cpg_prcg   <- c(0.2, 0.5, 0.8)
io$filter     <- 0.5
R.utils::sourceDirectory(paste0(io$script_dir, "lib/"), modifiedOnly = FALSE)
# Different CpG coverages
dt_analysis <- data.table(region = character(), cpg_prcg = numeric(), auc_melissa = numeric(), auc_melissa_rate = numeric(), 
                          auc_indep_prof = numeric(), auc_indep_rate = numeric(), auc_rf = numeric(),
                          auc_deepcpg = numeric(),
                          f_melissa = numeric(), f_melissa_rate = numeric(), 
                          f_indep_prof = numeric(), f_indep_rate = numeric(), f_rf = numeric(),
                          f_deepcpg = numeric(),
                          tpr_fpr_melissa = list(), tpr_fpr_melissa_rate = list(),
                          tpr_fpr_indep_prof = list(), tpr_fpr_indep_rate = list(), tpr_fpr_rf = list(),
                          tpr_fpr_deepcpg = numeric(),
                          pr_melissa = list(), pr_melissa_rate = list(),
                          pr_indep_prof = list(), pr_indep_rate = list(), pr_rf = list(),
                          pr_deepcpg = numeric()
                          )

model_analysis <- data.table(region = character(), cpg_prcg = numeric(), melissa = numeric(), melissa_rate = numeric())
iter <- 1
for (cpg_prcg in io$cpg_prcg) {
   # Load joint analysis results
   dt_melissa <- readRDS(paste0(io$data_dir, "melissa_sim10_", io$regions, 
                              "_cov", io$cov, "_sd", io$sd, "_K", io$K, "_M", io$M, 
                              "_basis", io$basis, "_dataPrcg", io$data_prcg, 
                              "_regionPrcg", io$reg_prcg, "_cpgPrcg", cpg_prcg,
                              "_filter", io$filter, ".rds"))

   # Load independent analysis results
   dt_indep <- readRDS(paste0(io$data_dir, "indep_sim10_", io$regions,
                          "_cov", io$cov, "_sd", io$sd, "_M", io$M,
                          "_basis", io$basis, "_dataPrcg", io$data_prcg,
                          "_regionPrcg", io$reg_prcg, "_cpgPrcg", cpg_prcg,
                          "_filter", io$filter, ".rds"))

   # Load RF analysis results
   dt_rf <- readRDS(paste0(io$data_dir, "rf_indep_sim10_", io$regions,
                              "_cov", io$cov, "_sd", io$sd, "_M", io$M,
                              "_dataPrcg", io$data_prcg, "_regionPrcg", io$reg_prcg,
                              "_cpgPrcg", cpg_prcg, "_filter", io$filter,
                              ".rds"))

   # Load RF analysis results
   dt_deepcpg <- readRDS(paste0(io$data_dir, "deepcpg/deepcpg_sim10_", io$regions, 
                              "_cov", io$cov, "_sd", io$sd, "_M", io$M_deepcpg,
                              "_dataPrcg", io$data_prcg, "_regionPrcg", io$reg_prcg, 
                              "_cpgPrcg", "0.5", "_filter", io$filter, 
                              ".rds"))

   # # Load RF analysis results
   # dt_deepcpg_sub <- readRDS(paste0(io$data_dir, "deepcpg/", io$sub_dir, "deepcpg_sim10_", region, 
   #                            "_cov", io$cov[iter], "_sd0.2_M", io$M_deepcpg[iter],
   #                            "_dataPrcg", io$data_prcg, "_regionPrcg", io$reg_prcg, 
   #                            "_cpgPrcg", io$cpg_prcg, "_filter", io$filter, 
   #                            ".rds"))

   for (i in 1:length(dt_melissa$model)) {
       # Create prediction objects
       melissa_pred <- prediction(round(dt_melissa$model[[i]]$eval_perf$eval_prof$pred_obs, 2), 
                                  dt_melissa$model[[i]]$eval_perf$eval_prof$act_obs)
       melissa_rate_pred <- prediction(round(dt_melissa$model[[i]]$eval_perf$eval_mean$pred_obs, 2), 
                                       dt_melissa$model[[i]]$eval_perf$eval_mean$act_obs)
       indep_prof_pred <- prediction(round(dt_indep$model[[i]]$eval_perf$eval_prof$pred_obs, 2), 
                                     dt_indep$model[[i]]$eval_perf$eval_prof$act_obs)
       indep_rate_pred <- prediction(round(dt_indep$model[[i]]$eval_perf$eval_mean$pred_obs, 2), 
                                     dt_indep$model[[i]]$eval_perf$eval_mean$act_obs)
       rf_pred <- prediction(round(dt_rf$model[[i]]$eval_perf$pred_obs, 2), dt_rf$model[[i]]$eval_perf$act_obs)
       deepcpg_pred <- prediction(round(dt_deepcpg$model[[i]]$eval_perf$pred_obs, 2), 
                                  dt_deepcpg$model[[i]]$eval_perf$act_obs)
       # deepcpg_sub_pred <- prediction(round(dt_deepcpg_sub$model[[i]]$eval_perf$pred_obs, 2), 
       #                            dt_deepcpg_sub$model[[i]]$eval_perf$act_obs)

       # F-measure performance
       f_melissa <- performance(melissa_pred, "f")
       f_melissa_rate <- performance(melissa_rate_pred, "f")
       f_indep_prof <- performance(indep_prof_pred, "f")
       f_indep_rate <- performance(indep_rate_pred, "f")
       f_rf <- performance(rf_pred, "f")
       f_deepcpg <- performance(deepcpg_pred, "f")
       # f_deepcpg_sub <- performance(deepcpg_sub_pred, "f")

       dt <- data.table(region = io$regions, cpg_prcg = cpg_prcg,
                     auc_melissa = performance(melissa_pred, "auc")@y.values[[1]],
                     auc_melissa_rate = performance(melissa_rate_pred, "auc")@y.values[[1]],
                     auc_indep_prof = performance(indep_prof_pred, "auc")@y.values[[1]],
                     auc_indep_rate = performance(indep_rate_pred, "auc")@y.values[[1]],
                     auc_rf = performance(rf_pred, "auc")@y.values[[1]],
                     auc_deepcpg = performance(deepcpg_pred, "auc")@y.values[[1]],
                     # auc_deepcpg_sub = performance(deepcpg_sub_pred, "auc")@y.values[[1]],

                     f_melissa = f_melissa@y.values[[1]][min(which(f_melissa@x.values[[1]] <= 0.5))],
                     f_melissa_rate = f_melissa_rate@y.values[[1]][min(which(f_melissa_rate@x.values[[1]] <= 0.5))],
                     f_indep_prof = f_indep_prof@y.values[[1]][min(which(f_indep_prof@x.values[[1]] <= 0.5))],
                     f_indep_rate = f_indep_rate@y.values[[1]][min(which(f_indep_rate@x.values[[1]] <= 0.5))],
                     f_rf = f_rf@y.values[[1]][min(which(f_rf@x.values[[1]] <= 0.5))],
                     f_deepcpg = f_deepcpg@y.values[[1]][min(which(f_deepcpg@x.values[[1]] <= 0.5))],
                     # f_deepcpg_sub = f_deepcpg_sub@y.values[[1]][min(which(f_deepcpg_sub@x.values[[1]] <= 0.5))],

                     tpr_fpr_melissa = list(tpr_fpr_melissa = performance(melissa_pred, "tpr", "fpr")),
                     tpr_fpr_melissa_rate = list(tpr_fpr_melissa_rate = performance(melissa_rate_pred, "tpr", "fpr")),
                     tpr_fpr_indep_prof = list(tpr_fpr_indep_prof = performance(indep_prof_pred, "tpr", "fpr")),
                     tpr_fpr_indep_rate = list(tpr_fpr_indep_rate = performance(indep_rate_pred, "tpr", "fpr")),
                     tpr_fpr_rf = list(tpr_fpr_rf = performance(rf_pred, "tpr", "fpr")),
                     tpr_fpr_deepcpg = list(tpr_fpr_deepcpg = performance(deepcpg_pred, "tpr", "fpr")),
                     # # tpr_fpr_deepcpg_sub = list(tpr_fpr_deepcpg_sub = performance(deepcpg_sub_pred, "tpr", "fpr")),
                     # 
                     pr_melissa = list(pr_melissa = performance(melissa_pred, "prec", "rec")),
                     pr_melissa_rate = list(pr_melissa_rate = performance(melissa_rate_pred, "prec", "rec")),
                     pr_indep_prof = list(pr_indep_prof = performance(indep_prof_pred, "prec", "rec")),
                     pr_indep_rate = list(pr_indep_rate = performance(indep_rate_pred, "prec", "rec")),
                     pr_rf = list(pr_rf = performance(rf_pred, "prec", "rec")),
                     pr_deepcpg = list(pr_deepcpg = performance(deepcpg_pred, "prec", "rec"))
                     # pr_deepcpg_sub = list(pr_deepcpg_sub = performance(deepcpg_sub_pred, "prec", "rec"))
                     )
    # Add results to final data.table
    dt_analysis <- rbind(dt_analysis, dt)
    if (!is(dt_melissa$model[[i]], "try-error")) {
        dt <- data.table(region = io$regions, cpg_prcg = cpg_prcg,
                         melissa = length(which(dt_melissa$model[[i]]$melissa_prof$delta > 4)),
                         melissa_rate = length(which(dt_melissa$model[[i]]$melissa_rate$delta > 4)))
        model_analysis <- rbind(model_analysis, dt)
    }
   }
   iter <- iter + 1
}
rm(iter, dt, i, dt_rf, dt_indep, dt_melissa, melissa_pred, melissa_rate_pred, indep_prof_pred, indep_rate_pred, rf_pred,
   f_melissa, f_melissa_rate, f_indep_prof, f_indep_rate, f_rf)

Variability across different cross-validations

print("Melissa")
round(mean(dt_analysis[cpg_prcg == 0.2, auc_melissa]), 2)
round(mean(dt_analysis[cpg_prcg == 0.5, auc_melissa]), 2)
formatC(2 * sd(dt_analysis[cpg_prcg == 0.2, auc_melissa]), format = "e", digits = 1)
formatC(2 * sd(dt_analysis[cpg_prcg == 0.5, auc_melissa]), format = "e", digits = 1)

print("Melissa rate")
round(mean(dt_analysis[cpg_prcg == 0.2, auc_melissa_rate]), 2)
round(mean(dt_analysis[cpg_prcg == 0.5, auc_melissa_rate]), 2)
formatC(2 * sd(dt_analysis[cpg_prcg == 0.2, auc_melissa_rate]), format = "e", digits = 1)
formatC(2 * sd(dt_analysis[cpg_prcg == 0.5, auc_melissa_rate]), format = "e", digits = 1)

print("BPRMeth")
round(mean(dt_analysis[cpg_prcg == 0.2, auc_indep_prof]), 2)
round(mean(dt_analysis[cpg_prcg == 0.5, auc_indep_prof]), 2)
formatC(2 * sd(dt_analysis[cpg_prcg == 0.2, auc_indep_prof]), format = "e", digits = 1)
formatC(2 * sd(dt_analysis[cpg_prcg == 0.5, auc_indep_prof]), format = "e", digits = 1)

print("Rate")
round(mean(dt_analysis[cpg_prcg == 0.2, auc_indep_rate]), 2)
round(mean(dt_analysis[cpg_prcg == 0.5, auc_indep_rate]), 2)
formatC(2 * sd(dt_analysis[cpg_prcg == 0.2, auc_indep_rate]), format = "e", digits = 1)
formatC(2 * sd(dt_analysis[cpg_prcg == 0.5, auc_indep_rate]), format = "e", digits = 1)

print("RF")
round(mean(dt_analysis[cpg_prcg == 0.2, auc_rf]), 2)
round(mean(dt_analysis[cpg_prcg == 0.5, auc_rf]), 2)
formatC(2 * sd(dt_analysis[cpg_prcg == 0.2, auc_rf]), format = "e", digits = 1)
formatC(2 * sd(dt_analysis[cpg_prcg == 0.5, auc_rf]), format = "e", digits = 1)

print("DeepCpG")
round(mean(dt_analysis[cpg_prcg == 0.2, auc_deepcpg]), 2)
round(mean(dt_analysis[cpg_prcg == 0.5, auc_deepcpg]), 2)
formatC(2 * sd(dt_analysis[cpg_prcg == 0.2, auc_deepcpg]), format = "e", digits = 1)
formatC(2 * sd(dt_analysis[cpg_prcg == 0.5, auc_deepcpg]), format = "e", digits = 1)

AUC performance

set.seed(17)
dt_boxplot <- copy(dt_analysis)
s2 <- 0.004

dt_boxplot <- dt_boxplot[, c("auc_melissa", "auc_melissa_rate", "auc_indep_prof", "auc_indep_rate", "auc_rf", "auc_deepcpg") := 
                             list(auc_melissa + rnorm(.N, 0, s2), auc_melissa_rate + rnorm(.N, 0, s2),
                                  auc_indep_prof + rnorm(.N, 0, s2), auc_indep_rate + rnorm(.N, 0, s2),
                                  auc_rf + rnorm(.N, 0, s2), auc_deepcpg + rnorm(.N, 0, s2/2))]
dt_boxplot <- dt_boxplot[, c("cpg_prcg", "auc_melissa", "auc_melissa_rate", "auc_indep_prof", "auc_indep_rate", "auc_rf", "auc_deepcpg")]
dt_boxplot[, cpg_prcg := as.character(cpg_prcg)][cpg_prcg == "0.2", cpg_prcg := "20%"]
dt_boxplot[cpg_prcg == "0.5", cpg_prcg := "50%"]
dt_boxplot[cpg_prcg == "0.8", cpg_prcg := "80%"]

dt_boxplot <- dt_boxplot %>% setnames(c("cpg_prcg", "auc_melissa", "auc_melissa_rate", "auc_indep_prof", "auc_indep_rate", "auc_rf", "auc_deepcpg"), c("x", "Melissa", "Melissa Rate", "BPRMeth", "Rate", "RF", "DeepCpG")) %>% .[, x := as.factor(x)] %>% melt(variable.name = "Model", value.name = "y")  %>% .[, x := factor(x, levels = c("20%", "50%", "80%"))] %>% .[, Model := factor(Model, levels = c("Melissa", "DeepCpG", "BPRMeth", "RF", "Melissa Rate", "Rate"))]

p_auc_box <- ggplot(dt_boxplot, aes(x = x, y = y, fill = Model)) +
  geom_boxplot(alpha = 0.8, outlier.shape = NA) +
  # scale_fill_manual(values = c("red3", "darkgreen", "palegreen3", "chocolate2", "dodgerblue4", "mediumorchid4", "mistyrose4")) + 
  scale_fill_manual(values = c("red3", "darkgreen", "chocolate2", "dodgerblue4", "mediumorchid4", "mistyrose4")) + 
  #scale_x_discrete(labels = c("prom3k" = "Promoter 3kb", "prom5k" = "Promoter 5kb", "prom10k" = "Promoter 10kb", "active_enhancers" = "Active enhancers", "Nanog" = "Nanog", "super_enhancers" = "Super enhancers")) +
  scale_y_continuous(breaks = pretty_breaks(n = 6)) +
  labs(title = "", x = "CpG coverage", y = "AUC") +
  boxplot_theme() + theme(axis.title.x = element_text(margin = ggplot2::margin(10,0,0,0)))
print(p_auc_box)

pdf(file = paste0("out/", io$dataset, io$sub_dir, io$regions, "/auc-encode-scWGBS.pdf"), width = 14, height = 6, useDingbats = FALSE)
p_auc_box
dev.off()

# pdf(file = paste0("out/", io$dataset, io$sub_dir, "/auc-smallwood-ms.pdf"), width = 15, height = 6, useDingbats = FALSE)
# p_auc_box
# dev.off()
rm(dt_boxplot, s2)

F1 performance

set.seed(17)
dt_boxplot <- copy(dt_analysis)
s2 <- 0.004

dt_boxplot <- dt_boxplot[, c("f_melissa", "f_melissa_rate", "f_indep_prof", "f_indep_rate", "f_rf", "f_deepcpg") := 
                             list(f_melissa + rnorm(.N, 0, s2), f_melissa_rate + rnorm(.N, 0, s2),
                                  f_indep_prof + rnorm(.N, 0, s2), f_indep_rate + rnorm(.N, 0, s2),
                                  f_rf + rnorm(.N, 0, s2), f_deepcpg + rnorm(.N, 0, s2/2))]
dt_boxplot <- dt_boxplot[, c("cpg_prcg", "f_melissa", "f_melissa_rate", "f_indep_prof", "f_indep_rate", "f_rf", "f_deepcpg")]
dt_boxplot[, cpg_prcg := as.character(cpg_prcg)][cpg_prcg == "0.2", cpg_prcg := "20%"]
dt_boxplot[cpg_prcg == "0.5", cpg_prcg := "50%"]
dt_boxplot[cpg_prcg == "0.8", cpg_prcg := "80%"]

dt_boxplot <- dt_boxplot %>% setnames(c("cpg_prcg", "f_melissa", "f_melissa_rate", "f_indep_prof", "f_indep_rate", "f_rf", "f_deepcpg"), c("x", "Melissa", "Melissa Rate", "BPRMeth", "Indep Rate", "RF", "DeepCpG")) %>% .[, x := as.factor(x)] %>% melt(variable.name = "Model", value.name = "y")  %>% .[, x := factor(x, levels = c("20%", "50%", "80%"))] %>% .[, Model := factor(Model, levels = c("Melissa", "DeepCpG", "BPRMeth", "RF", "Melissa Rate", "Indep Rate"))]

p_f_box <- ggplot(dt_boxplot, aes(x = x, y = y, fill = Model)) +
  geom_boxplot(alpha = 0.8, outlier.shape = NA) +
  scale_fill_manual(values = c("red3", "darkgreen", "chocolate2", "dodgerblue4", "mediumorchid4", "mistyrose4")) + 
  #scale_x_discrete(labels = c("prom3k" = "Promoter 3kb", "prom5k" = "Promoter 5kb", "prom10k" = "Promoter 10kb", "active_enhancers" = "Active enhancers", "Nanog" = "Nanog", "super_enhancers" = "Super enhancers")) +
  # scale_y_continuous(limits = c(0.55, 0.9), breaks = pretty_breaks(n = 4)) +
  labs(title = "", x = "CpG Coverage", y = "F-measure") +
  boxplot_theme() + theme(axis.title.x = element_text(margin = ggplot2::margin(10,0,0,0)))
print(p_f_box)

pdf(file = paste0("out/", io$dataset, io$sub_dir, io$regions, "/f-encode-scWGBS.pdf"), width = 14, height = 6, useDingbats = FALSE)
p_f_box
dev.off()

# pdf(file = paste0("out/", io$dataset, io$sub_dir, "/f-smallwood-ms.pdf"), width = 15, height = 6, useDingbats = FALSE)
# p_f_box
# dev.off()
rm(dt_boxplot, s2)

TPR / FPR performance

set.seed(17)
dt_boxplot <- copy(dt_analysis)
# Keep only required columns
dt_boxplot <- dt_boxplot[, c("cpg_prcg", "tpr_fpr_melissa", "tpr_fpr_melissa_rate", "tpr_fpr_indep_prof", "tpr_fpr_indep_rate", "tpr_fpr_rf", "tpr_fpr_deepcpg")]
# Keep one simulation per region
rows <- c(4, 16, 25)
dt_boxplot <- dt_boxplot[rows, ]

# Change Model and Region names
colnames(dt_boxplot) <- c("cpg_prcg", "Melissa", "Melissa Rate", "BPRMeth", "Indep Rate", "RF", "DeepCpG")
model_names <- colnames(dt_boxplot)[2:NCOL(dt_boxplot)]
cpg_names <- c("20% coverage", "50% coverage", "80% coverage")
dt_boxplot$cpg_prcg <- cpg_names

# Extract FPR and TPR
dt_tpr_fpr <- data.table(fpr = numeric(), tpr = numeric(), cpg_prcg = character(), Model = character())
for (i in 1:NROW(dt_boxplot)) {
    for (k in model_names) {
        perform <- dt_boxplot[, k, with = FALSE]
        dt <- data.table(fpr = perform[[1]][[i]]@x.values[[1]], tpr = perform[[1]][[i]]@y.values[[1]], 
                         cpg_prcg = dt_boxplot$cpg_prcg[i], Model = k)
        dt_tpr_fpr <- rbind(dt_tpr_fpr, dt)
    }
}

# Rename and refactor
dt_tpr_fpr <- dt_tpr_fpr %>% .[, c("cpg_prcg", "Model") := list(as.factor(cpg_prcg), as.factor(Model))] %>% .[, Region := factor(cpg_prcg, levels = c("20% coverage", "50% coverage", "80% coverage"))] %>% .[, Model := factor(Model, levels = c("Melissa", "Melissa Rate", "BPRMeth", "Indep Rate", "RF", "DeepCpG"))] %>% .[, Model := factor(Model, levels = c("Melissa", "DeepCpG", "BPRMeth", "RF", "Melissa Rate", "Indep Rate"))]


p_tpr_fpr <- ggplot(dt_tpr_fpr, aes(x = fpr, y = tpr, group = Model)) +
    geom_line(aes(color = Model), size = 2) +
    facet_wrap( ~ cpg_prcg) +
    scale_color_manual(values = c("red3", "darkgreen", "chocolate2", "dodgerblue4", "mediumorchid4", "mistyrose4")) + 
    scale_x_continuous(breaks = pretty_breaks(n = 6)) + 
    scale_y_continuous(breaks = pretty_breaks(n = 6)) +
    labs(title = "", x = "False positive rate", y = "True positive rate") +
    line_theme()
print(p_tpr_fpr)


pdf(file = paste0("out/", io$dataset, io$sub_dir, io$regions, "/tpr-fpr-encode-scWGBS.pdf"), width = 15, height = 7, useDingbats = FALSE)
p_tpr_fpr
dev.off()
rm(dt_boxplot, dt_tpr_fpr, region_names, model_names, perform, dt)

Precision / Recall performance

set.seed(17)
dt_boxplot <- copy(dt_analysis)
# Keep only required columns
dt_boxplot <- dt_boxplot[, c("cpg_prcg", "pr_melissa", "pr_melissa_rate", "pr_indep_prof", "pr_indep_rate", "pr_rf", "pr_deepcpg")]
# Keep one simulation per region
rows <- c(4, 16, 25)
dt_boxplot <- dt_boxplot[rows, ]

# Change Model and Region names
colnames(dt_boxplot) <- c("cpg_prcg", "Melissa", "Melissa Rate", "BPRMeth", "Indep Rate", "RF", "DeepCpG")
model_names <- colnames(dt_boxplot)[2:NCOL(dt_boxplot)]
cpg_names <- c("20% coverage", "50% coverage", "80% coverage")
dt_boxplot$cpg_prcg <- cpg_names

# Extract FPR and TPR
dt_pr <- data.table(x = numeric(), y = numeric(), cpg_prcg = character(), Model = character())
for (i in 1:NROW(dt_boxplot)) {
    for (k in model_names) {
        perform <- dt_boxplot[, k, with = FALSE]
        len <- round(length(perform[[1]][[i]]@x.values[[1]]))
        dt <- data.table(x = perform[[1]][[i]]@x.values[[1]][5:len],  
                         y = perform[[1]][[i]]@y.values[[1]][5:len],
                         cpg_prcg = dt_boxplot$cpg_prcg[i], Model = k)
        dt_pr <- rbind(dt_pr, dt)
    }
}

# Rename and refactor
dt_pr <- dt_pr %>% .[, c("cpg_prcg", "Model") := list(as.factor(cpg_prcg), as.factor(Model))] %>% .[, cpg_prcg := factor(cpg_prcg, levels = c("20% coverage", "50% coverage", "80% coverage"))] %>% .[, Model := factor(Model, levels = c("Melissa", "Melissa Rate", "BPRMeth", "Indep Rate", "RF", "DeepCpG"))] %>% 
    .[, Model := factor(Model, levels = c("Melissa", "DeepCpG", "BPRMeth", "RF", "Melissa Rate", "Indep Rate"))]


p_pr <- ggplot(dt_pr, aes(x = x, y = y, group = Model)) +
    geom_line(aes(color = Model), size = 2) +
    facet_wrap( ~ cpg_prcg) +
    scale_color_manual(values = c("red3", "darkgreen", "chocolate2", "dodgerblue4", "mediumorchid4", "mistyrose4")) + 
    scale_x_continuous(breaks = pretty_breaks(n = 4)) + 
    scale_y_continuous(limits = c(0.3, 1), breaks = pretty_breaks(n = 4)) +
    labs(title = "", x = "Recall", y = "Precision") +
    line_theme()
print(p_pr)

pdf(file = paste0("out/", io$dataset, io$sub_dir, io$regions, "/pr-encode-scWGBS.pdf"), width = 15, height = 7, useDingbats = FALSE)
p_pr
dev.off()

rm(dt_boxplot, dt_pr, region_names, model_names, perform, dt)

Joint plot AUC F-measure ENCODE synthetic

## AUC plot
p_auc_jitter <- p_auc_box + theme(legend.position = "none")
p_f_jitter <- p_f_box + theme(legend.position = "right")

final_fig_f <- plot_grid(p_auc_jitter, p_f_jitter, labels = c("a", "b"), 
                           label_size = 25, ncol = 2, nrow = 1, rel_widths = c(1, 1.3))
print(final_fig_f)

pdf(file = paste0("out/", io$dataset, io$sub_dir, io$regions, "/auc-f-encode-scWGBS.pdf"), width = 15, height = 6, useDingbats = FALSE)
final_fig_f
dev.off()

Joint plot TPR PR ENCODE synthetic

## AUC plot
p_pr_jitter <- p_pr + theme(legend.position = "top")
p_tpr_jitter <- p_tpr_fpr + theme(legend.position = "none")


final_fig_f <- plot_grid(p_pr_jitter, p_tpr_jitter, labels = c("a", "b"), 
                           label_size = 25, ncol = 1, nrow = 2, rel_heights = c(1.2, 1))
print(final_fig_f)

pdf(file = paste0("out/", io$dataset, io$sub_dir, io$regions, "/tpr-fpr-pr-encode-scWGBS.pdf"), width = 15, height = 13, useDingbats = FALSE)
final_fig_f
dev.off()


andreaskapou/Melissa documentation built on June 12, 2020, 5:54 p.m.