knitr::opts_chunk$set(echo = TRUE)
library(DMRcompare) library(tidyverse) library(ggpubr) data("dmrcateRes_df") data("probeLassoRes_df") data("bumphunterRes_df") data("combpRes_df")
results_ls <- list() results_ls$DMRcate <- dmrcateRes_df %>% filter(lambda == 500) %>% filter(C == 5) %>% mutate(method = paste0(method, "_lambda500_C5")) %>% select(-one_of("lambda", "C")) results_ls$ProbeLasso <- probeLassoRes_df %>% filter(adjPval == 0.05) %>% filter(mLassoRad == 1000) %>% filter(minDmrSep == 1000) %>% mutate(method = paste0(method, "_adjP0.05_mLassR1000")) %>% select(-one_of("adjPval", "mLassoRad", "minDmrSep")) results_ls$Bumphunter <- bumphunterRes_df %>% filter(cutoffQ == 0.95) %>% filter(maxGap == 250) %>% mutate(nCPG_q1 = as.character(nCPG_q1)) %>% mutate(nCPG_med = as.character(nCPG_med)) %>% mutate(nCPG_q3 = as.character(nCPG_q3)) %>% mutate(method = paste0(method, "_cutQ0.95_maxGap250")) %>% select(-one_of("cutoffQ", "maxGap")) results_ls$Comb_p <- combpRes_df %>% filter(combSeed == 0.05) %>% filter(combDist == 750) %>% mutate(method = paste0(method, "_Seed0.05_Dist750")) %>% select(-one_of("combSeed", "combDist"))
results_df <- results_ls %>% bind_rows() %>% filter(delta > 0) %>% mutate(nCPG_q1 = as.numeric(nCPG_q1)) %>% mutate(nCPG_med = as.numeric(nCPG_med)) %>% mutate(nCPG_q3 = as.numeric(nCPG_q3)) %>% mutate(method = factor(method)) %>% select(method, delta, seed, TP, FP, FN, power, precision, AuPR, mcc, F1, nCPG_q1, nCPG_med, nCPG_q3) resultsAve_df <- results_df %>% group_by(method, delta) %>% summarise( TP = mean(TP), FP = mean(FP), FN = mean(FN), Pwr = mean(power), Precis = mean(precision), AuPR = mean(AuPR), MCC = mean(mcc), F1 = mean(F1), nCPG_q1 = mean(nCPG_q1), nCPG_med = mean(nCPG_med), nCPG_q3 = mean(nCPG_q3) ) results_df %>% group_by(method) %>% summarise(med_nCPGs = median(nCPG_med, na.rm = TRUE)) %>% knitr::kable()
power_gg <- ggplot(data = resultsAve_df) + theme_bw() + theme(panel.grid.minor = element_blank()) + aes(x = delta, y = Pwr, group = method, colour = method) + scale_y_continuous("Power", limits = c(0, 1)) + scale_x_continuous("Treatment Effect", breaks = unique(results_df$delta), labels = prettyNum(unique(results_df$delta)), limits = c(0, 0.4)) + ggtitle("Power under Best-Performing Parameter Set vs. Treatment Effect") + scale_color_discrete(name = "DMR Method", breaks = levels(results_df$method), labels = c("Bumphunter", "Comb-p", "DMRcate", "ProbeLasso")) + geom_line(size = 1) power_gg
precis_gg <- ggplot(data = resultsAve_df) + theme_bw() + theme(panel.grid.minor = element_blank()) + aes(x = delta, y = Precis, group = method, colour = method) + scale_y_continuous("Precision", limits = c(0.0, 1)) + scale_x_continuous("Treatment Effect", breaks = unique(results_df$delta), labels = prettyNum(unique(results_df$delta)), limits = c(0, 0.4)) + ggtitle("Precision under Best-Performing Parameter Set vs. Treatment Effect") + scale_color_discrete(name = "DMR Method", breaks = levels(results_df$method), labels = c("Bumphunter", "Comb-p", "DMRcate", "ProbeLasso")) + geom_line(size = 1) precis_gg
nCPGsFct_df <- resultsAve_df %>% mutate(delta = factor(delta, levels = unique(delta), ordered = TRUE))
power2_gg <- ggplot(data = nCPGsFct_df) + theme_bw() + theme(panel.grid.minor = element_blank()) + aes(x = delta, y = Pwr, group = method, colour = method) + scale_y_continuous("Power", limits = c(0, 1)) + scale_x_discrete(expression(mu), breaks = unique(results_df$delta), labels = prettyNum(unique(results_df$delta))) + scale_color_discrete(name = "DMR Method", breaks = levels(results_df$method), labels = c("Bumphunter", "Comb-p", "DMRcate", "ProbeLasso")) + ggtitle(" Power under Best-Performing \n Parameter Set vs. Treatment Effect") + geom_line(size = 1) precis2_gg <- ggplot(data = nCPGsFct_df) + theme_bw() + theme(panel.grid.minor = element_blank()) + aes(x = delta, y = Precis, group = method, colour = method) + scale_y_continuous("Precision", limits = c(0, 1)) + scale_x_discrete(expression(mu), breaks = unique(results_df$delta), labels = prettyNum(unique(results_df$delta))) + ggtitle(" Precision under Best-Performing \n Parameter Set vs. Treatment Effect") + scale_color_discrete(name = "DMR Method", breaks = levels(results_df$method), labels = c("Bumphunter", "Comb-p", "DMRcate", "ProbeLasso")) + geom_line(size = 1) ggarrange(precis2_gg, power2_gg, nrow = 1, ncol = 2, common.legend = TRUE, legend = "bottom")
nCPGs_gg <- ggplot(data = nCPGsFct_df) + theme_bw() + aes(x = delta, group = method, colour = method) + ggtitle("Median and IQR of DMR Sizes for Each Method under Optimal Parameter Settings") + scale_color_discrete(name = "DMR Method", breaks = levels(nCPGsFct_df$method), labels = c("Bumphunter", "Comb-p", "DMRcate", "ProbeLasso")) + scale_x_discrete("Treatment Effect") + scale_y_continuous("Size of DMRs Detected") + geom_errorbar( mapping = aes(ymin = nCPG_q1, ymax = nCPG_q3), position = position_dodge(width = 0.5), width = 0.5, size = 1 ) + geom_point( aes(y = nCPG_med), position = position_dodge(width = 0.5), size = 4 ) nCPGs_gg
We will first read in the raw performance data for all four methods under their best-performing parameter setting. Zhen did not filter the original Comb-p results to nCPGs
$ >= 5$, so we have to do that here.
fileNames_char <- list.files("../extdata/best_cases_results") nCPGs_ls <- lapply(fileNames_char, function(name){ # Metadata name_char <- strsplit(name, split = "_")[[1]] # data data_ls <- readRDS(paste0("../extdata/best_cases_results/", name)) data_df <- data_ls[[1]] if(name_char[1] == "CombpResults"){ data_df <- data_df[, c("chrom", "start", "end", "z_p", "n_probes")] colnames(data_df) <- c("dmr.chr", "dmr.start", "dmr.end", "dmr.pval", "dmr.n.cpgs") data_df <- data_df[data_df$dmr.n.cpgs > 4, ] } else { data_df <- data_df[, c("dmr.chr", "dmr.start", "dmr.end", "dmr.pval", "dmr.n.cpgs")] } # bind data_df$method <- gsub(pattern = "Results", replacement = "", name_char[1]) data_df$delta <- as.numeric( gsub(pattern = "delta", replacement = "", name_char[2]) ) data_df$seed <- as.numeric( gsub(pattern = "seed", replacement = "", name_char[3]) ) # return data_df }) nCPGs_df <- nCPGs_ls %>% bind_rows() %>% select(delta, method, seed, everything()) %>% arrange(delta) %>% mutate(delta = factor(delta, levels = unique(delta), ordered = TRUE)) nCPGs_df %>% group_by(method) %>% summarise(med_nCPGs = median(dmr.n.cpgs, na.rm = TRUE)) %>% knitr::kable()
Now we have a data frame with the number of CPGs for each method by each design point. We use theme_classic()
to pretty-up the hidden outliers; there are quite a instances where the ProbeLasso method returned large DMR sizes.
ggplot(data = nCPGs_df) + theme_classic() + theme(legend.position = "bottom") + aes(x = delta, y = dmr.n.cpgs, fill = method) + ggtitle("DMR Sizes under Best-Performing Parameter Settings for Considered Methods") + scale_y_continuous("Number of CpGs in Significant DMRs", limits = c(5, 25)) + scale_x_discrete(expression(mu)) + scale_fill_discrete("DMR Method") + geom_boxplot(position = position_dodge(0.7), width = 0.5, outlier.color = "white")
This shows the very long tail for ProbeLasso
ggplot(data = nCPGs_df) + theme_bw() + aes(x = delta, y = dmr.n.cpgs, fill = method) + scale_y_continuous("Size of DMRs Detected") + scale_x_discrete(expression(mu)) + scale_fill_discrete("DMR Method") + geom_violin()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.