# Here, we set default options for our markdown file knitr::opts_chunk$set( collapse = TRUE, comment = "#>", tidy = "styler", fig.width = 7, fig.height = 7, message = FALSE, warning = FALSE ) # The following two variables control which chunks are run. Either the tests are # run and the stored images not printed or the tests can not be run and stored # images printed. To run the tests, set eval_tests to TRUE. To print the stored # images, set eval_tests to FALSE. eval_tests <- FALSE eval_images <- !eval_tests library(coiaf) library(ggplot2) library(patchwork)
In this analysis file, we aim to understand the effect of varying parameters on our COI framework. We will examine both simulation and estimation parameters. The parameters that we will examine are:
coverage
: Coverage at each locus.loci
: The number of loci.alpha
: Shape parameter of the symmetric Dirichlet prior on strain proportions.overdispersion
: The extent to which counts are over-dispersed relative to
the binomial distribution. Counts are Beta-binomially distributed, with the beta
distribution having shape parameters $p/\text{overdispersion}$ and $(1-p) /
\text{overdispersion}$.relatedness
: The probability that a strain in mixed infections is related to
another.epsilon
: The probability of a single read being miscalled as the other allele.
Applies in both directions.coi
: The complexity of infection of the sample.seq_error
: The level of sequencing error that is assumed.use_bins
: Whether or not to group data before estimating the COI.bin_size
: The minimum size of each bin of data.| Parameter | Default Value |
|:---------------:|:---------------------:|
| COI | 3
|
| PLMAF | runif(1000, 0, 0.5)
|
| Coverage | 200
|
| Alpha | 1
|
| Overdispersion | 0
|
| Relatedness | 0
|
| Epsilon | 0
|
| Sequence Error | 0.01
|
| Use Bins | FALSE
|
| Bin Size | 20
|
# Set the seed set.seed(1) # Define number of loci, and distribution of minor allele frequencies L <- 1e3 p <- stats::rbeta(L, 1, 5) p[p > 0.5] <- 1 - p[p > 0.5]
toverall <- disc_sensitivity( coi = 1:20, repetitions = 100, plmaf = p, coverage = 200, seq_error = 0, coi_method = c("variant", "frequency") ) toverall_image <- sensitivity_plot( data = toverall, dims = c(1, 2), result_type = "disc", title = "Predicted COI", sub_title = c("Variant Method", "Frequency Method") ) toverall_error <- error_plot( data = toverall, fill = "coi_method", legend_title = "COI Method", title = "Error", fill_levels = c("Variant Method", "Frequency Method") ) toverall_fig <- toverall_image / toverall_error + plot_annotation( tag_levels = "A", theme = theme(plot.tag = element_text(size = 10)) ) + theme(legend.position = "bottom") toverall_fig
knitr::include_graphics( here::here("vignettes", "figures", "discrete", "toverall.png") )
tcoverage_1 <- disc_sensitivity( coi = 2:20, coverage = c(50, 100, 250, 500, 1000, 2000), repetitions = 100, seq_error = 0.01, plmaf = p, coi_method = "variant" ) tcoverage_image_1 <- sensitivity_plot( data = tcoverage_1, result_type = "disc", title = "Predicted COI", dims = c(2, 3), sub_title = paste0("Coverage = ", c(50, 100, 250, 500, 1000, 2000)) ) tcoverage_error_1 <- error_plot( tcoverage_1, fill = "coverage", legend_title = "Coverage", title = "Error" ) tcoverage_fig_1 <- tcoverage_image_1 / tcoverage_error_1 + plot_annotation( tag_levels = "A", theme = theme(plot.tag = element_text(size = 10)) ) + plot_layout(heights = c(2, 1)) + theme(legend.position = "bottom") tcoverage_fig_1
tcoverage_2 <- disc_sensitivity( coi = 2:20, coverage = c(50, 100, 250, 500, 1000, 2000), repetitions = 100, seq_error = 0.01, plmaf = p, coi_method = "frequency" ) tcoverage_image_2 <- sensitivity_plot( data = tcoverage_2, result_type = "disc", title = "Predicted COI", dims = c(2, 3), sub_title = paste0("Coverage = ", c(50, 100, 250, 500, 1000, 2000)) ) tcoverage_error_2 <- error_plot( tcoverage_2, fill = "coverage", legend_title = "Coverage", title = "Error" ) tcoverage_fig_2 <- tcoverage_image_2 / tcoverage_error_2 + plot_annotation( tag_levels = "A", theme = theme(plot.tag = element_text(size = 10)) ) + plot_layout(heights = c(2, 1)) + theme(legend.position = "bottom") tcoverage_fig_2
knitr::include_graphics( here::here("vignettes", "figures", "discrete", "tcoverage_1.png") ) knitr::include_graphics( here::here("vignettes", "figures", "discrete", "tcoverage_2.png") )
# Set the range over which we will iterate loci <- c(1e2, 1e3, 1e4) # For each loci, reset the PLMAF and then run bloci <- lapply(loci, function(new_L) { new_p <- rbeta(new_L, 1, 5) new_p[new_p > 0.5] <- 1 - new_p[new_p > 0.5] inner_tloci <- disc_sensitivity( coi = 2:20, repetitions = 100, plmaf = new_p, seq_error = 0.01, coi_method = "variant" ) inner_tloci$param_grid$loci <- new_L return(inner_tloci) }) # Extract the relevant information for each output: predicted_coi, probability, # param_grid, and boot_error pc <- do.call(cbind, lapply(bloci, function(test) { return(test$predicted_coi) })) pb <- do.call(cbind, lapply(bloci, function(test) { return(test$probability) })) pg <- do.call(rbind, lapply(bloci, function(test) { return(test$param_grid) })) be <- do.call(rbind, lapply(bloci, function(test) { return(test$boot_error) })) # Fix the naming for predicted_coi num_cois <- length(unique(pg$coi)) num_repeat_cois <- length(pg$coi) / num_cois names(pc) <- paste( "coi", pg$coi, rep(seq(num_repeat_cois), each = num_cois), sep = "_" ) # Create the output tloci_1 <- list( predicted_coi = pc, probability = pb, param_grid = pg, boot_error = be ) # Plot tloci_image_1 <- sensitivity_plot( data = tloci_1, result_type = "disc", title = "Predicted COI", dims = c(1, 3), sub_title = paste0("Loci = ", c(1e2, 1e3, 1e4)) ) # Add a loci column tloci_1$boot_error$loci <- rep( c(1e2, 1e3, 1e4), each = length(unique(tloci_1$boot_error$coi)) ) tloci_error_1 <- error_plot( tloci_1, fill = "loci", legend_title = "Loci", title = "Error" ) tloci_fig_1 <- tloci_image_1 / tloci_error_1 + plot_annotation( tag_levels = "A", theme = theme(plot.tag = element_text(size = 10)) ) + theme(legend.position = "bottom") tloci_fig_1
# Set the range over which we will iterate loci <- c(1e2, 1e3, 1e4) # For each loci, reset the PLMAF and then run bloci <- lapply(loci, function(new_L) { new_p <- rbeta(new_L, 1, 5) new_p[new_p > 0.5] <- 1 - new_p[new_p > 0.5] inner_tloci <- disc_sensitivity( coi = 2:20, repetitions = 100, plmaf = new_p, seq_error = 0.01, coi_method = "frequency" ) inner_tloci$param_grid$loci <- new_L return(inner_tloci) }) # Extract the relevant information for each output: predicted_coi, probability, # param_grid, and boot_error pc <- do.call(cbind, lapply(bloci, function(test) { return(test$predicted_coi) })) pb <- do.call(cbind, lapply(bloci, function(test) { return(test$probability) })) pg <- do.call(rbind, lapply(bloci, function(test) { return(test$param_grid) })) be <- do.call(rbind, lapply(bloci, function(test) { return(test$boot_error) })) # Fix the naming for predicted_coi num_cois <- length(unique(pg$coi)) num_repeat_cois <- length(pg$coi) / num_cois names(pc) <- paste( "coi", pg$coi, rep(seq(num_repeat_cois), each = num_cois), sep = "_" ) # Create the output tloci_2 <- list( predicted_coi = pc, probability = pb, param_grid = pg, boot_error = be ) # Plot tloci_image_2 <- sensitivity_plot( data = tloci_2, result_type = "disc", title = "Predicted COI", dims = c(1, 3), sub_title = paste0("Loci = ", c(1e2, 1e3, 1e4)) ) # Add a loci column tloci_2$boot_error$loci <- rep( c(1e2, 1e3, 1e4), each = length(unique(tloci_2$boot_error$coi)) ) tloci_error_2 <- error_plot( tloci_2, fill = "loci", legend_title = "Loci", title = "Error" ) tloci_fig_2 <- tloci_image_2 / tloci_error_2 + plot_annotation( tag_levels = "A", theme = theme(plot.tag = element_text(size = 10)) ) + theme(legend.position = "bottom") tloci_fig_2
knitr::include_graphics( here::here("vignettes", "figures", "discrete", "tloci_1.png") ) knitr::include_graphics( here::here("vignettes", "figures", "discrete", "tloci_2.png") )
talpha_1 <- disc_sensitivity( coi = 2:20, alpha = seq(0.01, 5.51, 0.5), repetitions = 100, seq_error = 0.01, plmaf = p, coi_method = "variant" ) talpha_image_1 <- sensitivity_plot( data = talpha_1, result_type = "disc", title = "Predicted COI", dims = c(4, 3), sub_title = paste0("Alpha = ", seq(0.01, 5.51, 0.5)) ) talpha_error_1 <- error_plot( talpha_1, fill = "alpha", legend_title = "Alpha", title = "Error" ) talpha_fig_1 <- talpha_image_1 / talpha_error_1 + plot_annotation( tag_levels = "A", theme = theme(plot.tag = element_text(size = 10)) ) + plot_layout(heights = c(5, 1)) + theme(legend.position = "bottom") talpha_fig_1
talpha_2 <- disc_sensitivity( coi = 2:20, alpha = seq(0.01, 5.51, 0.5), repetitions = 100, seq_error = 0.01, plmaf = p, coi_method = "frequency" ) talpha_image_2 <- sensitivity_plot( data = talpha_2, result_type = "disc", title = "Predicted COI", dims = c(4, 3), sub_title = paste0("Alpha = ", seq(0.01, 5.51, 0.5)) ) talpha_error_2 <- error_plot( talpha_2, fill = "alpha", legend_title = "Alpha", title = "Error" ) talpha_fig_2 <- talpha_image_2 / talpha_error_2 + plot_annotation( tag_levels = "A", theme = theme(plot.tag = element_text(size = 10)) ) + plot_layout(heights = c(5, 1)) + theme(legend.position = "bottom") talpha_fig_2
knitr::include_graphics( here::here("vignettes", "figures", "discrete", "talpha_1.png") ) knitr::include_graphics( here::here("vignettes", "figures", "discrete", "talpha_2.png") )
tover <- disc_sensitivity( coi = 2:20, overdispersion = seq(0, 0.25, 0.05), repetitions = 100, seq_error = 0.01, plmaf = p, coi_method = c("variant", "frequency") ) tover_image <- sensitivity_plot( data = tover, result_type = "disc", title = "Predicted COI", dims = c(4, 3), sub_title = paste0( rep(c("Var, ", "Freq, "), each = 6), "Dispersion = ", seq(0, 0.25, 0.05) ) ) tover_error <- error_plot( tover, fill = "overdispersion", legend_title = "Overdispersion", title = "Error", second_fill = "coi_method" ) tover_fig <- tover_image / tover_error + plot_annotation( tag_levels = "A", theme = theme(plot.tag = element_text(size = 10)) ) + plot_layout(heights = c(5, 1)) + theme(legend.position = "bottom") tover_fig
knitr::include_graphics( here::here("vignettes", "figures", "discrete", "tover.png") )
trelated <- disc_sensitivity( coi = 2:20, relatedness = seq(0, 0.5, 0.1), repetitions = 100, seq_error = 0.01, plmaf = p, coi_method = c("variant", "frequency") ) trelated_image <- sensitivity_plot( data = trelated, result_type = "disc", title = "Predicted COI", dims = c(4, 3), sub_title = paste( paste0("Relatedness = ", seq(0, 0.5, 0.1)), rep(c("Variant", "Frequency"), each = 6), sep = ", " ) ) trelated_error <- error_plot( trelated, fill = "relatedness", legend_title = "Related", title = "Error", second_fill = "coi_method" ) trelated_fig <- trelated_image / trelated_error + plot_annotation( tag_levels = "A", theme = theme(plot.tag = element_text(size = 10)) ) + plot_layout(heights = c(6, 1)) + theme(legend.position = "bottom") trelated_fig
knitr::include_graphics( here::here("vignettes", "figures", "discrete", "trelated.png") )
tepsilon <- disc_sensitivity( coi = 2:20, epsilon = seq(0, 0.025, 0.005), repetitions = 100, seq_error = 0.01, plmaf = p, coi_method = c("variant", "frequency") ) tepsilon_image <- sensitivity_plot( data = tepsilon, result_type = "disc", title = "Predicted COI", dims = c(4, 3), sub_title = paste0( rep(c("Var, ", "Freq, "), each = 6), "Epsilon = ", seq(0, 0.025, 0.005) ) ) tepsilon_error <- error_plot( tepsilon, fill = "epsilon", legend_title = "Epsilon", title = "Error", second_fill = "coi_method" ) tepsilon_fig <- tepsilon_image / tepsilon_error + plot_annotation( tag_levels = "A", theme = theme(plot.tag = element_text(size = 10)) ) + plot_layout(heights = c(3, 1)) + theme(legend.position = "bottom") tepsilon_fig
knitr::include_graphics( here::here("vignettes", "figures", "discrete", "tepsilon.png") )
tcoi <- disc_sensitivity( coi = 2:40, max_coi = 40, repetitions = 100, plmaf = p, seq_error = 0.01, coi_method = c("variant", "frequency") ) tcoi_image <- sensitivity_plot( data = tcoi, dims = c(1, 2), result_type = "disc", title = "Predicted COI", sub_title = c("Variant Method", "Frequency Method") ) tcoi_error <- error_plot( tcoi, fill = "coi_method", legend_title = "COI Method", title = "Error", fill_levels = c("Variant Method", "Frequency Method") ) tcoi_fig <- tcoi_image / tcoi_error + plot_annotation( tag_levels = "A", theme = theme(plot.tag = element_text(size = 10)) ) + theme(legend.position = "bottom") tcoi_fig
knitr::include_graphics( here::here("vignettes", "figures", "discrete", "tcoi.png") )
tseq <- disc_sensitivity( coi = 2:20, epsilon = 0.01, seq_error = seq(0, 0.10, 0.02), repetitions = 100, plmaf = p, coi_method = c("variant", "frequency") ) tseq_image <- sensitivity_plot( data = tseq, result_type = "disc", title = "Predicted COI", dims = c(4, 3), sub_title = paste0( rep(c("Variant, ", "Frequency, "), each = 6), "Seq Error = ", seq(0, 0.12, 0.02) ) ) tseq_error <- error_plot( tseq, fill = "seq_error", legend_title = "Sequence Error", title = "Error", second_fill = "coi_method" ) tseq_fig <- tseq_image / tseq_error + plot_annotation( tag_levels = "A", theme = theme(plot.tag = element_text(size = 10)) ) + plot_layout(heights = c(5, 1)) + theme(legend.position = "bottom") tseq_fig
knitr::include_graphics( here::here("vignettes", "figures", "discrete", "tseq.png") )
tbin <- disc_sensitivity( coi = 2:20, use_bins = c(TRUE, FALSE), plmaf = p, repetitions = 100, coi_method = c("variant", "frequency") ) tbin_image <- sensitivity_plot( data = tbin, result_type = "disc", title = "Predicted COI", dims = c(2, 2), sub_title = paste( c("Variant Method,", "Frequency Method,"), "Bins =", rep(c(TRUE, FALSE), each = 2) ) ) tbin_error <- error_plot( tbin, fill = "use_bins", fill_levels = as.character(c(TRUE, FALSE)), legend_title = "COI Method", title = "Error", second_fill = "coi_method" ) tbin_fig <- tbin_image / tbin_error + plot_annotation( tag_levels = "A", theme = theme(plot.tag = element_text(size = 10)) ) + plot_layout(heights = c(3, 1)) + theme(legend.position = "bottom") tbin_fig
knitr::include_graphics( here::here("vignettes", "figures", "discrete", "tbin.png") )
tbin_size <- disc_sensitivity( coi = 2:20, use_bins = TRUE, bin_size = seq(10, 100, 30), plmaf = p, repetitions = 100, seq_error = 0.01, coi_method = c("variant", "frequency") ) tbin_size_image <- sensitivity_plot( data = tbin_size, result_type = "disc", title = "Predicted COI", dims = c(2, 4), sub_title = rep(paste("Bin Size =", seq(10, 100, 30)), 2) ) tbin_size_error <- error_plot( tbin_size, fill = "bin_size", fill_levels = as.character(seq(10, 100, 30)), legend_title = "COI Method", title = "Error" ) tbin_size_fig <- tbin_size_image / tbin_size_error + plot_annotation( tag_levels = "A", theme = theme(plot.tag = element_text(size = 10)) ) + plot_layout(heights = c(3, 1)) + theme(legend.position = "bottom") tbin_size_fig
knitr::include_graphics( here::here("vignettes", "figures", "discrete", "tbin_size.png") )
# Below, we save the runs we performed for future use saved_analysis <- list( toverall, tcoverage_1, tcoverage_2, tloci_1, tloci_2, talpha_1, talpha_2, tover, trelated, tepsilon, tcoi, tseq, tbin, tbin_size ) names(saved_analysis) <- list( "toverall", "tcoverage_1", "tcoverage_2", "tloci_1", "tloci_2", "talpha_1", "talpha_2", "tover", "trelated", "tepsilon", "tcoi", "tseq", "tbin", "tbin_size" ) saveRDS( saved_analysis, file = here::here("vignettes", "saved_analysis_discrete.rds") ) # Here, we update all the stored images update_images <- list( toverall_fig, tcoverage_fig_1, tcoverage_fig_2, tloci_fig_1, tloci_fig_2, talpha_fig_1, talpha_fig_2, tover_fig, trelated_fig, tepsilon_fig, tcoi_fig, tseq_fig, tbin_fig, tbin_size_fig ) names(update_images) <- names(saved_analysis) for (i in seq_len(length(update_images))) { # Save vignette images ggsave( plot = update_images[[i]], filename = here::here( "vignettes", "figures", "discrete", glue::glue("{ names(update_images)[i] }.png") ), width = 7, height = 7, units = "in", device = "png", dpi = "screen" ) }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.