# Here, we set default options for our markdown file
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  tidy = "styler",
  fig.width = 7,
  fig.height = 7,
  message = FALSE,
  warning = FALSE
)

# The following two variables control which chunks are run. Either the tests are
# run and the stored images not printed or the tests can not be run and stored
# images printed. To run the tests, set eval_tests to TRUE. To print the stored
# images, set eval_tests to FALSE.
eval_tests <- FALSE
eval_images <- !eval_tests

library(coiaf)
library(ggplot2)
library(patchwork)

Introduction

In this analysis file, we aim to understand the effect of varying parameters on our COI framework. We will examine both simulation and estimation parameters. The parameters that we will examine are:

Default parameters

| Parameter | Default Value | |:---------------:|:---------------------:| | COI | 3 | | PLMAF | runif(1000, 0, 0.5) | | Coverage | 200 | | Alpha | 1 | | Overdispersion | 0 | | Relatedness | 0 | | Epsilon | 0 | | Sequence Error | 0.01 | | Use Bins | FALSE | | Bin Size | 20 |

Setting our PLMAF

# Set the seed
set.seed(1)

# Define number of loci, and distribution of minor allele frequencies
L <- 1e3
p <- stats::rbeta(L, 1, 5)
p[p > 0.5] <- 1 - p[p > 0.5]

Overall performance

toverall <- disc_sensitivity(
  coi = 1:20,
  repetitions = 100,
  plmaf = p,
  coverage = 200,
  seq_error = 0,
  coi_method = c("variant", "frequency")
)

toverall_image <- sensitivity_plot(
  data = toverall,
  dims = c(1, 2),
  result_type = "disc",
  title = "Predicted COI",
  sub_title = c("Variant Method", "Frequency Method")
)

toverall_error <- error_plot(
  data = toverall,
  fill = "coi_method",
  legend_title = "COI Method",
  title = "Error",
  fill_levels = c("Variant Method", "Frequency Method")
)

toverall_fig <- toverall_image / toverall_error +
  plot_annotation(
    tag_levels = "A",
    theme = theme(plot.tag = element_text(size = 10))
  ) +
  theme(legend.position = "bottom")

toverall_fig
knitr::include_graphics(
  here::here("vignettes", "figures", "discrete", "toverall.png")
)

Simulation parameters

Coverage

tcoverage_1 <- disc_sensitivity(
  coi = 2:20,
  coverage = c(50, 100, 250, 500, 1000, 2000),
  repetitions = 100,
  seq_error = 0.01,
  plmaf = p,
  coi_method = "variant"
)

tcoverage_image_1 <- sensitivity_plot(
  data = tcoverage_1,
  result_type = "disc",
  title = "Predicted COI",
  dims = c(2, 3),
  sub_title = paste0("Coverage = ", c(50, 100, 250, 500, 1000, 2000))
)

tcoverage_error_1 <- error_plot(
  tcoverage_1,
  fill = "coverage",
  legend_title = "Coverage",
  title = "Error"
)

tcoverage_fig_1 <- tcoverage_image_1 / tcoverage_error_1 +
  plot_annotation(
    tag_levels = "A",
    theme = theme(plot.tag = element_text(size = 10))
  ) +
  plot_layout(heights = c(2, 1)) +
  theme(legend.position = "bottom")

tcoverage_fig_1
tcoverage_2 <- disc_sensitivity(
  coi = 2:20,
  coverage = c(50, 100, 250, 500, 1000, 2000),
  repetitions = 100,
  seq_error = 0.01,
  plmaf = p, coi_method = "frequency"
)

tcoverage_image_2 <- sensitivity_plot(
  data = tcoverage_2,
  result_type = "disc",
  title = "Predicted COI",
  dims = c(2, 3),
  sub_title = paste0("Coverage = ", c(50, 100, 250, 500, 1000, 2000))
)

tcoverage_error_2 <- error_plot(
  tcoverage_2,
  fill = "coverage",
  legend_title = "Coverage",
  title = "Error"
)

tcoverage_fig_2 <- tcoverage_image_2 / tcoverage_error_2 +
  plot_annotation(
    tag_levels = "A",
    theme = theme(plot.tag = element_text(size = 10))
  ) +
  plot_layout(heights = c(2, 1)) +
  theme(legend.position = "bottom")

tcoverage_fig_2
knitr::include_graphics(
  here::here("vignettes", "figures", "discrete", "tcoverage_1.png")
)

knitr::include_graphics(
  here::here("vignettes", "figures", "discrete", "tcoverage_2.png")
)

Loci

# Set the range over which we will iterate
loci <- c(1e2, 1e3, 1e4)

# For each loci, reset the PLMAF and then run
bloci <- lapply(loci, function(new_L) {
  new_p <- rbeta(new_L, 1, 5)
  new_p[new_p > 0.5] <- 1 - new_p[new_p > 0.5]

  inner_tloci <- disc_sensitivity(
    coi = 2:20,
    repetitions = 100,
    plmaf = new_p,
    seq_error = 0.01,
    coi_method = "variant"
  )
  inner_tloci$param_grid$loci <- new_L
  return(inner_tloci)
})

# Extract the relevant information for each output: predicted_coi, probability,
# param_grid, and boot_error
pc <- do.call(cbind, lapply(bloci, function(test) {
  return(test$predicted_coi)
}))
pb <- do.call(cbind, lapply(bloci, function(test) {
  return(test$probability)
}))
pg <- do.call(rbind, lapply(bloci, function(test) {
  return(test$param_grid)
}))
be <- do.call(rbind, lapply(bloci, function(test) {
  return(test$boot_error)
}))

# Fix the naming for predicted_coi
num_cois <- length(unique(pg$coi))
num_repeat_cois <- length(pg$coi) / num_cois
names(pc) <- paste(
  "coi",
  pg$coi,
  rep(seq(num_repeat_cois), each = num_cois),
  sep = "_"
)

# Create the output
tloci_1 <- list(
  predicted_coi = pc,
  probability   = pb,
  param_grid    = pg,
  boot_error    = be
)

# Plot
tloci_image_1 <- sensitivity_plot(
  data = tloci_1,
  result_type = "disc",
  title = "Predicted COI",
  dims = c(1, 3),
  sub_title = paste0("Loci = ", c(1e2, 1e3, 1e4))
)

# Add a loci column
tloci_1$boot_error$loci <- rep(
  c(1e2, 1e3, 1e4),
  each = length(unique(tloci_1$boot_error$coi))
)
tloci_error_1 <- error_plot(
  tloci_1,
  fill = "loci",
  legend_title = "Loci",
  title = "Error"
)

tloci_fig_1 <- tloci_image_1 / tloci_error_1 +
  plot_annotation(
    tag_levels = "A",
    theme = theme(plot.tag = element_text(size = 10))
  ) +
  theme(legend.position = "bottom")

tloci_fig_1
# Set the range over which we will iterate
loci <- c(1e2, 1e3, 1e4)

# For each loci, reset the PLMAF and then run
bloci <- lapply(loci, function(new_L) {
  new_p <- rbeta(new_L, 1, 5)
  new_p[new_p > 0.5] <- 1 - new_p[new_p > 0.5]

  inner_tloci <- disc_sensitivity(
    coi = 2:20,
    repetitions = 100,
    plmaf = new_p,
    seq_error = 0.01,
    coi_method = "frequency"
  )
  inner_tloci$param_grid$loci <- new_L
  return(inner_tloci)
})

# Extract the relevant information for each output: predicted_coi, probability,
# param_grid, and boot_error
pc <- do.call(cbind, lapply(bloci, function(test) {
  return(test$predicted_coi)
}))
pb <- do.call(cbind, lapply(bloci, function(test) {
  return(test$probability)
}))
pg <- do.call(rbind, lapply(bloci, function(test) {
  return(test$param_grid)
}))
be <- do.call(rbind, lapply(bloci, function(test) {
  return(test$boot_error)
}))

# Fix the naming for predicted_coi
num_cois <- length(unique(pg$coi))
num_repeat_cois <- length(pg$coi) / num_cois
names(pc) <- paste(
  "coi",
  pg$coi,
  rep(seq(num_repeat_cois), each = num_cois),
  sep = "_"
)

# Create the output
tloci_2 <- list(
  predicted_coi = pc,
  probability   = pb,
  param_grid    = pg,
  boot_error    = be
)

# Plot
tloci_image_2 <- sensitivity_plot(
  data = tloci_2,
  result_type = "disc",
  title = "Predicted COI",
  dims = c(1, 3),
  sub_title = paste0("Loci = ", c(1e2, 1e3, 1e4))
)

# Add a loci column
tloci_2$boot_error$loci <- rep(
  c(1e2, 1e3, 1e4),
  each = length(unique(tloci_2$boot_error$coi))
)
tloci_error_2 <- error_plot(
  tloci_2,
  fill = "loci",
  legend_title = "Loci",
  title = "Error"
)

tloci_fig_2 <- tloci_image_2 / tloci_error_2 +
  plot_annotation(
    tag_levels = "A",
    theme = theme(plot.tag = element_text(size = 10))
  ) +
  theme(legend.position = "bottom")

tloci_fig_2
knitr::include_graphics(
  here::here("vignettes", "figures", "discrete", "tloci_1.png")
)

knitr::include_graphics(
  here::here("vignettes", "figures", "discrete", "tloci_2.png")
)

Alpha

talpha_1 <- disc_sensitivity(
  coi = 2:20,
  alpha = seq(0.01, 5.51, 0.5),
  repetitions = 100,
  seq_error = 0.01,
  plmaf = p,
  coi_method = "variant"
)

talpha_image_1 <- sensitivity_plot(
  data = talpha_1,
  result_type = "disc",
  title = "Predicted COI",
  dims = c(4, 3),
  sub_title = paste0("Alpha = ", seq(0.01, 5.51, 0.5))
)

talpha_error_1 <- error_plot(
  talpha_1,
  fill = "alpha",
  legend_title = "Alpha",
  title = "Error"
)

talpha_fig_1 <- talpha_image_1 / talpha_error_1 +
  plot_annotation(
    tag_levels = "A",
    theme = theme(plot.tag = element_text(size = 10))
  ) +
  plot_layout(heights = c(5, 1)) +
  theme(legend.position = "bottom")

talpha_fig_1
talpha_2 <- disc_sensitivity(
  coi = 2:20,
  alpha = seq(0.01, 5.51, 0.5),
  repetitions = 100,
  seq_error = 0.01,
  plmaf = p,
  coi_method = "frequency"
)

talpha_image_2 <- sensitivity_plot(
  data = talpha_2,
  result_type = "disc",
  title = "Predicted COI",
  dims = c(4, 3),
  sub_title = paste0("Alpha = ", seq(0.01, 5.51, 0.5))
)

talpha_error_2 <- error_plot(
  talpha_2,
  fill = "alpha",
  legend_title = "Alpha",
  title = "Error"
)

talpha_fig_2 <- talpha_image_2 / talpha_error_2 +
  plot_annotation(
    tag_levels = "A",
    theme = theme(plot.tag = element_text(size = 10))
  ) +
  plot_layout(heights = c(5, 1)) +
  theme(legend.position = "bottom")

talpha_fig_2
knitr::include_graphics(
  here::here("vignettes", "figures", "discrete", "talpha_1.png")
)

knitr::include_graphics(
  here::here("vignettes", "figures", "discrete", "talpha_2.png")
)

Overdispersion

tover <- disc_sensitivity(
  coi = 2:20,
  overdispersion = seq(0, 0.25, 0.05),
  repetitions = 100,
  seq_error = 0.01,
  plmaf = p,
  coi_method = c("variant", "frequency")
)

tover_image <- sensitivity_plot(
  data = tover,
  result_type = "disc",
  title = "Predicted COI",
  dims = c(4, 3),
  sub_title = paste0(
    rep(c("Var, ", "Freq, "), each = 6),
    "Dispersion = ",
    seq(0, 0.25, 0.05)
  )
)

tover_error <- error_plot(
  tover,
  fill = "overdispersion",
  legend_title = "Overdispersion",
  title = "Error",
  second_fill = "coi_method"
)

tover_fig <- tover_image / tover_error +
  plot_annotation(
    tag_levels = "A",
    theme = theme(plot.tag = element_text(size = 10))
  ) +
  plot_layout(heights = c(5, 1)) +
  theme(legend.position = "bottom")

tover_fig
knitr::include_graphics(
  here::here("vignettes", "figures", "discrete", "tover.png")
)

Relatedness

trelated <- disc_sensitivity(
  coi = 2:20,
  relatedness = seq(0, 0.5, 0.1),
  repetitions = 100,
  seq_error = 0.01,
  plmaf = p,
  coi_method = c("variant", "frequency")
)

trelated_image <- sensitivity_plot(
  data = trelated,
  result_type = "disc",
  title = "Predicted COI",
  dims = c(4, 3),
  sub_title = paste(
    paste0("Relatedness =  ", seq(0, 0.5, 0.1)),
    rep(c("Variant", "Frequency"), each = 6),
    sep = ", "
  )
)

trelated_error <- error_plot(
  trelated,
  fill = "relatedness",
  legend_title = "Related",
  title = "Error",
  second_fill = "coi_method"
)

trelated_fig <- trelated_image / trelated_error +
  plot_annotation(
    tag_levels = "A",
    theme = theme(plot.tag = element_text(size = 10))
  ) +
  plot_layout(heights = c(6, 1)) +
  theme(legend.position = "bottom")

trelated_fig
knitr::include_graphics(
  here::here("vignettes", "figures", "discrete", "trelated.png")
)

Epsilon

tepsilon <- disc_sensitivity(
  coi = 2:20,
  epsilon = seq(0, 0.025, 0.005),
  repetitions = 100,
  seq_error = 0.01,
  plmaf = p,
  coi_method = c("variant", "frequency")
)

tepsilon_image <- sensitivity_plot(
  data = tepsilon,
  result_type = "disc",
  title = "Predicted COI",
  dims = c(4, 3),
  sub_title = paste0(
    rep(c("Var, ", "Freq, "), each = 6),
    "Epsilon = ",
    seq(0, 0.025, 0.005)
  )
)

tepsilon_error <- error_plot(
  tepsilon,
  fill = "epsilon",
  legend_title = "Epsilon",
  title = "Error",
  second_fill = "coi_method"
)

tepsilon_fig <- tepsilon_image / tepsilon_error +
  plot_annotation(
    tag_levels = "A",
    theme = theme(plot.tag = element_text(size = 10))
  ) +
  plot_layout(heights = c(3, 1)) +
  theme(legend.position = "bottom")

tepsilon_fig
knitr::include_graphics(
  here::here("vignettes", "figures", "discrete", "tepsilon.png")
)

Estimation parameters

COI

tcoi <- disc_sensitivity(
  coi = 2:40,
  max_coi = 40,
  repetitions = 100,
  plmaf = p,
  seq_error = 0.01,
  coi_method = c("variant", "frequency")
)

tcoi_image <- sensitivity_plot(
  data = tcoi,
  dims = c(1, 2),
  result_type = "disc",
  title = "Predicted COI",
  sub_title = c("Variant Method", "Frequency Method")
)

tcoi_error <- error_plot(
  tcoi,
  fill = "coi_method",
  legend_title = "COI Method",
  title = "Error",
  fill_levels = c("Variant Method", "Frequency Method")
)

tcoi_fig <- tcoi_image / tcoi_error +
  plot_annotation(
    tag_levels = "A",
    theme = theme(plot.tag = element_text(size = 10))
  ) +
  theme(legend.position = "bottom")

tcoi_fig
knitr::include_graphics(
  here::here("vignettes", "figures", "discrete", "tcoi.png")
)

Sequencing error

tseq <- disc_sensitivity(
  coi = 2:20,
  epsilon = 0.01,
  seq_error = seq(0, 0.10, 0.02),
  repetitions = 100,
  plmaf = p,
  coi_method = c("variant", "frequency")
)

tseq_image <- sensitivity_plot(
  data = tseq,
  result_type = "disc",
  title = "Predicted COI",
  dims = c(4, 3),
  sub_title = paste0(
    rep(c("Variant, ", "Frequency, "), each = 6),
    "Seq Error = ",
    seq(0, 0.12, 0.02)
  )
)

tseq_error <- error_plot(
  tseq,
  fill = "seq_error",
  legend_title = "Sequence Error",
  title = "Error",
  second_fill = "coi_method"
)

tseq_fig <- tseq_image / tseq_error +
  plot_annotation(
    tag_levels = "A",
    theme = theme(plot.tag = element_text(size = 10))
  ) +
  plot_layout(heights = c(5, 1)) +
  theme(legend.position = "bottom")

tseq_fig
knitr::include_graphics(
  here::here("vignettes", "figures", "discrete", "tseq.png")
)

Bins

tbin <- disc_sensitivity(
  coi = 2:20,
  use_bins = c(TRUE, FALSE),
  plmaf = p,
  repetitions = 100,
  coi_method = c("variant", "frequency")
)

tbin_image <- sensitivity_plot(
  data = tbin,
  result_type = "disc",
  title = "Predicted COI",
  dims = c(2, 2),
  sub_title = paste(
    c("Variant Method,", "Frequency Method,"),
    "Bins =",
    rep(c(TRUE, FALSE), each = 2)
  )
)

tbin_error <- error_plot(
  tbin,
  fill = "use_bins",
  fill_levels = as.character(c(TRUE, FALSE)),
  legend_title = "COI Method",
  title = "Error",
  second_fill = "coi_method"
)

tbin_fig <- tbin_image / tbin_error +
  plot_annotation(
    tag_levels = "A",
    theme = theme(plot.tag = element_text(size = 10))
  ) +
  plot_layout(heights = c(3, 1)) +
  theme(legend.position = "bottom")

tbin_fig
knitr::include_graphics(
  here::here("vignettes", "figures", "discrete", "tbin.png")
)

Bin size

tbin_size <- disc_sensitivity(
  coi = 2:20,
  use_bins = TRUE,
  bin_size = seq(10, 100, 30),
  plmaf = p,
  repetitions = 100,
  seq_error = 0.01,
  coi_method = c("variant", "frequency")
)

tbin_size_image <- sensitivity_plot(
  data = tbin_size,
  result_type = "disc",
  title = "Predicted COI",
  dims = c(2, 4),
  sub_title = rep(paste("Bin Size =", seq(10, 100, 30)), 2)
)

tbin_size_error <- error_plot(
  tbin_size,
  fill = "bin_size",
  fill_levels = as.character(seq(10, 100, 30)),
  legend_title = "COI Method",
  title = "Error"
)

tbin_size_fig <- tbin_size_image / tbin_size_error +
  plot_annotation(
    tag_levels = "A",
    theme = theme(plot.tag = element_text(size = 10))
  ) +
  plot_layout(heights = c(3, 1)) +
  theme(legend.position = "bottom")

tbin_size_fig
knitr::include_graphics(
  here::here("vignettes", "figures", "discrete", "tbin_size.png")
)
# Below, we save the runs we performed for future use
saved_analysis <- list(
  toverall, tcoverage_1, tcoverage_2, tloci_1, tloci_2, talpha_1, talpha_2,
  tover, trelated, tepsilon, tcoi, tseq, tbin, tbin_size
)

names(saved_analysis) <- list(
  "toverall", "tcoverage_1", "tcoverage_2", "tloci_1", "tloci_2",
  "talpha_1", "talpha_2", "tover", "trelated", "tepsilon", "tcoi",
  "tseq", "tbin", "tbin_size"
)

saveRDS(
  saved_analysis,
  file = here::here("vignettes", "saved_analysis_discrete.rds")
)

# Here, we update all the stored images
update_images <- list(
  toverall_fig, tcoverage_fig_1, tcoverage_fig_2, tloci_fig_1, tloci_fig_2,
  talpha_fig_1, talpha_fig_2, tover_fig, trelated_fig, tepsilon_fig,
  tcoi_fig, tseq_fig, tbin_fig, tbin_size_fig
)
names(update_images) <- names(saved_analysis)

for (i in seq_len(length(update_images))) {
  # Save vignette images
  ggsave(
    plot = update_images[[i]],
    filename = here::here(
      "vignettes",
      "figures",
      "discrete",
      glue::glue("{ names(update_images)[i] }.png")
    ),
    width = 7,
    height = 7,
    units = "in",
    device = "png",
    dpi = "screen"
  )
}


bailey-lab/coiaf documentation built on April 26, 2023, 6:32 p.m.