inst/doc/L4_IllustrativeExamples.R

## ----setup, message=FALSE-----------------------------------------------------
library(phylosamp)
library(ggplot2)
library(RColorBrewer)
library(cowplot)

## ----gen_dist_compare_ebov, fig.width=6, fig.height=4, warning=FALSE----------

# ebola-like pathogen
R <- 1.5
mut_rate <- 1

# use simulated generation distributions
data("genDistSim")
mean_gens_pdf <- as.numeric(genDistSim[genDistSim$R == R, -(1:2)])

# get theoretical genetic distance dist based on mutation rate and generation parameters
dists <- as.data.frame(gendist_distribution(mut_rate = mut_rate, 
                                            mean_gens_pdf = mean_gens_pdf, 
                                            max_link_gens = 1))

# reshape dataframe for plotting
dists <- reshape2::melt(dists, 
                        id.vars = "dist", 
                        variable.name = "status", 
                        value.name = "prob")

# get sensitivity and specificity using the same paramters
roc_calc <- gendist_roc_format(cutoff = 1:(max(dists$dist)-1), 
                               mut_rate = mut_rate,
                               mean_gens_pdf = mean_gens_pdf)
  
# get the optimal value for the ROC plot
roc_optim <- optim_roc_threshold(roc_calc)
threshold <- roc_optim$cutoff
optim_point <- c(roc_optim$specificity, roc_optim$sensitivity)

# plot the distributions of genetic distance
pal <- RColorBrewer::brewer.pal(5, "PuOr")
  
p1 <- ggplot(dists, aes(x=dist, y=prob, fill=status)) +
    geom_bar(alpha=0.5, stat="identity", position="identity") +
    scale_fill_manual(name="", values=c(pal[5], pal[2])) +
    scale_x_continuous(limits = c(-1,35), expand = c(0,0)) +
    scale_y_continuous(limits = c(0,0.4), expand = c(0,0)) +
    xlab('Genetic distance') + ylab('Density') +
    theme_classic() + theme(legend.position='none') +
    geom_vline(xintercept=threshold, linetype=2, size=0.5, color = "black")
  
# add ROC curve on top
r1 <- ggplot(data=roc_calc, aes(x=specificity, y=sensitivity)) +
    geom_line(size=1.5) + xlim(0,1) + ylim(0,1) +
    geom_point(x=optim_point[1], y=optim_point[2],
               size = 3, stroke=1, shape=21, fill = "chartreuse") +
    geom_abline(slope=1, intercept = 0, linetype=2, alpha=0.5) +
    xlab("1-specificity") +
    theme_bw()
  
cowplot::ggdraw(p1) + cowplot::draw_plot(r1, hjust=-0.16, vjust=-0.16, scale=0.6)


## ----optim_value--------------------------------------------------------------
threshold

## ----sens_spec_values---------------------------------------------------------
roc_optim$sensitivity ## sensitivity
1 - roc_optim$specificity ## specificity

## ----gen_dist_compare_mev-----------------------------------------------------

# faster-spreading pathogen
R <- 3
mut_rate <- 0.2

mean_gens_pdf <- genDistSim[genDistSim["R"]==R][3:dim(genDistSim)[2]]


## ----gen_dist_mev_plot, fig.width=6, fig.height=4, echo=FALSE, warning=FALSE----

# get theoretical genetic distance dist based on mutation rate and generation parameters
dists <- as.data.frame(gendist_distribution(mut_rate = mut_rate, 
                                            mean_gens_pdf = mean_gens_pdf, max_link_gens = 1))
# reshape dataframe for plotting
dists <- reshape2::melt(dists, id.vars = "dist", variable.name = "status", value.name = "prob")

# get sensitivity and specificity using the same paramters
roc_calc <- gendist_roc_format(cutoff = 1:(max(dists$dist)-1), mut_rate = mut_rate,
                          mean_gens_pdf = mean_gens_pdf)
  
# get the optimal value for the ROC plot using closest-to-corner metric
roc_optim <- roc_calc
roc_optim$dist <- sqrt((1-roc_optim$sensitivity)^2 + (roc_optim$specificity)^2)
roc_optim <- roc_optim[2:(dim(roc_optim)[1]),] # remove first row with zero threshold
optim_value <- min(roc_optim$dist)
threshold <- roc_optim[roc_optim$dist==optim_value,]$cutoff
optim_point <- c(roc_optim[roc_optim$cutoff==threshold,]$specificity,
                 roc_optim[roc_optim$cutoff==threshold,]$sensitivity)
  
# plot the distributions of genetic distance
pal <- RColorBrewer::brewer.pal(5, "PuOr")
  
p1 <- ggplot(dists, aes(x=dist, y=prob, fill=status)) +
    geom_bar(alpha=0.5, stat="identity", position="identity") +
    scale_fill_manual(name="", values=c(pal[5], pal[2])) +
    scale_x_continuous(limits = c(-1,35), expand = c(0,0)) +
    scale_y_continuous(limits = c(0,0.4), expand = c(0,0)) +
    xlab('Genetic distance') + ylab('Density') +
    theme_classic() + theme(legend.position='none') +
    geom_vline(xintercept=threshold, linetype=2, size=0.5, color = "black")
  
# add ROC curve on top
r1 <- ggplot(data=roc_calc, aes(x=specificity, y=sensitivity)) +
    geom_line(size=1.5) + xlim(0,1) + ylim(0,1) +
    geom_point(x=optim_point[1], y=optim_point[2],
               size = 3, stroke=1, shape=21, fill = "chartreuse") +
    geom_abline(slope=1, intercept = 0, linetype=2, alpha=0.5) +
    xlab("1-specificity") +
    theme_bw()
  
cowplot::ggdraw(p1) + cowplot::draw_plot(r1, hjust=-0.16, vjust=-0.16, scale=0.6)


## ----optim_value_slow---------------------------------------------------------
threshold

## ----sens_spec_values_slow----------------------------------------------------
roc_optim[roc_optim$cutoff==threshold,]$sensitivity ## sensitivity
1 - roc_optim[roc_optim$cutoff==threshold,]$specificity ## specificity

## ----fdr_final----------------------------------------------------------------
translink_tdr(sensitivity=0.9963402, specificity=0.9801562, 
              rho=0.20, M=200, R=1) # ebola-like pathogen
translink_tdr(sensitivity=0.8187308, specificity=0.8539157, 
              rho=0.20, M=200, R=1) # slower-mutating pathogen

Try the phylosamp package in your browser

Any scripts or data that you put into this service are public.

phylosamp documentation built on May 31, 2023, 5:23 p.m.