knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
library(CoreMicro) library(MCMCpack) library(tidyverse) library(ggplot2) library(RColorBrewer) library(gridExtra) library(patchwork)
ntaxa<-1000 ncore<-25 nsites<-50 nreads<-2e06 nsims<-250 corenoncoreratio <-c(1,2,5,10,25) p.core <-corenoncoreratio/(ntaxa - ncore + corenoncoreratio*ncore) p.noncore <-( 1-(p.core*ncore) )/ (ntaxa - ncore) theta <-c(1, 2, 10, 25, 50) #intensity parameter ## do a set of simulations across input parameter values and number of replicates ------ ## set up the storage of input and output for the 25 simulation conditions ## quantify TP (true positives) and FP (false positives) sim.df<- data.frame(expand.grid(corenoncoreratio=corenoncoreratio, theta=theta), p.core=rep(p.core, length(theta)), p.noncore=rep(p.noncore, length(theta)), HardCut_TP= numeric(length(p.core) * length(theta)), PropRead_TP = numeric(length(p.core) * length(theta)), PropReps_TP = numeric(length(p.core) * length(theta)), PropReadReps_TP = numeric(length(p.core) * length(theta)), HardCut_FP= numeric(length(p.core) * length(theta)), PropRead_FP = numeric(length(p.core) * length(theta)), PropReps_FP = numeric(length(p.core) * length(theta)), PropReadReps_FP = numeric(length(p.core) * length(theta)), HardCut_FP_num= numeric(length(p.core) * length(theta)), PropRead_FP_num = numeric(length(p.core) * length(theta)), PropReps_FP_num = numeric(length(p.core) * length(theta)), PropReadReps_FP_num = numeric(length(p.core) * length(theta)), HardCut_TP_num= numeric(length(p.core) * length(theta)), PropRead_TP_num = numeric(length(p.core) * length(theta)), PropReps_TP_num = numeric(length(p.core) * length(theta)), PropReadReps_TP_num = numeric(length(p.core) * length(theta)) ) #error2.5 = numeric(length(p.core) * length(theta)), #error97.5 = numeric(length(p.core) * length(theta))) #sim.df for(trt in 1:(nrow(sim.df))){ # create empty vectors to store output for each method for every simulation tmp.err.HardCutTP <- numeric(nsims) tmp.err.PropReadTP <- numeric(nsims) tmp.err.PropRepsTP <- numeric(nsims) tmp.err.PropReadRepsTP <- numeric(nsims) tmp.err.HardCutFP <- numeric(nsims) tmp.err.PropReadFP <- numeric(nsims) tmp.err.PropRepsFP <- numeric(nsims) tmp.err.PropReadRepsFP <- numeric(nsims) #tmp.err.HardCutTP_num <- numeric(nsims) #tmp.err.PropReadTP_num <- numeric(nsims) #tmp.err.PropRepsTP_num <- numeric(nsims) #tmp.err.PropReadRepsTP_num <- numeric(nsims) #tmp.err.HardCutFP_num <- numeric(nsims) #tmp.err.PropReadFP_num <- numeric(nsims) #tmp.err.PropRepsFP_num <- numeric(nsims) #tmp.err.PropReadRepsFP_num <- numeric(nsims) for(i in 1:nsims){ # create a single OTU table for all four methods to be compared against replicateSim <- round(rdirichlet(nsites, c(rep(sim.df$p.core[trt] * sim.df$theta[trt], ncore), rep(sim.df$p.noncore[trt] * sim.df$theta[trt], ntaxa-ncore))) * nreads) # find true and false positives # true positives are those that meet in the criterion in the first 1:ncore taxa. The remaining taxa are not part of the core, so any taxa meeting the criterion in that set are false positives # hard cut off method # tmp.err.HardCut[i] <- sum(colSums((replicateSim >= 25)) >= 5) - ncore tmp.err.HardCutTP[i] <- sum((colSums((replicateSim >= 25)) >= 5)[1:ncore]) tmp.err.HardCutFP[i] <- sum((colSums((replicateSim >= 25)) >= 5)[(ncore+1):ntaxa]) # Prop reads #tmp.err.PropRead[i] <- sum(cumsum(sort(colSums(replicateSim)/sum(rowSums(replicateSim))))>=0.25) - ncore tmp.err.PropReadTP[i] <- sum((cumsum(sort(colSums(replicateSim)/sum(rowSums(replicateSim)), decreasing = T))<= 0.75)[1:ncore]) tmp.err.PropReadFP[i] <- sum((cumsum(sort(colSums(replicateSim)/sum(rowSums(replicateSim)), decreasing = T))<= 0.75)[(ncore+1):ntaxa]) # Prop Reps #tmp.err.PropReps[i] <- sum(colSums(replicateSim)>10*nrow(replicateSim)) - ncore #Sites in rows, otus in Columns tmp.err.PropRepsTP[i] <- sum((colSums(replicateSim>0) >= (0.5 * nsites))[1:ncore]) tmp.err.PropRepsFP[i] <- sum((colSums(replicateSim>0) >= (0.5 * nsites))[(ncore+1):ntaxa]) # Proportion of reps and reads #tmp.err.PropReadReps[i] <- sum((colSums(replicateSim>0) >= (0.5 * nsites)) & (colSums(replicateSim) >= sum(replicateSim) * 1/1000) ) - ncore tmp.err.PropReadRepsTP[i] <- sum(((colSums(replicateSim>0) >= (0.5 * nsites)) & (colSums(replicateSim) >= sum(replicateSim) * 1/1000) )[1:ncore]) tmp.err.PropReadRepsFP[i] <- sum(((colSums(replicateSim>0) >= (0.5 * nsites)) & (colSums(replicateSim) >= sum(replicateSim) * 1/1000) )[(ncore+1):ntaxa]) } # fill the dataframe from the chunk above with these values for later plotting sim.df$HardCut_TP[trt] <- mean(tmp.err.HardCutTP) / ncore sim.df$PropRead_TP[trt] <- mean(tmp.err.PropReadTP) / ncore sim.df$PropReps_TP[trt] <- mean(tmp.err.PropRepsTP) / ncore sim.df$PropReadReps_TP[trt] <- mean(tmp.err.PropReadRepsTP) / ncore sim.df$HardCut_FP[trt] <- mean(tmp.err.HardCutFP) / (ntaxa - ncore) sim.df$PropRead_FP[trt] <- mean(tmp.err.PropReadFP) / (ntaxa - ncore) sim.df$PropReps_FP[trt] <- mean(tmp.err.PropRepsFP) / (ntaxa - ncore) sim.df$PropReadReps_FP[trt] <- mean(tmp.err.PropReadRepsFP) / (ntaxa - ncore) sim.df$HardCut_TP_num[trt] <- mean(tmp.err.HardCutTP) sim.df$PropRead_TP_num[trt] <- mean(tmp.err.PropReadTP) sim.df$PropReps_TP_num[trt] <- mean(tmp.err.PropRepsTP) sim.df$PropReadReps_TP_num[trt] <- mean(tmp.err.PropReadRepsTP) sim.df$HardCut_FP_num[trt] <- mean(tmp.err.HardCutFP) sim.df$PropRead_FP_num[trt] <- mean(tmp.err.PropReadFP) sim.df$PropReps_FP_num[trt] <- mean(tmp.err.PropRepsFP) sim.df$PropReadReps_FP_num[trt] <- mean(tmp.err.PropReadRepsFP) }
myPalette <- c("#800000", "#A04040", "#DFBFBF", "#ffffff", "#0b45a3") myPalette2 <- c("#ffffff", "#C2D1E8", "#678BC6", "#0b45a3") myPalette3 <- c("#ffffff", "#C2D1E8")
Here we change the TP-FP to an integer not the ratio. With the ratios we were assuming all calls TP vs. FP were equally valuable. With this we just look at the differencea and make no assumiption of the value.
sim.df$HardCut_ratio <- sim.df$HardCut_TP_num - sim.df$HardCut_FP_num sim.df$PropRead_ratio <- sim.df$PropRead_TP_num - sim.df$PropRead_FP_num sim.df$PropReps_ratio <- sim.df$PropReps_TP_num - sim.df$PropReps_FP_num sim.df$PropReadReps_ratio <- sim.df$PropReadReps_TP_num - sim.df$PropReadReps_FP_num sim.df_all <- sim.df %>% gather(method, mean, c(5:8, 9:12, 21:24)) %>% separate(method, into = c('method', 'error'), sep = "_") %>% mutate(error = case_when( error == "TP" ~ "True Positive", error == "FP" ~ "False Positive", error == "ratio" ~ "TP-FP" ) %>% factor(levels = c("True Positive", "False Positive", "TP-FP")) ) %>% mutate(method = case_when( method == "HardCut" ~ "Hard Cutoff", method == "PropRead" ~ "Summation Sequence Reads", method == "PropReps" ~ "Proportion of Sequence Replicates", method == "PropReadReps" ~ "Proportion of Sequence Reads and Replicates" ) %>% factor(levels = c("Summation Sequence Reads", "Proportion of Sequence Replicates", "Proportion of Sequence Reads and Replicates", "Hard Cutoff")) )
tp <- sim.df_all %>% filter(sim.df_all$error == "True Positive") levels(tp$method)[1] <- 'Abundance' levels(tp$method)[2] <- 'Occupancy' levels(tp$method)[3] <- 'Abundance and Occupancy' levels(tp$method)[4] <- 'Hard Cutoffs' tp_plot <- ggplot(tp, aes(x = as.factor(corenoncoreratio), y = as.factor(theta))) + geom_tile(aes(fill = mean), colour = "white") + facet_grid(error~method, labeller = label_wrap_gen(multi_line = TRUE)) + theme(legend.text=element_text(size=2)) + theme_minimal() + scale_fill_gradientn(colours = myPalette2, values = scales::rescale(c(-1000, -500, -25,0, 100, 500, 1000))) + geom_text(aes(label = round(mean, digits = 2)), size = 8) + labs(x = expression(paste("Ratio of core:non-core abundance", " (", pi, " core / ", pi, " non-core)" )), y = expression(paste("Variance (", theta, ")")), fill = "Mean Error") + theme(strip.text.x = element_text(size = 12)) + theme(axis.text.x=element_blank(), axis.ticks.x=element_blank()) + theme(text = element_text(size = 20)) + guides(fill=guide_legend(title="Proportion of\nTrue Positives"))+ theme(strip.text.x = element_text(size = 20)) + theme(text = element_text(size = 20)) + theme(legend.text=element_text(size=22)) + theme(legend.title = element_text(size = 24)) tp_plot
fp <- sim.df_all %>% filter(sim.df_all$error == "False Positive") levels(fp$method)[1] <- 'Abundance' levels(fp$method)[2] <- 'Occupancy' levels(fp$method)[3] <- 'Abundance and Occupancy' levels(fp$method)[4] <- 'Hard Cutoffs' fp_plot <- ggplot(fp, aes(x = as.factor(corenoncoreratio), y = as.factor(theta))) + geom_tile(aes(fill = mean), colour = "white") + facet_grid(error~method, labeller = label_wrap_gen(multi_line = TRUE)) + theme(legend.text=element_text(size=2)) + theme_minimal() + scale_fill_gradientn(colours = myPalette2, values = scales::rescale(c(1000, 500, 25,0, -100, -500, -1000))) + geom_text(aes(label = round(fp$mean, digits = 2)), size = 8) + labs(x = expression(paste("Ratio of core:non-core abundance", " (", pi, " core / ", pi, " non-core)" )), y = expression(paste("Variance (", theta, ")")), fill = "Mean Error") + theme(strip.text.x = element_text(size = 0)) + theme(text = element_text(size = 20)) + guides(fill=guide_legend(title="Proportion of\nFalse Positives")) + theme(legend.text=element_text(size=22)) + theme(legend.title = element_text(size = 24)) fp_plot
tf <- sim.df_all %>% filter(sim.df_all$error == "TP-FP") levels(tf$method)[1] <- 'Abundance' levels(tf$method)[2] <- 'Occupancy' levels(tf$method)[3] <- 'Abundance and Occupancy' levels(tf$method)[4] <- 'Hard Cutoffs' breaks <- c(-900, -400, 0, 25) tf_plot <- ggplot(tf, aes(x = as.factor(corenoncoreratio), y = as.factor(theta))) + geom_tile(aes(fill = mean), colour = "white") + #facet_grid(rows = error ~ method) + facet_wrap(~ method, ncol = 2) + theme(legend.text=element_text(size=12)) + theme_minimal() + theme(axis.title = element_text(size = 28)) + scale_fill_gradientn(colours = myPalette, breaks = breaks) + geom_text(aes(label = round(tf$mean, digits = 2)), size=8) + labs(x = expression(paste("Ratio of core:non-core abundance", " (", pi, " core / ", pi, " non-core)" )), y = expression(paste("Variance (", theta, ")")), fill = "Mean Error") + theme(strip.text.x = element_text(size = 24)) + theme(text = element_text(size = 20)) + guides(fill=guide_legend(title="TP-FP")) + theme(legend.text=element_text(size=22)) + theme(legend.title = element_text(size = 24)) tf_plot
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.