library(rotations) library(ggplot2) library(gridExtra) outFolder <- "C:\\Dropbox\\resistanceResults\\rotations\\" # load simulation outputs for some plots # beware that each object is called df_res_all # some plots are created by directly running the model load("df_res_all_rot_nocost_nomig1000.rda") #nocost nomig df_nocost_nomig <- df_res_all load("df_res_all_rot10000.rda") #cost df_cost <- df_res_all load("df_res_all_rot_mig10000.rda") #mig 10k df_mig <- df_res_all load("df_res_all_rot_cost_mig10000.rda") #costmig 10k df_cost_mig <- df_res_all load("df_res_all_rot_nocost_nomig10000_mgs5.rda") df_nocost_nomig_mgs5 <- df_res_all load("df_res_all_rot_cost10000_mgs5.rda") df_cost_mgs5 <- df_res_all
#looking at classifying runs as to whether succeed_both (2), succeed_rot (1), fail_both (0). Then see what are the best predictors of these ? # add a column for percent difference between rot & seq df_cost <- mutate(df_cost, pcent_rot_adv = rot_minus_seq * 100/gens_seq) df_cost <- mutate(df_cost, success_rot_seq = ifelse(gens_seq==max_gen & gens_rot==max_gen,'rot_seq', ifelse(gens_rot==max_gen,'rot', ifelse(gens_seq==max_gen,'seq', 'neither' )))) # count(df_cost, success_rot_seq) # 1 neither 5948 # 2 rot 209 # 3 rot_seq 3841 # 4 seq 2 # not very informative # 2D plot dom_sel, dom_cos, facetted by whether neither succeed, rot only or both # does show that rot_only and rot_seq more abundant at low dom_sel and high dom_cos gg <- ggplot(df_cost, aes(x=dom_sel, y=dom_cos, col=success_rot_seq)) + geom_point(shape=1, size=1, alpha=0.5) + #geom_bin2d(bins=5) + #didn't look great facet_grid(~success_rot_seq) + viridis::scale_color_viridis(discrete = TRUE) #direction=-1) plot(gg) #effectiveness & exposure gg <- ggplot(df_cost, aes(x=eff, y=expo_hi, col=success_rot_seq)) + geom_point(shape=1, size=1, alpha=0.5) + #geom_bin2d(bins=5) + #didn't look great facet_grid(~success_rot_seq) + viridis::scale_color_viridis(discrete = TRUE) #direction=-1) plot(gg)
# have a quick go at a decision tree for chhosing between neither, rot or rot_seq library(rpart) library(rpart.plot) #tree <- rpart::rpart(as.formula(paste(treeResponse,"~",treePredictorString)), data = treeInput, method = 'class') tree <- rpart::rpart("success_rot_seq ~ cost + expo_hi + male_expo_prop + eff + rr + dom_sel + dom_cos + start_freqs + n_insecticides + rot_interval", data = df_cost, method = 'class') prp(tree, extra=1, varlen = 0, under=TRUE, cex=0.6, round=0, split.font=1, ycompress = TRUE, eq="\n= ", lt="\n< ", ge="\n>= " ) #initial tree results kind of make sense, but not very informative #doesn't give rot_only as an output ? #cost is first branch #odd that higher rr and expo_hi favour both working #odd that dom_sel >0.15 favours both working ? #try it with the 'successes' removed df_cost2 <- filter(df_cost, success_rot_seq != 'rot_seq') #'neither' tree <- rpart::rpart("success_rot_seq ~ cost + expo_hi + male_expo_prop + eff + rr + dom_sel + dom_cos + start_freqs + n_insecticides + rot_interval", data = df_cost2, method = 'class') prp(tree, extra=1, varlen = 0, under=TRUE, cex=0.6, round=0, split.font=1, ycompress = TRUE, eq="\n= ", lt="\n< ", ge="\n>= " ) #dom_sel & dom_cos first branches but tree not very helpful #with 'neither' removed lower cost & lower rr favour rot_only
# trying simple dot & trend plots for gens_rot & seq versus inputs # work as expected when 'successes' not removed # n_insecticides doesn't work because not enough points inputs <- c('cost', 'expo_hi', 'male_expo_prop', 'eff', 'rr', 'dom_sel', 'dom_cos', 'start_freqs', 'n_insecticides', 'rot_interval') for(input in inputs) { y <- 'gens_rot' gg <- ggplot(df_cost, aes_string(x=input, y=y)) + geom_point(shape='.', size=1, alpha=0.2, show.legend=FALSE) + geom_smooth(linetype='dashed',size=0.5) plot(gg) } # try it with the 'successes' removed # doesn't look as good for cost df_cost2 <- filter(df_cost, success_rot_seq != 'rot_seq') for(input in inputs) { y <- 'gens_rot' gg <- ggplot(df_cost2, aes_string(x=input, y=y)) + geom_point(shape='.', size=1, alpha=0.2, show.legend=FALSE) + geom_smooth(linetype='dashed',size=0.5) plot(gg) }
# this could do it to show how the different results (neither, rot, or rot_seq) # have different distributions of input values inputs <- c('cost', 'expo_hi', 'male_expo_prop', 'eff', 'rr', 'dom_sel', 'dom_cos', 'start_freqs', 'n_insecticides', 'rot_interval') for(input in inputs) { print(ggplot(df_cost, aes_string(x='success_rot_seq',y=input, color='success_rot_seq')) + #ylim(0,450) + #coord_cartesian( ylim=c(0, 350)) + geom_violin(draw_quantiles = c(0.25, 0.75), show.legend=FALSE, adjust = 1 ) + theme_minimal() + #this is an example of how you might facet #facet_grid(n_insecticides ~.) + theme(panel.grid.minor.y=element_blank())) #, # axis.text.x=element_blank()) #geom_jitter(shape='.',height=0,width=0.2) ) }
# I could maybe make the data longer and then facet by input # think I would filter out, n_insecticides, start_freqs, male_expo_prop & rot_interval that don't show much rship #TODO improve labels for these inputs <- c('cost', 'expo_hi', 'eff', 'rr', 'dom_sel', 'dom_cos','start_freqs','rot_interval') # something like this example from SO https://stackoverflow.com/a/48940750/1718356 #data_long <- tidyr::gather(data, key = type_col, value = categories, -col4) #ggplot(data_long, aes(x = categories, fill = col4)) + # geom_bar() + # facet_wrap(~ type_col, scales = "free_x") #data_long <- tidyr::gather(data, key = type_col, value = categories, -col4) #first just select the columns I want df_cost_long <- select(df_cost,c(inputs,'success_rot_seq')) #gather data to be longer df_cost_long <- tidyr::gather(df_cost_long, key=input_name, value=input_value, -success_rot_seq) #there were just an odd couple of results where seq better, remove them here or earlier df_cost_long <- filter(df_cost_long, success_rot_seq != 'seq') ggplot(df_cost_long, aes_string(x='success_rot_seq',y='input_value', color='success_rot_seq')) + #ylim(0,450) + #coord_cartesian( ylim=c(0, 350)) + geom_violin(draw_quantiles = c(0.25, 0.75), show.legend=FALSE, adjust = 1 ) + theme_minimal() + #this is an example of how you might facet facet_wrap(~input_name, scales = "free_y") + theme(panel.grid.minor.y=element_blank()) #, # axis.text.x=element_blank()) #geom_jitter(shape='.',height=0,width=0.2) )
\pagebreak Showing that using min_gens_switch_back=5 instead of min_rwr_interval=5 seems to make no difference in main results.
# if I put all scenarios in one df with a column for scenario I can do one ggplot and facet # would work for the previous plot too and would simplify adding the new columns # first add the scenario column df_nocost_nomig$scenario <- 'no cost or dispersal' df_cost$scenario <- 'costs' df_mig$scenario <- 'dispersal' df_cost_mig$scenario <- 'costs and dispersal' df_nocost_nomig_mgs5$scenario <- 'no cost or dispersal mgs5' df_cost_mgs5$scenario <- 'costs mgs5' df_all <- dplyr::bind_rows(df_nocost_nomig, df_nocost_nomig_mgs5, df_cost, df_mig, df_cost_mig, df_cost_mgs5) #order for ggplot df_all$scenario <- factor(df_all$scenario, levels=c('no cost or dispersal','no cost or dispersal mgs5','costs','costs mgs5','dispersal','costs and dispersal')) # add a column for classification of whether, both, rotation, or neither succeeded df_all <- mutate(df_all, success = ifelse(gens_seq==max_gen & gens_rot==max_gen,'both', ifelse(gens_rot==max_gen,'rotation only', ifelse(gens_seq==max_gen,'sequence only', 'neither' )))) #count(df_all, scenario, success) #hack to add subplot labels to facets abcd <- data.frame(label = c("A","B","C","D","E","F"), scenario = levels(df_all$scenario)) ggplot( df_all, aes(x=success)) + #geom_bar() + stat_count(mapping = aes(y=..prop.., group=1)) + #puts proprtions in axes scale_y_continuous(breaks=seq(0.2,0.8,0.2)) + theme_minimal() + ylab('proportion of model runs') + facet_wrap(~scenario, ncol=1) + #, scales='free_y') + #can remove free_y later if I do 10k nocost runs geom_text(data=abcd, aes(x = -Inf, y = +Inf, label=label), hjust = 0, vjust = 0.9 )
\pagebreak Using min_gens_switch_back=5 gives greater benefits to sequences when there are no costs or dispersal. I suspect this is an artefact. Try looking at some of the runs.
#try quicker way with df_all and facet df_all <- mutate(df_all, pcent_rot_adv = rot_minus_seq * 100/gens_seq) custom_bins <- c(-20,-10,-1,1,10,20,100,450) df_all$rot_adv_bins <- cut(df_all$pcent_rot_adv, custom_bins) bin_labs <- levels(df_all$rot_adv_bins) #hack to add subplot labels to facets abcd <- data.frame(label = c("A","B","C","D","E","F"), scenario = levels(df_all$scenario)) ggplot( df_all, aes(x=rot_adv_bins)) + #geom_bar() + stat_count(mapping = aes(y=..prop.., group=1)) + #puts proprtions in axes scale_y_continuous(breaks=seq(0,0.8,0.2)) + xlab('Percent benefit of rotation over sequence') + ylab('proportion of model runs') + #ggtitle('') + theme_minimal() + facet_wrap(~scenario, ncol=1) + scale_x_discrete( limits=bin_labs, labels=c("-20 to -10", "-10 to -1", "-1 to 1", "1 to 10", "10 to 20", "20 to 100", "100 to 500"))+ theme(panel.grid.minor.y=element_blank(), axis.text.x = element_text(angle = 20,hjust = 1, vjust = 1)) + geom_text(data=abcd, aes(x = -Inf, y = +Inf, label=label), hjust = 0, vjust = 0.9 )
\pagebreak Using min_gens_switch_back=5 example of when sequence does better.
df_seqbetter <- filter( df_all, pcent_rot_adv < -5 & scenario == 'no cost or dispersal mgs5' ) #ooo scenario 5 has rot_adv of -97% #but remember it is not scenario5 in the original dataset ! #Aha! all these big sequence advantages (well those >50%) #were due to artfefact when rotation interval=5 and num insecticides=2 rotation couldn't return to first insecticide because it was only 5 generations ago !! df_nocost_nomig_mgs5 <- filter( df_all, scenario == 'no cost or dispersal mgs5' ) #which(df_nocost_nomig_mgs5$pcent_rot_adv < -90) #217 1933 3435 4289 5649 7100 7925 7990 scenarios_to_plot <- c(217) for(scenario in scenarios_to_plot) { #cat(scenario,"\n") linputs <- as.list( df_nocost_nomig_mgs5[scenario, 1:15] ) # run one scenario dfres <- do.call(run_rot, linputs) ggrot <- rot_plot_resistance(dfres, plot_refuge=FALSE, logy=TRUE, plot=FALSE) #now run as sequence linputs$rot_interval <- 0 dfres <- do.call(run_rot, linputs) ggseq <- rot_plot_resistance(dfres, plot_refuge=FALSE, logy=TRUE, plot=FALSE) grid.arrange(ggrot, ggseq, ncol=1) }
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.