library(rotations)
library(ggplot2)
library(gridExtra)

outFolder <- "C:\\Dropbox\\resistanceResults\\rotations\\"

  # load simulation outputs for some plots
  # beware that each object is called df_res_all
  # some plots are created by directly running the model
  load("df_res_all_rot_nocost_nomig1000.rda") #nocost nomig  
  df_nocost_nomig <- df_res_all
  load("df_res_all_rot10000.rda") #cost
  df_cost <- df_res_all
  load("df_res_all_rot_mig10000.rda") #mig 10k
  df_mig <- df_res_all
  load("df_res_all_rot_cost_mig10000.rda") #costmig 10k
  df_cost_mig <- df_res_all
  load("df_res_all_rot_nocost_nomig10000_mgs5.rda")
  df_nocost_nomig_mgs5 <- df_res_all 
  load("df_res_all_rot_cost10000_mgs5.rda")
  df_cost_mgs5 <- df_res_all  
#looking at classifying runs as to whether succeed_both (2), succeed_rot (1), fail_both (0). Then see what are the best predictors of these ?


# add a column for percent difference between rot & seq
df_cost <- mutate(df_cost, pcent_rot_adv = rot_minus_seq * 100/gens_seq)

df_cost <- mutate(df_cost, success_rot_seq = ifelse(gens_seq==max_gen & gens_rot==max_gen,'rot_seq',
                                               ifelse(gens_rot==max_gen,'rot',
                                               ifelse(gens_seq==max_gen,'seq', 'neither'        
                                                      ))))
# count(df_cost, success_rot_seq)
# 1         neither  5948
# 2             rot   209
# 3         rot_seq  3841
# 4             seq     2

# not very informative
# 2D plot dom_sel, dom_cos, facetted by whether neither succeed, rot only or both
# does show that rot_only and rot_seq more abundant at low dom_sel and high dom_cos  
gg <- ggplot(df_cost, aes(x=dom_sel, y=dom_cos, col=success_rot_seq)) + 
  geom_point(shape=1, size=1, alpha=0.5) +
  #geom_bin2d(bins=5) + #didn't look great
  facet_grid(~success_rot_seq) +
  viridis::scale_color_viridis(discrete = TRUE) #direction=-1)  

plot(gg)

#effectiveness & exposure
gg <- ggplot(df_cost, aes(x=eff, y=expo_hi, col=success_rot_seq)) + 
  geom_point(shape=1, size=1, alpha=0.5) +
  #geom_bin2d(bins=5) + #didn't look great
  facet_grid(~success_rot_seq) +
  viridis::scale_color_viridis(discrete = TRUE) #direction=-1)  

plot(gg)
# have a quick go at a decision tree for chhosing between neither, rot or rot_seq

library(rpart)
library(rpart.plot)

#tree <- rpart::rpart(as.formula(paste(treeResponse,"~",treePredictorString)), data = treeInput, method = 'class')

tree <- rpart::rpart("success_rot_seq ~ cost + expo_hi + male_expo_prop + eff + rr + dom_sel + dom_cos + start_freqs + n_insecticides + rot_interval", data = df_cost, method = 'class')

prp(tree, extra=1, varlen = 0, under=TRUE, 
    cex=0.6, round=0, split.font=1, ycompress = TRUE,
    eq="\n= ", lt="\n< ", ge="\n>= " )

#initial tree results kind of make sense, but not very informative
#doesn't give rot_only as an output ? 
#cost is first branch
#odd that higher rr and expo_hi favour both working
#odd that dom_sel >0.15 favours both working ?    

#try it with the 'successes' removed
df_cost2 <- filter(df_cost, success_rot_seq != 'rot_seq') #'neither'

tree <- rpart::rpart("success_rot_seq ~ cost + expo_hi + male_expo_prop + eff + rr + dom_sel + dom_cos + start_freqs + n_insecticides + rot_interval", data = df_cost2, method = 'class')

prp(tree, extra=1, varlen = 0, under=TRUE, 
    cex=0.6, round=0, split.font=1, ycompress = TRUE,
    eq="\n= ", lt="\n< ", ge="\n>= " ) 

#dom_sel & dom_cos first branches but tree not very helpful

#with 'neither' removed lower cost & lower rr favour rot_only
# trying simple dot & trend plots for gens_rot & seq versus inputs
# work as expected when 'successes' not removed
# n_insecticides doesn't work because not enough points

inputs <- c('cost', 'expo_hi', 'male_expo_prop', 'eff', 'rr', 'dom_sel', 'dom_cos', 'start_freqs', 'n_insecticides', 'rot_interval')

for(input in inputs)
{
    y <- 'gens_rot'    

    gg <- ggplot(df_cost, aes_string(x=input, y=y)) + 

             geom_point(shape='.', size=1, alpha=0.2, show.legend=FALSE) + 
             geom_smooth(linetype='dashed',size=0.5)
    plot(gg)
}

# try it with the 'successes' removed
# doesn't look as good for cost
df_cost2 <- filter(df_cost, success_rot_seq != 'rot_seq')

for(input in inputs)
{
    y <- 'gens_rot'    

    gg <- ggplot(df_cost2, aes_string(x=input, y=y)) + 

             geom_point(shape='.', size=1, alpha=0.2, show.legend=FALSE) + 
             geom_smooth(linetype='dashed',size=0.5)
    plot(gg)
}
# this could do it to show how the different results (neither, rot, or rot_seq)
# have different distributions of input values

inputs <- c('cost', 'expo_hi', 'male_expo_prop', 'eff', 'rr', 'dom_sel', 'dom_cos', 'start_freqs', 'n_insecticides', 'rot_interval')

for(input in inputs)
{

  print(ggplot(df_cost, aes_string(x='success_rot_seq',y=input, color='success_rot_seq')) + 
        #ylim(0,450) +
        #coord_cartesian( ylim=c(0, 350)) +
        geom_violin(draw_quantiles = c(0.25, 0.75), show.legend=FALSE, adjust = 1 ) +
        theme_minimal() +
        #this is an example of how you might facet
        #facet_grid(n_insecticides ~.) +  
        theme(panel.grid.minor.y=element_blank())) #,
        #           axis.text.x=element_blank())
        #geom_jitter(shape='.',height=0,width=0.2) )
}
# I could maybe make the data longer and then facet by input
# think I would filter out, n_insecticides, start_freqs, male_expo_prop & rot_interval that don't show much rship
#TODO improve labels for these
inputs <- c('cost', 'expo_hi', 'eff', 'rr', 'dom_sel', 'dom_cos','start_freqs','rot_interval')
# something like this example from SO https://stackoverflow.com/a/48940750/1718356
#data_long <- tidyr::gather(data, key = type_col, value = categories, -col4)
#ggplot(data_long, aes(x = categories, fill = col4)) +
#  geom_bar() + 
#  facet_wrap(~ type_col, scales = "free_x")

#data_long <- tidyr::gather(data, key = type_col, value = categories, -col4)
#first just select the columns I want
df_cost_long <- select(df_cost,c(inputs,'success_rot_seq'))
#gather data to be longer
df_cost_long <- tidyr::gather(df_cost_long, key=input_name, value=input_value, -success_rot_seq)

#there were just an odd couple of results where seq better, remove them here or earlier
df_cost_long <- filter(df_cost_long, success_rot_seq != 'seq')

ggplot(df_cost_long, aes_string(x='success_rot_seq',y='input_value', color='success_rot_seq')) + 
        #ylim(0,450) +
        #coord_cartesian( ylim=c(0, 350)) +
        geom_violin(draw_quantiles = c(0.25, 0.75), show.legend=FALSE, adjust = 1 ) +
        theme_minimal() +
        #this is an example of how you might facet
        facet_wrap(~input_name, scales = "free_y") +  
        theme(panel.grid.minor.y=element_blank()) #,
        #           axis.text.x=element_blank())
        #geom_jitter(shape='.',height=0,width=0.2) )

\pagebreak Showing that using min_gens_switch_back=5 instead of min_rwr_interval=5 seems to make no difference in main results.

# if I put all scenarios in one df with a column for scenario I can do one ggplot and facet 
# would work for the previous plot too and would simplify adding the new columns
# first add the scenario column
df_nocost_nomig$scenario <- 'no cost or dispersal'
df_cost$scenario <- 'costs'
df_mig$scenario <- 'dispersal'
df_cost_mig$scenario <- 'costs and dispersal'

df_nocost_nomig_mgs5$scenario <- 'no cost or dispersal mgs5'
df_cost_mgs5$scenario <- 'costs mgs5'

df_all <- dplyr::bind_rows(df_nocost_nomig,
                           df_nocost_nomig_mgs5,
                           df_cost,
                           df_mig,
                           df_cost_mig,
                           df_cost_mgs5)
#order for ggplot
df_all$scenario <- factor(df_all$scenario, levels=c('no cost or dispersal','no cost or dispersal mgs5','costs','costs mgs5','dispersal','costs and dispersal'))

# add a column for classification of whether, both, rotation, or neither succeeded
df_all <- mutate(df_all, success = ifelse(gens_seq==max_gen & gens_rot==max_gen,'both',
                                               ifelse(gens_rot==max_gen,'rotation only',
                                               ifelse(gens_seq==max_gen,'sequence only', 'neither'  
                                                      ))))
#count(df_all, scenario, success)

#hack to add subplot labels to facets
abcd <- data.frame(label = c("A","B","C","D","E","F"),
                   scenario = levels(df_all$scenario))

ggplot( df_all, aes(x=success)) + 
        #geom_bar() +
        stat_count(mapping = aes(y=..prop.., group=1)) + #puts proprtions in axes
        scale_y_continuous(breaks=seq(0.2,0.8,0.2)) +
        theme_minimal() +
        ylab('proportion of model runs') +
        facet_wrap(~scenario, ncol=1) + #, scales='free_y') + #can remove free_y later if I do 10k nocost runs    
        geom_text(data=abcd,
                  aes(x = -Inf, y = +Inf, label=label),
                  hjust = 0,
                  vjust = 0.9 )

\pagebreak Using min_gens_switch_back=5 gives greater benefits to sequences when there are no costs or dispersal. I suspect this is an artefact. Try looking at some of the runs.

  #try quicker way with df_all and facet
  df_all <- mutate(df_all, pcent_rot_adv = rot_minus_seq * 100/gens_seq)
  custom_bins <- c(-20,-10,-1,1,10,20,100,450)
  df_all$rot_adv_bins <- cut(df_all$pcent_rot_adv, custom_bins)  
  bin_labs <- levels(df_all$rot_adv_bins)

  #hack to add subplot labels to facets
  abcd <- data.frame(label = c("A","B","C","D","E","F"),
                   scenario = levels(df_all$scenario))

  ggplot( df_all, aes(x=rot_adv_bins)) +  

             #geom_bar() +
             stat_count(mapping = aes(y=..prop.., group=1)) + #puts proprtions in axes
             scale_y_continuous(breaks=seq(0,0.8,0.2)) +
             xlab('Percent benefit of rotation over sequence') +
             ylab('proportion of model runs') +
             #ggtitle('') +
             theme_minimal() +
             facet_wrap(~scenario, ncol=1) +
             scale_x_discrete( limits=bin_labs,
                               labels=c("-20 to -10", "-10 to -1", "-1 to 1", "1 to 10", "10 to 20", "20 to 100", "100 to 500"))+     
             theme(panel.grid.minor.y=element_blank(),
                   axis.text.x = element_text(angle = 20,hjust = 1, vjust = 1)) +
             geom_text(data=abcd,
                  aes(x = -Inf, y = +Inf, label=label),
                  hjust = 0,
                  vjust = 0.9 )

\pagebreak Using min_gens_switch_back=5 example of when sequence does better.

df_seqbetter <- filter( df_all, pcent_rot_adv < -5 &
                                scenario == 'no cost or dispersal mgs5' )

#ooo scenario 5 has rot_adv of -97%
#but remember it is not scenario5 in the original dataset !
#Aha! all these big sequence advantages (well those >50%) 
#were due to artfefact when rotation interval=5 and num insecticides=2 rotation couldn't return to first insecticide because it was only 5 generations ago !!

df_nocost_nomig_mgs5 <- filter( df_all, scenario == 'no cost or dispersal mgs5' ) 

#which(df_nocost_nomig_mgs5$pcent_rot_adv < -90)
#217 1933 3435 4289 5649 7100 7925 7990

scenarios_to_plot <- c(217)

for(scenario in scenarios_to_plot)
{
  #cat(scenario,"\n")

  linputs <- as.list( df_nocost_nomig_mgs5[scenario, 1:15] )

  # run one scenario
  dfres <- do.call(run_rot, linputs)

  ggrot <- rot_plot_resistance(dfres, plot_refuge=FALSE, logy=TRUE, plot=FALSE)

  #now run as sequence
  linputs$rot_interval <- 0
  dfres <- do.call(run_rot, linputs)

  ggseq <- rot_plot_resistance(dfres, plot_refuge=FALSE, logy=TRUE, plot=FALSE) 

  grid.arrange(ggrot, ggseq, ncol=1)

}


ian-hastings/rotations documentation built on Dec. 14, 2020, 11:42 p.m.