In bochocki/correlatedtraits: Data and Analyses for [paper title]

knitr::opts_chunk$set(echo = F, message = F, error = F)
dat <- read.csv("C:/Users/tm9/Dropbox/Manuscripts/Brad demography dispersal correlation/ct_sim_out_tidy.csv")
#dat <- read.csv("D:/Dropbox/Manuscripts/Brad demography dispersal correlation/ct_sim_out_tidy.csv")
library(tidyverse)
library(data.table) 
library(scales)

multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
  library(grid)

  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)

  numPlots = length(plots)

  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # Make the panel
    # ncol: Number of columns of plots
    # nrow: Number of rows needed, calculated from # of cols
    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
                    ncol = cols, nrow = ceiling(numPlots/cols))
  }

 if (numPlots==1) {
    print(plots[[1]])

  } else {
    # Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))

    # Make each plot, in the correct location
    for (i in 1:numPlots) {
      # Get the i,j matrix positions of the regions that contain this subplot
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))

      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}

This document summarizes the output of Brad's simulation study. The output is gathered and tidied in a separate script ('wrangle_sim_dat.R'). This script imports the tidied data frame (from Tom's dropbox folder), generates graphics, and provides some interpretation.

The first thing I want to check is the simulation sample sizes. The target was 1000 but there were several missing files in the sequence 1:1000. I think Brad ran extras when this occurred so that the sample size is always in fact 1000 but I did not import these into my tidy summary (the loop only ran to 1000). So let's see what we ended up with:

dat %>% 
  group_by(gen,location,P_var,h2,rho_G,rho_E) %>% 
  summarise(count = n()) %>% 
  ggplot()+geom_histogram(aes(x=count))

For all treatments there are at least 950 reps. Apparently there are two groups, not sure why. Regardless, I think these are sufficient sample sizes to proceed. 1000 is an arbitrary sample size anyway.

Speed / Extent

Now let's calculate the mean and CV of invasion extent in generation 20 across treatments.

extent_dat <- dat %>%
  filter(gen == 20,
         location == "rightmost1") %>% 
  group_by(P_var,h2,rho_G,rho_E) %>%  
  summarise(mean_extent = mean(patch),
            CV_extent = sd(patch) / mean(patch))

Here I will try to re-create the figures from Brad's thesis (beetle-specific h2 and V values).

extent_dat_beetle <- extent_dat %>% 
  filter(h2 == "default"| h2 == "none", P_var == "default") 

A <- ggplot(data =  filter(extent_dat_beetle,rho_E==-0.9 | rho_E==0 | rho_E==0.9))+
  geom_line(aes(x=rho_G,y=mean_extent,linetype=h2,color=as.factor(rho_E)))

B <- ggplot(data =  filter(extent_dat_beetle,rho_G==-0.9 | rho_G==0 | rho_G==0.9))+
  geom_line(aes(x=rho_E,y=mean_extent,linetype=h2,color=as.factor(rho_G)))

C <- ggplot(data =  filter(extent_dat_beetle,rho_E==-0.9 | rho_E==0 | rho_E==0.9))+
  geom_line(aes(x=rho_G,y=CV_extent,linetype=h2,color=as.factor(rho_E)))

D <- ggplot(data =  filter(extent_dat_beetle,rho_G==-0.9 | rho_G==0 | rho_G==0.9))+
  geom_line(aes(x=rho_E,y=CV_extent,linetype=h2,color=as.factor(rho_G)))

multiplot(A,B,C,D,cols=2)

Yes, this looks like a good correspondence to Brad's original thesis figure. Let's see if I can find a cleaner way to present the data. For the beetle parameters, genetic correlations had a stronger accelerating effect than environmental correlations, and I would like to emphasize that result visually, while keeping emphasis on the fact that evolving invasions were always faster than non-evolving ones, even with very strong negative correlations.

A <- ggplot()+
  geom_line(data =  filter(extent_dat_beetle,rho_E==-0.16),
            aes(x=rho_G,y=mean_extent,linetype=h2))+
  geom_line(data =  filter(extent_dat_beetle,rho_G==-0.37),
            aes(x=rho_E,y=mean_extent,linetype=h2,col="E"))+
  geom_vline(xintercept = c(-0.37,-0.16))
B <- ggplot()+
  geom_line(data =  filter(extent_dat_beetle,rho_E==-0.16),
            aes(x=rho_G,y=CV_extent,linetype=h2))+
  geom_line(data =  filter(extent_dat_beetle,rho_G==-0.37),
            aes(x=rho_E,y=CV_extent,linetype=h2,col="blue"))

multiplot(A,B,cols=2)

Now I would like to quantify and display fold-change in extent and CV, relative to the no-evolution case.

mean_extent_fold_change <- extent_dat_beetle %>% 
  select(-CV_extent,P_var) %>% 
  spread(key = h2, value = mean_extent) %>% 
  mutate(fold_change_mean = default / none)

beetle_change <- mean_extent_fold_change %>% filter(rho_G==-0.37, rho_E==-0.16)

with(mean_extent_fold_change,{
     plot(rho_G,fold_change_mean,type="n",ylim=c(1,1.5),
          xlab=expression(paste("Genetic correlation (",rho,G,")")),
          ylab="Fold-change in invasion extent")
     lines(rho_G[rho_E == -.9],fold_change_mean[rho_E == -.9],lwd=1,lty=1)
     lines(rho_G[rho_E == -.16],fold_change_mean[rho_E == -.16],lwd=4,col=alpha("blue",0.75))
     lines(rho_G[rho_E == 0],fold_change_mean[rho_E == 0],lwd=1,lty=2)
     lines(rho_G[rho_E == .9],fold_change_mean[rho_E == .9],lwd=1,lty=3)
     #abline(h=1,col="gray")
     arrows(-0.37,0.8,-0.37,beetle_change$fold_change_mean,col=alpha("blue",0.75),length=0)
     arrows(-1.1,beetle_change$fold_change_mean,-0.37,beetle_change$fold_change_mean,col=alpha("blue",0.75),length=0)
     legend("topleft",bty="o",title=expression(paste("Environmental correlation (",rho,E,")")),cex=0.8,
            legend=c(-0.9,-0.16,0.0,0.9),lty=c(1,1,2,3),lwd=c(1,4,1,1),
            col=c("black",alpha("blue",0.75),"black","black"))
})

Here is an interesting discovery. Consider these two figures for mean invasion extent, shown in both raw distances and fold-change due to evolution. The figure on the left shows that both genetic and environmental correlations can speed up invasions, and that evolving invasions are always faster than invasions with no genetic variation. So, as long as there is some genetic variation, the fastest invasions occur when genetic and environmental correlations are both strongly positive. What is more suprising, though, is the right panel, which shows the fold change in invasion speed due to evolution. Genetic correlations that are more positive cause greater evolutionary acceleration, because selection can cherry-pick the combination of high fertility and high dispersal. But look at how the environmental correlations stack up. Evolution causes the greatest acceleration, proportionally, when environmental correlations are negative. I spent a long time scratching my head about this, but I think I get it. With positive E correlations, invasions are already going fast, because high fertility and high dispersal show up together at the front. In this case, there is only so much more that evolution can do to make things faster, given a fixed amount of total phenotypic variance. In contrast, negative E correlations would be holding invasions back, so evolution working with positive G correlations would counter-act the influence of environment, and would therefore hold greater potential to speed things up.

A <- ggplot(data =  extent_dat_beetle)+
  geom_line(aes(x=rho_G,y=mean_extent,linetype=h2,color=as.factor(rho_E)))
B<-ggplot(data =  mean_extent_fold_change)+
  geom_line(aes(x=rho_G,y=fold_change_mean,color=as.factor(rho_E)))
multiplot(A,B,cols=2)

Now same for CV

CV_fold_change <- extent_dat_beetle %>% 
  select(-mean_extent,P_var) %>% 
  spread(key = h2, value = CV_extent) %>% 
  mutate(fold_change_CV = default / none)

A <- ggplot(data =  extent_dat_beetle)+
  geom_line(aes(x=rho_G,y=CV_extent,linetype=h2,color=as.factor(rho_E)))
B<-ggplot(data =  CV_fold_change)+
  geom_line(aes(x=rho_G,y=fold_change_CV,color=as.factor(rho_E)))
multiplot(A,B,cols=2)

Generalizing beyond beetles

This is the full set of treatments for extent. On a quick inspection, a few things stand out. First, the qualitative trend from the beetle system seems to hold: genetic correlations speed up invasions and evolving invasions are just about always faster than non-evolving ones, even with very strong negative correlations. There seems to be one exception to this: equally high trait heritabilities with double the phenotypic variance. In this case, the strongest negative correlation goes slower than the no-evolution case, presumably because this increases the efficiency of selection against high fertility (I am assuming it is ferrtility that loses out at the expense of dispersal, but we can test this with the trait data). There is a potentially interesting result in the swap treatment: invasions are generally slower than the default treatment. Recall that 'swap' means that fertility has higher heritability than dispersal. So a few lines of evidence are pointing to the idea that evolution of dispersal is a more important accelerating force than evolution of fertility.

#ggplot(data =  filter(extent_dat, rho_E==-0.9 | rho_E==0 | rho_E==0.9))+
 # geom_line(aes(x=rho_G,y=mean_extent,color=as.factor(rho_E)))+
  #facet_grid(P_var ~ h2)

extent_dat %>% 
  select(-CV_extent) %>% 
  spread(key = h2, value = mean_extent) %>% 
  mutate(fold_change_default = default / none,
         fold_change_swap = swap / none,
         fold_change_eqhi = eqhi / none,
         fold_change_eqlo = eqlo / none) %>% 
  select(P_var,rho_G,rho_E,fold_change_default,fold_change_swap,fold_change_eqhi,fold_change_eqlo) %>%      
  gather(fold_change_default,fold_change_swap,fold_change_eqhi,fold_change_eqlo,
         key = h2, value = fold_change) %>% 
  ggplot()+
  geom_line(aes(x=rho_G,y=log(fold_change),color=as.factor(rho_E)))+
  facet_grid(P_var ~ h2)

And here are the same plots but for CV instead. This looks the way that I expected it should. No effects of E correlations on variance, greater evolutionary potential, greater increase in variance. Again, there is an interesting asymmetry in default vs swap treatments. One problem here is that we did not control for trait differences in phenotypic variance. They are fairly similar in magnitude (dispersal: 0.41, fertility: 0.35) but the direction of difference could possibly explain why the default invasions (greater heritability of the trait with more phenotypic variance) go faster than the swap invasions (greater heritability of the trait with less phenotypic variance.)

extent_dat %>% 
  select(-mean_extent) %>% 
  spread(key = h2, value = CV_extent) %>% 
  mutate(fold_change_default = default / none,
         fold_change_swap = swap / none,
         fold_change_eqhi = eqhi / none,
         fold_change_eqlo = eqlo / none) %>% 
  select(P_var,rho_G,rho_E,fold_change_default,fold_change_swap,fold_change_eqhi,fold_change_eqlo) %>% 
  gather(fold_change_default,fold_change_swap,fold_change_eqhi,fold_change_eqlo,
         key = h2, value = fold_change) %>% 
  ggplot()+
  geom_line(aes(x=rho_G,y=log(fold_change),color=as.factor(rho_E)))+
  facet_grid(P_var ~ h2)

Traits

Moving on to trait evolution. Here I need to contrast the generation 1 / generation 20 phenotype values. Here I am calculating the fold-change in trait values, where the hard-coded numbers are the initial trait values of the simulations. I can calculate this fold change for each replicate, then take the average and CV across reps.

dat %>% 
  group_by(location,P_var,h2,rho_G,rho_E,gen)

trait_dat <- dat %>% 
  mutate(fold_change_D = Mean_D / 1.63,
            fold_change_r = Mean_r / 2.74)%>%
  group_by(location,P_var,h2,rho_G,rho_E,gen) %>% 
  select(location,P_var,h2,rho_G,rho_E,gen,Mean_D,Mean_r,fold_change_D,fold_change_r)  %>% 
  summarise(log_patches = mean(Mean_D),
            log_kids = mean(Mean_r),
            CV_patches = sd(Mean_D)/log_patches,
            CV_kids = sd(Mean_r)/log_kids,
            mean_change_D = mean(fold_change_D),
            mean_change_r = mean(fold_change_r))
  ## note: phenotypes are on log scale. Need to decide whether fold change should be on log scale or not

both_traits_mean_change <- trait_dat %>% filter(gen==20,location=="rightmost1") %>%   select(P_var,location,h2,rho_G,mean_change_D,mean_change_r) %>% 
  gather(mean_change_D,mean_change_r,key="trait",value="fold_change")

both_traits_mean_change %>%  
  ggplot()+
  geom_line(aes(x=rho_G,y=log(fold_change),color=as.factor(rho_E),linetype=P_var))+
  geom_hline(yintercept = 0)+
  facet_grid(trait~h2)



both_traits_CV <- trait_dat %>% filter(gen==20,location=="rightmost1") %>%   select(P_var,location,h2,rho_G,CV_patches,CV_kids) %>% 
  gather(CV_patches,CV_kids,key="trait",value="CV")

  both_traits_CV %>% 
  ggplot()+
  geom_line(aes(x=rho_G,y=CV,color=as.factor(rho_E),linetype=P_var))+
  geom_hline(yintercept = 0)+
  facet_grid(trait~h2)

  dist_mean %>% 
  ggplot()+
  geom_line(aes(x=rho_G,y=(log_patches),linetype=P_var))+
  facet_grid(location~h2)

trait_dat <- dat %>% 
  mutate(fold_change_D = Mean_D / 1.63,
            fold_change_r = Mean_r / 2.74)%>%
  group_by(location,P_var,h2,rho_G,rho_E,gen) %>% 
  select(location,P_var,h2,rho_G,rho_E,gen,Mean_D,Mean_r,fold_change_D,fold_change_r)  %>% 
  summarise(log_patches = mean(Mean_D),
            log_kids = mean(Mean_r),
            CV_patches = sd(Mean_D)/log_patches,
            CV_kids = sd(Mean_r)/log_kids,
            mean_change_D = mean(fold_change_D),
            mean_change_r = mean(fold_change_r))

Here I am taking a different approach. Instead of comparing final to initial trait values, I am comparing trait values to 'control' treatments with h2 = 0. Since this is how I estimated effects on invasion dynamics, that may also be the right way to analyze traits to make them comparable.

trait_dat_trt_contrast <- dat %>% 
  group_by(location,P_var,h2,rho_G,rho_E,gen) %>% 
  summarise(log_patches = mean(Mean_D),
            log_kids = mean(Mean_r),
            CV_patches = sd(Mean_D)/log_patches,
            CV_kids = sd(Mean_r)/log_kids)

mean_dist_contrast <- trait_dat_trt_contrast %>% 
  select(-log_kids,-CV_patches,-CV_kids) %>% 
  filter(gen==20) %>% 
  spread(key = h2, value = log_patches)%>% 
  mutate(fold_change_default = default / none,
         fold_change_swap = swap / none,
         fold_change_eqhi = eqhi / none,
         fold_change_eqlo = eqlo / none) %>% 
  select(-default,-eqhi,-eqlo,-none,-swap)%>% 
  gather(fold_change_default,fold_change_swap,fold_change_eqhi,fold_change_eqlo, key = h2, value = fold_change_mean_dist)

mean_dist_contrast %>% filter(P_var=="default",location=="rightmost1") %>%   
ggplot()+
  geom_line(aes(x=rho_G,y=log(fold_change_mean_dist),color=as.factor(rho_E)))+
  geom_hline(yintercept = 0)+
  facet_grid(.~h2)


mean_fert_contrast <- trait_dat_trt_contrast %>% 
  select(-log_patches,-CV_patches,-CV_kids) %>% 
  filter(gen==20) %>% 
  spread(key = h2, value = log_kids)%>% 
  mutate(fold_change_default = default / none,
         fold_change_swap = swap / none,
         fold_change_eqhi = eqhi / none,
         fold_change_eqlo = eqlo / none) %>% 
  select(-default,-eqhi,-eqlo,-none,-swap)%>% 
  gather(fold_change_default,fold_change_swap,fold_change_eqhi,fold_change_eqlo, key = h2, value = fold_change_mean_fert)

mean_fert_contrast %>% filter(rho_E==0.0) %>%   
ggplot()+
  geom_line(aes(x=rho_G,y=fold_change_mean_fert,linetype=P_var))+
  geom_hline(yintercept = 1)+
  facet_grid(location~h2)

trait_dat %>% 
  filter(h2 == "default" | h2 == "none", 
         P_var == "default",
         rho_E == -.16,
         gen==20) %>% 
  ggplot()+
  geom_line(aes(x = rho_G, y=mean_PD, linetype=h2))+
  #geom_vline(xintercept = -0.37)+
  geom_hline(yintercept = 1.63)+
  facet_wrap(~location)

trait_dat %>% 
  filter(h2 == "default" | h2 == "none", 
         P_var == "default",
         rho_E == -.16,
         gen==20) %>% 
  ggplot()+
  geom_line(aes(x = rho_G, y=mean_Pr, linetype=h2))+
  #geom_vline(xintercept = -0.37)+
  geom_hline(yintercept = 2.74)+
  facet_wrap(~location)

trait_dat %>% 
  filter(location == "rightmost2",
         rho_E == 0,
         gen==20) %>% 
  ggplot()+
  geom_line(aes(x = rho_G, y=mean_Pr, linetype=h2))+
  #geom_vline(xintercept = -0.37)+
  geom_hline(yintercept = 2.74)+
  facet_grid(P_var~h2)

trait_dat %>% 
  filter(location == "rightmost2",
         rho_E == 0,
         gen==20) %>% 
  ggplot()+
  geom_line(aes(x = rho_G, y=mean_PD, linetype=h2))+
  #geom_vline(xintercept = -0.37)+
  geom_hline(yintercept = 1.63)+
  facet_grid(P_var~h2)


trait_change <- dcast(setDT(trait_dat), P_var+h2+rho_G+rho_E ~ gen,value.var = c("mean_PD", "mean_Pr")) %>% 
mutate(foldchange_dispersal = mean_PD_20/mean_PD_1,
         foldchange_fertility = mean_Pr_20/mean_Pr_1)

#beetle traits
trait_change %>% 
  filter(h2 == "default" | h2 == "none", P_var == "double",rho_E == -0.16) %>% 
  ggplot()+
  geom_line(aes(x = rho_G, y=foldchange_dispersal, linetype=h2))+
  geom_line(aes(x = rho_G, y=foldchange_fertility, linetype=h2),color="red")+
  geom_vline(xintercept = -0.37)

First thing to note is that this is not 'trait evolution', this is just final trait values. Second, the trait values are sampled often from just one individual, the farthest one, so that could be a source of noise. Brad's output included a sample of the wave behind the very edge (though I am currently not sure exactly where) - so that may be useful to work with. Third, I think am plot trait values on the observed scale (realized dispersal distance and realized fecundity) but it occurs to me that the dispersal distances are too far for beetles. Perhaps I should not be exponentiating the values? Check with Brad.

These results are odd to me. The dispersal results mostly make sense but the fertility results are surprising: strong effects of environmental correlations. This could perhaps just be explained by the fact that the leading edge disperser will always have a high dispersal value, so any environmental correlation with fertility should appear there. Need to think more about this, but I think re-querying the output for trait change rather than trait values is a better way to go.

A <- ggplot(data =  filter(trait_dat_beetle,rho_E==-0.9 | rho_E==0 | rho_E==0.9))+
  geom_line(aes(x=rho_G,y=foldchange_dispersal,linetype=h2,color=as.factor(rho_E)))

B <- ggplot(data =  filter(trait_dat_beetle,rho_G==-0.9 | rho_G==0 | rho_G==0.9))+
  geom_line(aes(x=rho_E,y=foldchange_dispersal,linetype=h2,color=as.factor(rho_G)))

C <- ggplot(data =  filter(trait_dat_beetle,rho_E==-0.9 | rho_E==0 | rho_E==0.9))+
  geom_line(aes(x=rho_G,y=foldchange_fertility,linetype=h2,color=as.factor(rho_E)))

D <- ggplot(data =  filter(trait_dat_beetle,rho_G==-0.9 | rho_G==0 | rho_G==0.9))+
  geom_line(aes(x=rho_E,y=foldchange_fertility,linetype=h2,color=as.factor(rho_G)))

multiplot(A,B,C,D,cols=2)

ggplot(data =  filter(trait_change, rho_E==-0.9 | rho_E==0 | rho_E==0.9))+
  geom_line(aes(x=rho_G,y=foldchange_dispersal,color=as.factor(rho_E)))+
  facet_grid(P_var ~ h2)+
  geom_hline(yintercept = 1)

ggplot(data =  filter(trait_change, rho_E==-0.9 | rho_E==0 | rho_E==0.9))+
  geom_line(aes(x=rho_G,y=foldchange_fertility,color=as.factor(rho_E)))+
  facet_grid(P_var ~ h2)+
  geom_hline(yintercept = 1)

I would like to make a figure summarizing fold-change in invasion metrics and traits, and maybe even comparing it to observed values in invasion experiment.

beetles <- dat %>%
  filter(gen == 20,
         location == "rightmost1",
         rho_G == -0.37,
         rho_E == -0.16,
         h2 == "default" | h2 == "none",
         P_var == "default") %>%
  group_by(h2) %>% 
  summarise(mean_extent = mean(patch),
            CV_extent = sd(patch) / mean(patch),
            log_patches = mean(Mean_D),
            log_kids = mean(Mean_r)) 

beetles_change <- log(beetles[1,2:5]/beetles[2,2:5]) %>% gather(key=measurement,value=value)

ggplot(beetles_change)+
  geom_bar(stat = "identity", aes(x=measurement,y=value))

barplot(beetles_change$value,names.arg = beetles_change$measurement,ylim=c(-.5,2));abline(h=0)