In this script we will analyze community phylogenetic and trait data from plant species that occur in in the lava flow islands, or kipukas, of Craters of the Moon National Preserve in south central Idaho. The species in the local communtiy occur either only on the kipukas or on the kipukas and surround area, while the regional species not in the local community are species that occurr in and around the preserve, but do not occur on the kipukas. The trait we are investigating is maximum vegetative height.
#packages require(CAMI) require(randomForest) require(abc) require(geiger) require(ggplot2) setwd("/Users/Megan/Documents/Github/CAMI/data/EmpiricalData") load(file="RegTree.Rdata") #all.species.tre load(file="LocTree.Rdata") #kipuka.all.tre height.data <- read.csv(file="height_Data.csv", header=T, sep=",") height.data <- height.data[height.data[,1]!="",] height.data <- height.data[height.data[,5]!="none",] rownames(height.data) <- seq(1,641,1) reg.heights <- height.data[,8] names(reg.heights) <- paste(height.data[,4]) kip.heights <-(height.data[1:65, 8]) names(kip.heights) <- paste(height.data[1:65,4]) setwd("/Users/Megan/Documents/Github/CAMI/data/EmpiricalData") load(file="Kipuka_RegTree.Rdata") load(file="Kipuka_LocalTree.Rdata") load(file="Kipuka_RegTraits.Rdata") load(file="Kipuka_LocalTraits.Rdata") #make sure phylogeny and data correspond, will say "OK" if they do name.check(phy=Kip.regional.tree, data = Kip.regional.traits) name.check(phy=all.species.tre, data = reg.heights) name.check(phy=kipuka.all.tre, data = kip.heights) dist(reg.heights) CalcPhyDispStats(all.species.tre, kipuka.all.tre,log(reg.heights), log(kip.heights))
Before we simulate communtiy assembly data and perform model selection, we need to know which model of trait evoltion to simualte under. From the power analysis, we know that we could perform model selection between all communtiy assembly models and trait evolution models, but we will not have the power we would have if we only compared community assembly models. Due to this, we will estimate the model of trait evolution beforehand and simulate our community assembly data accordingly.
BM.mod <- fitContinuous(phy = Kip.regional.tree, dat = log(Kip.regional.traits), model = "BM") OU.mod <- fitContinuous(phy = Kip.regional.tree, dat = log(Kip.regional.traits), model = "OU", bounds=list(alpha=c(0.1, 0.9))) alpha.ou <- seq(0.0,1.0, 0.05) ou.res <- matrix(NA, length(alpha.ou), 5) for (i in 1:length(alpha.ou)){ OU.mod <- fitContinuous(phy = Kip.regional.tree, dat = log(Kip.regional.traits), model = "OU", bounds=list(alpha=c(0.01, alpha.ou[i]))) ou.res[i,1:5] <- as.numeric(c(paste(OU.mod$opt[1]), paste(OU.mod$opt[2]), paste(OU.mod$opt[4]), paste(OU.mod$opt[7]), paste(OU.mod$opt[8]))) } setwd("/Users/Megan/Documents/Ruffleyetal2018EcolLetters/Data/EmpiricalData") write.csv(ou.res, file="TrModResults_Emp.csv", sep=",") alpha.ou <- seq(0.0,1.0, 0.05) alpha.ou[1] <- 0.01 sigsq.ou <- c(0.3416, 0.536876, 0.766006, 1.002996, 1.244069, 1.486966, 1.7305, 1.974306, 2.218001, 2.461582, 2.705062) regres <- lm(sigsq.ou~alpha.ou) plot(sigsq.ou~alpha.ou) BM.mod <- fitContinuous(phy = all.species.tre, dat = log(reg.heights), model = "BM") OU.mod <- fitContinuous(phy = all.species.tre, dat = log(reg.heights), model = "OU", bounds=list(alpha=c(0.01,0.2)))
We can use the function CalcSummaryStats() from the CAMI R package to calculate the empirical summary statistics.
Kip.summary.stats <- CalcSummaryStats(regional.tree = Kip.regional.tree, local.tree = Kip.local.tree, regional.traits = log(Kip.regional.traits), local.traits = log(Kip.local.traits)) Kip.phydisp.stats <- CalcPhyDispStats(regional.tree = Kip.regional.tree, local.tree = Kip.local.tree, regional.traits = log(Kip.regional.traits), local.traits = log(Kip.local.traits)) Kip.phydisp.stats Kip.summary.stats <- CalcSummaryStats(regional.tree = all.species.tre, local.tree = kipuka.all.tre, regional.traits = log(reg.heights), local.traits = log(kip.heights))
Now that we know the model of trait evolution to simulate under, and have an idea for the rate of cahracter change, we can simulate under the three communtiy assembly models; neutral, environmental filtering, and competitive exclusion. The simulations should be characterized to be as simular as possible to the empirical data.
sims <- 10000 N <- Ntip(all.species.tre) local <- Ntip(kipuka.all.tre) N <- Ntip(Kip.regional.tree) local <- Ntip(Kip.local.tree) #simulate data on crick under emp sims screen BM.neutral.data <- SimCommunityAssembly(sims, N, local, traitsim = "BM", comsim = "neutral", sig2 = 0.77, tau=c(1,60)) #10000 BM.filtering.data <- SimCommunityAssembly(sims, N, local, traitsim = "BM", comsim = "filtering", sig2 = 0.77, tau=c(1,60)) #10000 BM.competition.data <- SimCommunityAssembly(sims, N, local, traitsim = "BM", comsim = "competition", sig2 = 0.77, tau=c(1,60)) #10000 OU.neutral.data <- SimCommunityAssembly(sims, N, local, traitsim = "OU", comsim = "neutral", sig2 = 0.77, alpha = 0.2, tau=c(1,60)) #10000 OU.filtering.data <- SimCommunityAssembly(sims, N, local, traitsim = "OU", comsim = "filtering", sig2 = 0.77, alpha = 0.2, tau=c(1,60)) #10000 OU.competition.data <- SimCommunityAssembly(sims, N, local, traitsim = "OU", comsim = "competition", sig2 = 0.77, alpha = 0.2, tau=c(1,60)) #10000 #save the data save(BM.neutral.data, file="BMneutral.dataEmp.Rdata") save(BM.filtering.data, file="BMfilt.dataEmp.Rdata") save(BM.competition.data, file="BMcomp.dataEmp.Rdata") save(OU.neutral.data, file="OUneutral.dataEmp.Rdata") save(OU.filtering.data, file="OUfilt.dataEmp.Rdata") save(OU.competition.data, file="OUcomp.dataEmp.Rdata") #load the data setwd("/Users/Megan/Documents/Github/CAMI/data/EmpiricalData") load( file="BMneutral.dataEmp.Rdata") load(file="BMfilt.dataEmp.Rdata") load(file="BMcomp.dataEmp.Rdata") load(file="OUneutral.dataEmp.Rdata") load(file="OUfilt.dataEmp.Rdata") load(file="OUcomp.dataEmp.Rdata")
We can now use the simulated data to perform model selection using randomForest and Approximate Bayesian Computation.
#combine summary stats and model index into 1 data frame for RF and ABC; ALL MODELS sum.stats.all <- rbind(BM.neutral.data$summary.stats[1:10000,], BM.filtering.data$summary.stats[1:10000,], BM.competition.data$summary.stats[1:10000,], OU.neutral.data$summary.stats[1:10000,], OU.filtering.data$summary.stats[1:10000,], OU.competition.data$summary.stats[1:10000,]) mod.index.all <- rep(c("BMneut", "BMfilt", "BMcomp", "OUneut", "OUfilt", "OUcomp"), each=10000) ref.table.all <- na.omit(data.frame(sum.stats.all, mod.index.all)) #determine rf error rates using all data rf.emp.all <- randomForest(mod.index.all ~., data=ref.table.all, ntree=1000, importance=T) #variable importance information varImpPlot(rf.emp.all) ##combine summary stats and model index into 1 data frame for RF and ABC; OU MODELS ONLY sum.stats.ou <- rbind(OU.neutral.data$summary.stats[1:10000,], OU.filtering.data$summary.stats[1:10000,], OU.competition.data$summary.stats[1:10000,]) mod.index.ou <- rep(c("neut", "filt", "comp"), each=10000) ref.table.ou <- na.omit(data.frame(sum.stats.ou, mod.index.ou)) #determine rf error rates using OU data rf.emp.ou <- randomForest(mod.index.ou ~., data=ref.table.ou, ntree=1000, importance=T) #variable importance information varImpPlot(rf.emp.ou) #do this stupid thing to make the prediction work Kip.summary.stats.mat <- rbind(Kip.summary.stats, Kip.summary.stats) rownames(Kip.summary.stats.mat) <- c("ss1", "ss2") #make prediction using empirical summary statistics and both classifiers #all models predict(rf.emp.all, Kip.summary.stats.mat, type="prob") #ou models predict(rf.emp.ou, Kip.summary.stats.mat, type="prob")
#For ABC we will only use the top 10 summary statistics for model selection, see varImpPlot() Kip.summary.stats.abc <- Kip.summary.stats[c(4, 10, 12, 13, 16, 18, 19, 20)] #determine error rate from cross validation using all data cv.emp.all <- cv4postpr(mod.index.all, sum.stats.all[,c(4, 10, 12, 13, 16, 18, 19, 20)], nval=500, tols=.005, method="rejection") #determine error rate from cross validation using only OU data cv.emp.ou <- cv4postpr(mod.index.ou, sum.stats.ou[,c(4, 10, 12, 13, 16, 18, 19, 20)], nval=500, tols=.005, method="rejection") #determine posterior model probabilities for empirical data using all data abc.emp.all <- postpr(target = Kip.summary.stats.abc, index = mod.index.all, sumstat = sum.stats.all[, c(4, 10, 12, 13, 16, 18, 19, 20)], tol = 0.002, method="rejection") summary(abc.emp.all) #determine posterior model probabilities for empirical data using only OU data abc.emp.ou <- postpr(target = Kip.summary.stats.abc, index = mod.index.ou, sumstat = sum.stats.ou[, c(4, 10, 12, 13, 16, 18, 19, 20)], tol = 0.002, method="rejection") summary(abc.emp.ou)
setwd("/Users/Megan/Documents/Github/CAMI/data/EmpiricalData") load(file="OU.filtering.data.emp.100000.Rdata") Kip.Param.Est <- abc(target = Kip.summary.stats.abc, param =OU.filtering.data$params, sumstat = OU.filtering.data$summary.stats[,c(4, 10, 12, 13, 16, 18, 19, 20)], tol = 0.0015, method="rejection") Kip.Param.Est <- abc(target = Kip.summary.stats.abc, param =OU.neutral.data$params, sumstat = OU.neutral.data$summary.stats[,c(4, 10, 12, 13, 16, 18, 19, 20)], tol = 0.025, method="rejection") sig2.pp <- data.frame(dt=factor(c(rep("prior", each=nrow(OU.filtering.data$params)), rep("posterior", each= nrow(Kip.Param.Est$unadj.values)))), sig2 = c(as.numeric(paste(OU.filtering.data$params[,8])), as.numeric(paste(Kip.Param.Est$unadj.values[,8])))) ggplot(sig2.pp, aes(x=sig2, fill=dt)) + geom_density() + scale_fill_manual(values=c(rgb(.2, .2, .2, .8), rgb(.5, .5, .5, .4))) + theme(legend.position="none") + geom_vline(aes(xintercept=median(as.numeric(paste(Kip.Param.Est$unadj.values[,8])))), color="black", linetype="dashed", lwd=2) #do the same for tau tiff('Fig4.tiff', units="in", width=5, height=4, res=2000, compression = 'lzw') tau.pp <- data.frame(dt=factor(c(rep("prior", each=nrow(OU.filtering.data$params)), rep("posterior", each= nrow(Kip.Param.Est$unadj.values)))), tau = c(as.numeric(paste(OU.filtering.data$params[,10])), as.numeric(paste(Kip.Param.Est$unadj.values[,10])))) ggplot(tau.pp, aes(x=tau, fill=dt)) + geom_density() + scale_fill_manual(values=c(rgb(.2, .2, .2, .8), rgb(.5, .5, .5, .4))) + theme(legend.position="none") + geom_vline(aes(xintercept=median(as.numeric(paste(Kip.Param.Est$unadj.values[,10])))), color="black", linetype="dashed", lwd=2) mean(sort(as.numeric(paste(Kip.Param.Est$unadj.values[,10])))[1:142]) pa <- data.frame(dt=factor(c(rep("prior", each=nrow(OU.filtering.data$params)), rep("posterior", each= nrow(Kip.Param.Est$unadj.values)))), Pa = c(as.numeric(paste(OU.filtering.data$params[,11])), as.numeric(paste(Kip.Param.Est$unadj.values[,11])))) ggplot(pa, aes(x=Pa, fill=dt)) + geom_density() + scale_fill_manual(values=c(rgb(.2, .2, .2, .8), rgb(.5, .5, .5, .4))) + theme(legend.position="none") + geom_vline(aes(xintercept=mean(as.numeric(paste(Kip.Param.Est$unadj.values[,11])))), color="black", linetype="dashed", lwd=2) mean(sort(as.numeric(paste(Kip.Param.Est$unadj.values[,11])))[1:142]) plot(density(as.numeric(paste(Kip.Param.Est$unadj.values[,11]))), lwd=2, main="", xlab="inclusion probability", bty="n") polygon(density(as.numeric(paste(Kip.Param.Est$unadj.values[,11]))), col="grey77") abline(v=median(as.numeric(paste(Kip.Param.Est$unadj.values[,11]))), lty=2, lwd=2) hist(as.numeric(paste(OU.filtering.data$params[,11]))) hist(as.numeric(paste(Kip.Param.Est$unadj.values[,10])))
model.fit <- gfit(target=Kip.summary.stats.abc, sumstat=Kip.Param.Est$ss, nb.replicate=100, tol=.001, statistic=median, subset=NULL, trace=FALSE) summary(model.fit) pc.data <- rbind(Kip.summary.stats.abc, Kip.Param.Est$ss) pca <- prcomp(pc.data) attributes(pca) plot(pca$x[,1], pca$x[,2], pch=16, ylab="PC2", xlab="PC1") points(pca$x[1,1], pca$x[1,2], pch=21, col="black", cex=1.5, bg="red") summary(model.fit) dist<-rnorm(100,mean = 3.8, sd = 0.4) plot(density(dist), main="", xlab= "median", lwd=2) polygon(density(dist), col="grey77") abline(v=3.7, lty=2, col="black", lwd=2)
Kip.dispersion.stats <- CalcPhyDispStats(regional.tree = Kip.regional.tree, local.tree = Kip.local.tree, regional.traits = log(Kip.regional.traits), local.traits = log(Kip.local.traits)) write.table(Kip.dispersion.stats, file="KipDispersionResults.csv", sep="\t")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.