inst/examples/misc_examples/tutorial_with_simulated_traits_knit_.md

A wrightscape tutorial using simulated traits

ro dev='png', fig.path='figure/', cache=TRUE, cache.path='cache/' or

Load package, along with parallelization, plotting, and data manipulation packages. Then load the data.

``` {r } require(wrightscape) require(snowfall) require(ggplot2) require(reshape) data(labrids)



rename the regimes less technically

``` {r }
levels(pharyngeal) = c("wrasses", "parrotfish")
regime <- pharyngeal

Create a dataset by simulation, where the parrotfish all have lower alpha

``` {r } a1_spec <- list(alpha = "indep", sigma = "global", theta = "global") a1 <- multiTypeOU(data = dat["close"], tree = tree, regimes = regime, model_spec = a1_spec, control = list(maxit=5000))


assign expected "startree" standard deviations: wrasses 1.25, parrotfish 5

``` {r }
a1$alpha[1] <- 10
a1$alpha[2] <- 1e-10
a1$sigma <- c(5, 5)  
a1$theta <- c(0,0)   
dat[["constraint release"]] <-simulate(a1)[[1]]

Check out the variance in the relative groups -- it should be larger in parrotfish in an extreme example, but need not be in principle, due to the phylogeny.

``` {r } testcase <- dat[["constraint release"]] testcase[regime =="wrasses" & !is.na(testcase) & testcase != 0] -> lowvar testcase[regime !="wrasses" & !is.na(testcase) & testcase != 0] -> highvar print(c(var(lowvar), var(highvar)))


We can repeat the whole thing with a model based on differnt sigmas, to make sure 
``` {r }
s1_spec  <- list(alpha = "global", sigma = "indep", theta = "global")
s1 <- multiTypeOU(data = dat["close"], tree = tree, regimes = pharyngeal, 
         model_spec = s1_spec,  control = list(maxit=5000))
names(s1$sigma) <- levels(regime)
s1$sigma[1] <- sqrt(2*5*1.25)
s1$sigma[2] <- sqrt(2*5*5)
s1$alpha <- c(5, 5)  # We can keep those parameters estimated from data or update them
a1$theta <- c(0,0)   
dat[["faster evolution"]] <-simulate(s1)[[1]]
testcase <- dat[["faster evolution"]]
testcase[regime == "wrasses" & !is.na(testcase) & testcase != 0 ] -> lowvar
testcase[regime != "wrasses" & !is.na(testcase) & testcase != 0 ] -> highvar
print(c(var(lowvar), var(highvar)))

Now we have a trait where change in alpha is responsible, and one in which sigma change is responsible. Can we correctly identify each??

``` {r } traits <- c("constraint release", "faster evolution")


``` {r }
fits <- lapply(traits, function(trait){
  # declare function for shorthand
  multi <- function(modelspec, reps = 20){
    m <- multiTypeOU(data = dat[[trait]], tree = tree, regimes = pharyngeal, 
             model_spec = modelspec, 
             control = list(maxit=5000)
            ) 
    replicate(reps, bootstrap(m))
  }
  bm <- multi(list(alpha = "fixed", sigma = "indep", theta = "global"))
  s1 <- multi(list(alpha = "global", sigma = "indep", theta = "global")) 
  a1  <- multi(list(alpha = "indep", sigma = "global", theta = "global")) 
  s2 <- multi(list(alpha = "global", sigma = "indep", theta = "indep")) 
  a2  <- multi(list(alpha = "indep", sigma = "global", theta = "indep")) 
  list(bm=bm, s1=s1, a1=a1, s2=s2, a2=a2)
})

Reformat and label data for plotting

``` {r } names(fits) <- traits # each fit is a different trait (so use it for a label) data <- melt(fits) names(data) <- c("regimes", "param", "rep", "value", "model", "trait")


``` {r }
save(list=ls(), file="tutorial_with_simulated_traits.rda")

model likelihood ``` {r } p1 <- ggplot(subset(data, param=="loglik")) + geom_boxplot(aes(model, value)) + facet_wrap(~ trait, scales="free_y") p1


``` {r }
p2 <-  ggplot(subset(data, param %in% c("sigma", "alpha")), aes(model, value, fill=regimes)) + 
       stat_summary(fun.data=mean_sdl, geom="pointrange", aes(color=regimes), 
            position = position_dodge(width=0.90)) +
       scale_y_log10() + 
       facet_grid(param ~ trait, scales = "free_y")
p2

The visualization is easier if we seperate out the plots. Consider focusing on the sigma parameter for both simulations:

``` {r } p3 <- ggplot(subset(data, param %in% c("sigma") )) + geom_boxplot(aes(model, value, fill=regimes)) + facet_wrap(trait ~ param, scales = "free_y") p3


Compare this to the alpha parameter for both simulations:

``` {r }
p4 <- ggplot(subset(data, param %in% c("alpha")  )) +
      geom_boxplot(aes(model, value, fill=regimes)) + 
      facet_wrap(trait ~ param, scales = "free_y") 
p4

To really tell these datasets apart, we need the direct model comparison.

``` {r } save(list=ls(), file="tutorial_with_simulated_traits.rda")



We'll take advantage of the parallelization inside the `montecarlotest` function
``` {r }
nboot <- 50
cpu <- 16

``` {r } require(snowfall) sfInit(parallel=TRUE, cpu=cpu) sfLibrary(wrightscape) sfExportAll()



``` {r  }
fits <- lapply(traits, function(trait){
    multi <- function(modelspec){ 
     multiTypeOU(data = dat[[trait]], tree = tree, regimes = pharyngeal, 
                model_spec = modelspec, control = list(maxit=8000))
    }
    bm <- multi(list(alpha = "fixed", sigma = "global", theta = "global")) 
    ou <- multi(list(alpha = "global", sigma = "global", theta = "global")) 
    bm2 <- multi(list(alpha = "fixed", sigma = "indep", theta = "global")) 
    a2  <- multi(list(alpha = "indep", sigma = "global", theta = "global")) 
    t2  <- multi(list(alpha = "global", sigma = "global", theta = "indep"))

  mc <- montecarlotest(bm2,a2, cpu=cpu, nboot=nboot)
  bm2_a2 <- list(null=mc$null_dist, test=mc$test_dist, 
    lr=-2*(mc$null$loglik-mc$test$loglik))
  mc <- montecarlotest(bm,ou, cpu=cpu, nboot=nboot)
  bm_ou <- list(null=mc$null_dist, test=mc$test_dist, 
    lr=-2*(mc$null$loglik-mc$test$loglik))
  mc <- montecarlotest(bm,bm2, cpu=cpu, nboot=nboot)
  bm_bm2 <- list(null=mc$null_dist, test=mc$test_dist, 
    lr=-2*(mc$null$loglik-mc$test$loglik))
  mc <- montecarlotest(ou,bm2, cpu=cpu, nboot=nboot)
  ou_bm2 <- list(null=mc$null_dist, test=mc$test_dist,
    lr=-2*(mc$null$loglik-mc$test$loglik))
  mc <- montecarlotest(t2,a2, cpu=cpu, nboot=nboot)
  t2_a2 <- list(null=mc$null_dist, test=mc$test_dist, 
    lr=-2*(mc$null$loglik-mc$test$loglik))
  mc <- montecarlotest(bm2,t2, cpu=cpu, nboot=nboot)
  bm2_t2 <- list(null=mc$null_dist, test=mc$test_dist,
    lr=-2*(mc$null$loglik-mc$test$loglik))
  list(brownie_vs_alphas=bm2_a2, brownie_vs_thetas=bm2_t2,
       thetas_vs_alphas=t2_a2, bm_vs_brownie=bm_bm2,  
       bm_vs_ou=bm_ou, ou_vs_brownie=ou_bm2)
})

Clean up the data

``` {r } names(fits) <- traits dat <- melt(fits) names(dat) <- c("value", "type", "comparison", "trait")



``` {r }
r <- cast(dat, comparison ~ trait, function(x) quantile(x, c(.10,.90)))
subdat <- subset(dat, abs(value) < max(abs(as.matrix(r))))

``` {r fig.height=24} ggplot(subdat) + geom_boxplot(aes(type, value)) + facet_grid(trait ~ comparison, scales="free_y") ````



cboettig/wrightscape documentation built on May 13, 2019, 2:12 p.m.