knitr::opts_chunk$set( collapse = TRUERUE, comment = "##" )
This is a temporal Rmd file to facilitate develoement.
library(dplyr, quietly = TRUE) comm = phyr::comm_a comm$site = row.names(comm) dat = tidyr::gather(comm, key = "sp", value = "freq", -site) %>% left_join(phyr::envi, by = "site") %>% left_join(phyr::traits, by = "sp") dat$pa = as.numeric(dat$freq > 0) tree = phyr::phylotree data = dat tree_site = ape::rtree(n = nlevels(data$site), tip.label = levels(data$site)) formula = freq ~ 1 + shade + (1 | sp__) + (1 | site) + (1 | sp__@site) + (1|sp@site) formula = freq ~ 1 + shade + (1 | sp__) + (1 | site) + (1 | sp__@site) + (shade | sp) + (shade | sp__@site) data$freq2 = 20 formula = cbind(freq, freq2) ~ 1 + shade + (1 | sp__) + (1 | site) + (1 | sp__@site) cov_ranef = list(sp = phylotree) repulsion = F REML = F s2.init = NULL B.init = NULL reltol = 10^-12 maxit = 500 tol.pql = 10^-4 maxit.pql = 200 verbose = FALSE random.effects = NULL optimizer = "bobyqa" optimizer = "Nelder-Mead" prep.s2.lme4 = FALSE cpp = TRUE prep_re = TRUE family = "gaussian" family = "binomial" tol.pql = 10^-6 maxit.pql = 200 add.obs.re = F dat_prepared = phyr::prep_dat_pglmm(formula, data, tree, repulsion, prep_re, family, prep.s2.lme4, tree_site) formula = dat_prepared$formula data = dat_prepared$data sp = dat_prepared$sp site = dat_prepared$site random.effects = dat_prepared$random.effects if (family == "binomial") s2.init = 0.25 dm = phyr::get_design_matrix(formula, data, na.action = NULL, sp, site, random.effects) X = dm$X; Y = dm$Y; St = dm$St; Zt = dm$Zt; nested = dm$nested p <- ncol(X) n <- nrow(X) q <- length(random.effects) if(family == "gaussian"){ B.init <- t(matrix(lm(formula = formula, data = data)$coefficients, ncol = p)) s2.init <- var(lm(formula = formula, data = data)$residuals)/q } else { B.init <- t(matrix(glm(formula = formula, data = data, family = binomial, na.action = na.omit)$coefficients, ncol = p)) } B <- B.init s <- as.vector(array(s2.init^0.5, dim = c(1, q))) ss <- as.vector(array(s2.init^0.5, dim = c(1, q))) par = s
library(phyr) library(dplyr) comm = comm_a comm$site = row.names(comm) dat = TRUEidyr::gather(comm, key = "sp", value = "freq", -site) %>% left_join(envi, by = "site") %>% left_join(traits, by = "sp") dat$pa = as.numeric(dat$freq > 0) formula = freq ~ 1 + shade + (1|sp__) + (1|site) + (1|sp__@site) data = dat tree = phylotree family = "gaussian" random.effects = NULL add.tree = TRUERUE tree_site = NULL repulsion = FALSE library(Matrix) test = communityPGLMM.plot.random.effects(freq ~ 1 + shade + (1|sp__) + (1|site) + (1|sp__@site), dat, tree = phylotree, show.image = FALSE, show.sim.image = TRUE, tree.panel.space = 0)
if(optimizer == "bobyqa"){ system.time({ opts <- list("algorithm" = "NLOPT_LN_BOBYQA", "ftol_rel" = reltol/1000, "ftol_abs" = reltol/1000, "xtol_rel" = 0.00001, "maxeval" = maxit) S0 <- nloptr::nloptr(x0 = s, eval_f = phyr:::pglmm_gaussian_LL_calc, opts = opts, X = X, Y = Y, Zt = Zt, St = St, nested = nested, REML = REML, verbose = verbose, optim_ll = TRUE) opt <- list(par = S0$solution, value = S0$objective, counts = S0$iterations, convergence = S0$status, message = S0$message) }) system.time(opt1 <- nloptr::bobyqa(x0 = s, fn = phyr:::pglmm_gaussian_LL_calc, X = X, Y = Y, Zt = Zt, St = St, nested = nested, REML = REML, verbose = verbose, optim_ll = TRUE, control = list("ftol_rel" = reltol, "xtol_rel" = 0.0001, "maxeval" = maxit))) testthat::expect_equal(opt, opt1, ignore_attr = TRUE) } system.time({ opts <- list("algorithm" = "NLOPT_LN_NELDERMEAD", "ftol_rel" = reltol, "xtol_rel" = 0.0001, "maxeval" = maxit) S0 <- nloptr::nloptr(x0 = s, eval_f = phyr:::pglmm_gaussian_LL_calc, opts = opts, X = X, Y = Y, Zt = Zt, St = St, nested = nested, REML = REML, verbose = verbose, optim_ll = TRUE) opt3 <- list(par = S0$solution, value = S0$objective, counts = S0$iterations, convergence = S0$status, message = S0$message) }) # 4.472 system.time(opt33 <- nloptr::neldermead(x0 = s, fn = phyr:::pglmm_gaussian_LL_calc, X = X, Y = Y, Zt = Zt, St = St, nested = nested, REML = REML, verbose = verbose, optim_ll = TRUE, control = list("ftol_rel" = reltol, "xtol_rel" = 0.0001, "maxeval" = maxit))) system.time(opt4 <- nloptr::sbplx(x0 = s, fn = phyr:::pglmm_gaussian_LL_calc, X = X, Y = Y, Zt = Zt, St = St, nested = nested, REML = REML, verbose = verbose, optim_ll = TRUE, control = list("ftol_rel" = reltol, "ftol_abs" = reltol, "xtol_rel" = 0.0001, "maxeval" = maxit))) if(optimizer == "Nelder-Mead"){ if (q > 1) { system.time( opt2 <- optim(fn = phyr:::pglmm_gaussian_LL_calc, par = s, X = X, Y = Y, Zt = Zt, St = St, nested = nested, REML = REML, verbose = verbose, optim_ll = TRUE, method = "Nelder-Mead", control = list(maxit = maxit, reltol = reltol)) ) } else { opt <- optim(fn = pglmm_gaussian_LL_calc, par = s, X = X, Y = Y, Zt = Zt, St = St, nested = nested, REML = REML, verbose = verbose, method = "L-BFGS-B", control = list(maxit = maxit)) } } opts <- list("ftol_rel" = reltol, "ftol_abs" = reltol, "xtol_rel" = 0.0001, "maxeval" = maxit) microbenchmark::microbenchmark( nloptr::bobyqa(x0 = s, fn = phyr:::pglmm_gaussian_LL_calc, X = X, Y = Y, Zt = Zt, St = St, nested = nested, REML = REML, verbose = verbose, optim_ll = TRUE, control = opts), nloptr::sbplx(x0 = s, fn = phyr:::pglmm_gaussian_LL_calc, X = X, Y = Y, Zt = Zt, St = St, nested = nested, REML = REML, verbose = verbose, optim_ll = TRUE, control = opts), nloptr::neldermead(x0 = s, fn = phyr:::pglmm_gaussian_LL_calc, X = X, Y = Y, Zt = Zt, St = St, nested = nested, REML = REML, verbose = verbose, optim_ll = TRUE, control = opts), optim(fn = phyr:::pglmm_gaussian_LL_calc, par = s, X = X, Y = Y, Zt = Zt, St = St, nested = nested, REML = REML, verbose = verbose, optim_ll = TRUE, method = "Nelder-Mead", control = list(maxit = maxit, reltol = reltol)), times = 5 ) c("nelder-mead-nlopt", "subplex")
# bipartite nspp = 5 # pollinators nsite = 10 # plants phy_sp = ape::rtree(nspp) phy_site = ape::rtree(nsite) phy_site$tip.label = paste0("site", 1:nsite) Vsp = ape::vcv(phy_sp) Vsite = ape::vcv(phy_site) library(Matrix) re.sp0 = as(kronecker(diag(nsite), matrix(1, nrow=nspp, ncol=nspp)), "dgCMatrix") image(re.sp0) re.site0 <- as(kronecker(matrix(1, nrow=nsite, ncol=nsite), nspp), "dgCMatrix") image(re.site0) re.sp <- as(kronecker(Vsite, matrix(1, nrow=nspp, ncol=nspp)), "dgCMatrix") image(re.sp) re.site <- as(kronecker(matrix(1, nrow=nsite, ncol=nsite), Vsp), "dgCMatrix") image(re.site) re.sp.site <- as(kronecker(diag(nrow=nsite), Vsp), "dgCMatrix") image(re.sp.site) re.site.sp <- as(kronecker(Vsite, diag(nrow=nspp)), "dgCMatrix") image(re.site.sp) re.cophy <- as(kronecker(Vsite, Vsp), "dgCMatrix") image(re.cophy)
library(ape) library(phyr) library(Matrix) # source("communityPLMM_2Dec17.R") # See edit(phyr::get_design_matrix) # edit(phyr::get_design_matrix) w <- read.table("dt.txt",header = TRUE,row.names = 1)###here,I convert the dt.csv file to tab delimited text file in w$site <- as.factor(rownames(w)) w[,1:7] <- w[,1:7]^.5 rownames(w) <- NULL host_tree <- read.tree("phylo_19") mist_tree <- read.tree("phylomist_19") ###"phylomist_19"is a phylo file(phylogenetic tree of host species) obtained from phylomatic online tools## mist_tree$tip.label <- c("vsp", "sc", "scy", "mb", "dp", "hp", "mc") # check that the names of hosts and mist are the same d = TRUEidyr::gather(w, key = "sp", value = "count", -site) d = dplyr::arrange(d, site, sp) d$sp = as.factor(d$sp) # Set value at base of tree to zero host_tree$root.edge <- 0 host_tree <- multi2di(host_tree) # Separately specifying the complete covariance matrices didn't work nspp <- Ntip(mist_tree) nsite <- Ntip(host_tree) Vsp <- vcv(mist_tree) Vsite <- vcv(host_tree) Vsp <- Vsp/(det(Vsp)^(1/nspp)) Vsite <- Vsite/(det(Vsite)^(1/nsite)) # ---- reorder phylogenies Vsp = Vsp[levels(d$sp), levels(d$sp)] Vsite = Vsite[levels(d$site), levels(d$site)] # Set up random effects using the format in pez for d re.sp0 <- list(1, sp = d$sp, covar = diag(nspp)) re.site0 <- list(1, site = d$site, covar = diag(nsite)) re.sp <- list(1, sp = d$sp, covar = Vsp) re.site <- list(1, site = d$site, covar = Vsite) re.sp.site <- list(1, site = d$sp, covar = Vsp, d$site) re.site.sp <- list(1, site = d$site, covar = Vsite, d$sp) re.cophy <- list(kronecker(Vsp,Vsite)) # ---- shouldn't it bekronecker(Vsite,Vsp) ? data = d tree=mist_tree tree_site=host_tree formula = count ~ 1 + (1|sp__) + (1|site__) + (1|sp__@site) + (1|sp@site__) repulsion = F REML = F s2.init = NULL B.init = NULL reltol = 10^-12 maxit = 500 tol.pql = 10^-4 maxit.pql = 200 verbose = FALSE random.effects = NULL optimizer = "bobyqa" optimizer = "Nelder-Mead" prep.s2.lme4 = FALSE cpp = TRUE prep_re = TRUERUE family = "gaussian" tol.pql = 10^-6 maxit.pql = 200 dat_prepared = phyr::prep_dat_pglmm(formula, data, tree, repulsion, prep_re, family, prep.s2.lme4, tree_site) formula = dat_prepared$formula data = dat_prepared$data sp = dat_prepared$sp site = dat_prepared$site random.effects = dat_prepared$random.effects if (family == "binomial") s2.init = 0.25 dm = phyr::get_design_matrix(formula, data, na.action = NULL, sp, site, random.effects) X = dm$X; Y = dm$Y; St = dm$St; Zt = dm$Zt; nested = dm$nested p <- ncol(X) n <- nrow(X) q <- length(random.effects) if(family == "gaussian"){ B.init <- t(matrix(lm(formula = formula, data = data)$coefficients, ncol = p)) s2.init <- var(lm(formula = formula, data = data)$residuals)/q } else { B.init <- t(matrix(glm(formula = formula, data = data, family = binomial, na.action = na.omit)$coefficients, ncol = p)) } B <- B.init s <- as.vector(array(s2.init^0.5, dim = c(1, q))) ss <- as.vector(array(s2.init^0.5, dim = c(1, q))) par = s
library(tidyverse) sim_dat = function(nspp = 30, nsite = 30, physig.trait1 = TRUERUE, physig.trait2 = TRUERUE, alpha = 1, beta_1 = 1, beta_2 = 1, beta_3 = 1, beta_4 = 1, beta_5 = 0, beta_6 = 1){ # simulate the phylogeny phy = geiger::sim.bdtree(b = 1, d = 0, stop = "taxa", n = nspp, extinct = FALSE) phy$tip.label = paste0("sp", 1:nspp) # vphy = ape::vcv(phy) # var-cov matrix # vphy = vphy/(det(vphy)^(1/nspp)) # to standardize the matrix # corphy = cov2cor(vphy) # corr matrix # simulate traits if(physig.trait1){ trait1 = phytools::fastBM(tree = phy, mu = 0, sig2 = 1) } else { trait1 = rnorm(nspp, 0, 1) } if(physig.trait2){ trait2 = phytools::fastBM(tree = phy, mu = 0, sig2 = 1) } else { trait2 = rnorm(nspp, 0, 1) } trait = data.frame(sp = paste0("sp", 1:nspp), trait1 = TRUErait1, trait2 = TRUErait2, stringsAsFactors = FALSE) # simulate two envi variables envi = data.frame(site = paste0("site", 1:nsite), envi1 = sort(runif(n = nsite, min = -1, max = 1)), envi2 = rnorm(n = nsite), stringsAsFactors = FALSE) # simulate abundance data dat = expand.grid(sp = paste0("sp", 1:nspp), site = paste0("site", 1:nsite)) %>% mutate(sp = as.character(sp), site = as.character(site)) dat = left_join(dat, trait, by = "sp") %>% left_join(envi, by = "site") dat = mutate(dat, abund = alpha + beta_1 * envi1 + beta_2 * envi2 + beta_3 * trait1 + beta_4 * trait2 + beta_5 * envi1 * trait1 + beta_6 * envi1 * trait2 + rnorm(nspp * nsite, 0, 1)) %>% mutate(sp = factor(sp, levels = paste0("sp", 1:nspp)), site = factor(site, levels = paste0("site", 1:nsite))) %>% as.tibble() list(dat = dat, phy = phy) } d = sim_dat() z.phy = phyr::communityPGLMM(abund ~ 1 + envi1 * trait1 + (1|sp__) + (envi1|sp__) + (1|site), data = d$dat, tree = d$phy, family = "gaussian", REML = FALSE) z.phy$ss z.no.interaction = phyr::communityPGLMM(abund ~ 1 + envi1 + trait1 + (1|sp__) + (envi1|sp__) + (1|site), data = d$dat, tree = d$phy, family = "gaussian", REML = FALSE) microbenchmark::microbenchmark(communityPGLMM.predicted.values(x, cpp = TRUE), communityPGLMM.predicted.values(x, cpp = FALSE), times = 20)
library(lme4) library(phyr) nsite <- 10 nsp <- 20 n <- nsp*nsite set.seed(12345) d <- data.frame(x1=0, x2=0, y=0, sp=rep(1:nsp, times=nsite), site=rep(1:nsite, each=nsp)) d$sp <- as.factor(d$sp) d$site <- as.factor(d$site) b1 <- 1 b2 <- 0 sd1 <- 1.5 d$x1 <- rnorm(n=n) d$x2 <- rnorm(n=n) d$y <- b1 * d$x1 + b2 * d$x2 + rep(rnorm(n=nsp, sd=sd1), each=nsite) + rep(rnorm(n=nsite, sd=sd1), times=nsp) + rnorm(n=n) d$p <- d$y > 0 fm = as.formula("p ~ x1 + (1 | sp)") # fm = as.formula("p ~ x1 + (1 | sp) + (1|site)") z_lmer_bi <- glmer(fm, data=d, family="binomial") z_phyr_bi <- communityPGLMM(fm, data=d, REML = FALSE, family="binomial") summary(z_lmer_bi) z_phyr_bi z_lmer_ga = lmer(fm, data=d) z_phyr_ga = communityPGLMM(fm, data = d) summary(z_lmer_ga) z_phyr_ga # fitted round(unname(fitted(z_lmer_ga)) - communityPGLMM.predicted.values(z_phyr_ga)$Y_hat, 3) plot(unname(fitted(z_lmer_ga)), communityPGLMM.predicted.values(z_phyr_ga)$Y_hat) abline(a = 0, b = 1) # almost there round(unname(predict(z_lmer_bi)) - communityPGLMM.predicted.values(z_phyr_bi)$Y_hat, 3) plot(unname(predict(z_lmer_bi)), communityPGLMM.predicted.values(z_phyr_bi)$Y_hat) abline(a = 0, b = 1) # close unname(fitted(z_lmer_bi)) == z_lmer_bi@resp$mu # true plot(z_phyr_bi$mu, z_lmer_bi@resp$mu) abline(a = 0, b = 1) # close unname(fitted(z_lmer_ga)) == z_lmer_ga@resp$mu # true; == communityPGLMM.predicted.values(z_phyr_ga)$Y_hat # residuals residuals(z_phyr_ga, type = "deviance") plot(residuals(z_lmer_ga, type = "response"), residuals(z_phyr_ga, type = "response")) abline(a = 0, b = 1) # close plot(residuals(z_lmer_bi, type = "response"), residuals(z_phyr_bi, type = "response")) abline(a = 0, b = 1) # close plot(residuals(z_lmer_bi, type = "deviance"), residuals(z_phyr_bi, type = "deviance")) abline(a = 0, b = 1) # close # predictions pred_lmer <- predict(z_lmer) logit = make.link("logit")$linkfun plot(logit(fitted(z_lmer)), pred_lmer) # same pred_phyr <- communityPGLMM.predicted.values(mod)$Y_hat # XB + b plot(pred_lmer, pred_phyr) # same plot(mod$mu, fitted(z_lmer)) # same plot(logit(mod$mu), pred_phyr) plot(fitted(z_lmer), z_lmer@resp$mu) # same hat_phyr = rr2::inv.logit(pred_phyr) hat_lmer = predict(z, re.form = NULL) hat_lmer = rr2::inv.logit(hat_lmer) resi_lmer = residuals(z, type="pearson") plot(resi_lmer, d$p - hat_lmer) plot(resid(z), d$p - hat_lmer) plot(resid(z), (d$p - mod$mu)/sqrt(mod$mu * (1 - mod$mu))) # hum... plot(resid(z), (d$p - hat_phyr)/sqrt(hat_phyr * (1 - hat_phyr))) # hum... plot(resi_lmer, (d$p - hat_lmer)/sqrt(hat_lmer * (1 - hat_lmer))) # hum... plot(pred_phyr, fitted(z)) plot(fitted(z), predict(z)) plot(resid(z), (d$p - fitted(z))/sqrt(fitted(z) * (1 - fitted(z)))) plot(resid(z), d$p - pred_lmer) plot(resid(z), rr2::inv.logit(mod$H[, 1])) # hum... plot(resid(z), mod$H[, 1]) # hum... plot(make.link("logit")$linkfun(mod$mu), residuals(z)) plot(d$p - d$mod, residuals(z)) plot(make.link("logit")$linkfun(mod$mu), d$p - d$mod) with(d, plot(z, mod)) z1 = lmer(y ~ x1 + (1 | sp), data=d) z1_phyr = communityPGLMM(y ~ x1 + (1|sp), data = d) summary(z1) z1_phyr # fitted values plot(fitted(z1), predict(z1)) # same pred_phyr <- communityPGLMM.predicted.values(z1_phyr, gaussian.pred = "nearest_node")$Y_hat plot(predict(z1), pred_phyr) # good plot(predict(z1), communityPGLMM.predicted.values(z1_phyr, gaussian.pred = "tip_rm")$Y_hat) # different way # residuals plot(residuals(z1_lmer, type = "response"), residuals(z1_phyr, type = "response")) resi_lmer = resid(z1) resi_phyr = z1_phyr$H[,1] plot(resi_phyr, resi_lmer) # err... plot(d$y - predict(z1), resi_lmer) # same plot(d$y - pred_phyr, resi_lmer) # good # so why z1_phyr$H is not exactly the residual? Because it does not account for random terms plot(resi_phyr, d$y - predict(z1, re.form = NA))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.