lump.split.pool.ENM: Comparing different ENM approaches

Documented in fit_bayesmix fit_lump fit_mixed fit_pglmm fit_split

#' Fit split model
#'
#' @param simdata List generated by \code{\link{simul_data}}.
#'
#' @return A data frame with 4 columns (intercept estimate and standard error, and slope estimate and standard error), and as many rows as taxa.
#' @export
#' @import dplyr

fit_split <- function(simdata) {

  df <- simdata$data2model

  split.model <- df %>%
    group_by(taxon) %>%
    do(split.m = glm(presabs ~ env, data = ., family = binomial)) %>%
    summarise(interc.split = coef(split.m)[1],
              interc.split.se = arm::se.coef(split.m)[1],
              slope.split = coef(split.m)[2],
              slope.split.se = arm::se.coef(split.m)[2])

  split.model

}


#' Fit lump model
#'
#' @param simdata List generated by \code{\link{simul_data}}.
#'
#' @return A data frame with 4 columns (intercept estimate and standard error, and slope estimate and standard error), and as many rows as taxa.
#' @export
#' @import dplyr

fit_lump <- function(simdata) {

  df <- simdata$data2model

  lump.model <- glm(presabs ~ env, data = df, family = binomial)
  lump.params <- broom::tidy(lump.model)
  interc <- lump.params[lump.params$term == "(Intercept)", 2:3]
  slope <- lump.params[lump.params$term == "env", 2:3]
  lump.pars <- data.frame(interc, slope)
  names(lump.pars) <- c("interc.lump", "interc.lump.se", "slope.lump", "slope.lump.se")


  lump.df <- lump.pars
  for (i in 2:length(unique(df$taxon))) {
    lump.df <- bind_rows(lump.df, lump.pars)
  }

  lump.df

}



#' Fit mixed model (lme4)
#'
#' @param simdata List generated by \code{\link{simul_data}}.
#'
#' @return A data frame with 4 columns (intercept estimate and standard error, and slope estimate and standard error), and as many rows as taxa.
#' @export
#' @import dplyr

fit_mixed <- function(simdata) {

  df <- simdata$data2model

  mixed.model <- lme4::glmer(presabs ~ env + (1 + env | taxon), data = df, family = binomial)
  mixed.params <- broom::tidy(mixed.model, effects = "ran_modes")
  intercs <- mixed.params[mixed.params$term == "(Intercept)", 4:5]
  slopes <- mixed.params[mixed.params$term == "env", 4:5]
  mixed.pars <- data.frame(intercs, slopes)
  names(mixed.pars) <- c("interc.mixed", "interc.mixed.se", "slope.mixed", "slope.mixed.se")
  mixed.pars

}



#' Fit Bayesian mixed model (brms)
#'
#' @param simdata List generated by \code{\link{simul_data}}.
#' @param delta adapt_delta parameter for brm
#'
#' @return A data frame with 4 columns (intercept estimate and standard error, and slope estimate and standard error), and as many rows as taxa.
#' @export
#' @import brms

fit_bayesmix <- function(simdata, delta = 0.95) {

  df <- simdata$data2model

  bayesmix <- brm(
    presabs ~ env + (1 + env | taxon),
    data = df,
    family = bernoulli(),
    #cov_ranef = list(taxa = A),
    prior = c(
      prior(student_t(3, 0, 10), class = "Intercept"),
      prior(normal(0, 1), class = "b"),
      prior(student_t(3, 0, 5), class = "sd")),  # sd of group random effect
    chains = 3, cores = 3, iter = 2000,
    #control = list(adapt_delta = delta),
    #sample_prior = "only",
    file = "bayesmix"
  )

  bayesmix <- update(bayesmix,
                     newdata = df,
                     chains = 3, cores = 3, iter = 2000,
                     control = list(adapt_delta = delta))

  #summary(bayesmix)

  bayesmix.params <- data.frame(coef(bayesmix)$taxon[, , "Intercept"][, 1:2],
                             coef(bayesmix)$taxon[, , "env"][, 1:2])
  names(bayesmix.params) <- c("interc.bayesmix", "interc.bayesmix.se", "slope.bayesmix", "slope.bayesmix.se")

  bayesmix.params

}




#' Fit Bayesian phylogenetic mixed model (brms)
#'
#' @param simdata List generated by \code{\link{simul_data}}.
#' @param delta adapt_delta parameter for brm
#'
#' @return A data frame with 2 columns (intercept and slope estimate), and as many rows as taxa.
#' @export
#' @import brms

fit_pglmm <- function(simdata, delta = 0.95) {

  phy <- simdata$phylog
  inv.phylo <- MCMCglmm::inverseA(phy, nodes = "TIPS", scale = TRUE)
  A <- solve(inv.phylo$Ainv)
  rownames(A) <- rownames(inv.phylo$Ainv)

  df <- simdata$data2model
  df$phylo <- df$taxon


  if (!file.exists("pglmm.rds")) {

    pglmm <- brm(
      #presabs ~ env + (1 + env | taxon),
      presabs ~ env + (1 + env | taxon) + (1 + env | phylo),
      data = df,
      family = bernoulli(),
      #cov_ranef = list(taxon = A),
      cov_ranef = list(phylo = A),
      prior = c(
        prior(student_t(3, 0, 10), class = "Intercept"),
        prior(normal(0, 1), class = "b"),
        prior(student_t(3, 0, 5), class = "sd")),  # sd of group random effect
      chains = 3, cores = 3, iter = 2000,
      #control = list(adapt_delta = delta),
      #sample_prior = "only",
      file = "pglmm"
    )

  } else {

    pglmm <- brm(
      #presabs ~ env + (1 + env | taxon),
      presabs ~ env + (1 + env | taxon) + (1 + env | phylo),
      data = df,
      family = bernoulli(),
      #cov_ranef = list(taxon = A),
      cov_ranef = list(phylo = A),
      prior = c(
        prior(student_t(3, 0, 10), class = "Intercept"),
        prior(normal(0, 1), class = "b"),
        prior(student_t(3, 0, 5), class = "sd")),  # sd of group random effect
      chains = 3, cores = 3, iter = 2000,
      #control = list(adapt_delta = delta),
      #sample_prior = "only",
      file = "pglmm"
    )

    # this seems to give error when changing number of taxa (unless recompiling)
    pglmm <- update(pglmm,
                    newdata = df,
                    #cov_ranef = list(taxon = A),
                    cov_ranef = list(phylo = A),
                    #recompile = TRUE,
                    chains = 3, cores = 3, iter = 2000,
                    control = list(adapt_delta = delta))

  }




  pglmm.params <- data.frame(fixef(pglmm)[1, 1] +
                               ranef(pglmm)$taxon[, , "Intercept"][, 1] +
                               ranef(pglmm)$phylo[, , "Intercept"][, 1],
                             fixef(pglmm)[2, 1] +
                               ranef(pglmm)$taxon[, , "env"][, 1] +
                               ranef(pglmm)$phylo[, , "env"][, 1])

  #names(pglmm.params) <- c("interc.pglmm", "interc.pglmm.se", "slope.pglmm", "slope.pglmm.se")
  names(pglmm.params) <- c("interc.pglmm", "slope.pglmm")

  pglmm.params

}