#-----------------------------------------------------
#' Run the modern (calibration) model
#'
#' @param modern_elevation A dataframe of modern elevations
#' @param modern_species A dataframe of modern counts (to be sorted with \code{\link{sort_modern}})
#' @param scale_x Set to TRUE to scale elevation data to have mean 0 and sd 1
#' @param sigma_z_priors priors for foram variability (if available)
#' @param dx The elevation interval for spacing the spline knots. Defaults to 0.2
#' @param ChainNums The number of MCMC chains to run
#' @param n.iter The number of iterations
#' @param n.burnin The number of burnin samples
#' @param n.thin The number of thinning
#' @param validation.run Defaults to FALSE. Set to TRUE if running a validation
#' @param fold Fold number for cross validation (CV)
#'
#' @return a list of objects including data, parameter values and scaling information
#' @export
#'
#' @examples
#' \donttest{
#' test_modern_mod <- run_modern(
#' modern_elevation = NJ_modern_elevation,
#' modern_species = NJ_modern_species,
#' n.iter = 10,
#' n.burnin = 1,
#' n.thin = 1
#' )
#' }
#'
run_modern <- function(modern_elevation = NULL,
modern_species = NULL,
scale_x = FALSE,
sigma_z_priors = NULL,
dx = 0.1,
ChainNums = seq(1, 3),
n.iter = 40000,
n.burnin = 10000,
n.thin = 15,
validation.run = FALSE,
fold = 1) {
# read in the modern data
if (!is.null(modern_species)) {
modern_dat <- modern_species
} else {
modern_dat <- BTFr::NJ_modern_species
}
# get the sorted (by species counts) modern data
modern_data_sorted <- sort_modern(modern_dat)
species_names <- modern_data_sorted$species_names
# read in the elevation data
if (!is.null(modern_elevation)) {
elevation_dat <- modern_elevation
} else {
elevation_dat <- BTFr::NJ_modern_elevation
}
# apply scaling if specified
if (scale_x) {
modern_elevation <- scale(elevation_dat$SWLI)
scale_att <- attributes(modern_elevation)
}
if (!scale_x) {
modern_elevation <- as.matrix(elevation_dat$SWLI / 100)
scale_att <- NULL
}
# run validation if specified
if (validation.run) {
set.seed(3847)
K <- 10
folds <- rep(1:K, ceiling(nrow(modern_data_sorted$moderndat_sorted) / K))
folds <- folds[sample(1:length(modern_elevation))]
test_samps <- which(folds == fold)
test_samps
y <- modern_data_sorted$moderndat_sorted[-test_samps, ]
x <- modern_elevation[-test_samps, 1]
y_test <- tibble::as_tibble(modern_data_sorted$moderndat_sorted[test_samps, ])
x_test <- tibble::as_tibble(modern_elevation[test_samps, 1])
}
if (!validation.run) {
y <- modern_data_sorted$moderndat_sorted
x <- modern_elevation[, 1]
y_test <- NULL
x_test <- NULL
}
# Get min/max elevations (will be used with priors)
elevation_min <- floor(min(modern_elevation))
elevation_max <- ceiling(max(modern_elevation))
# Get index for the first species (if any) that has all zero counts
begin0 <- modern_data_sorted$begin0
# Total species counts
N_count <- apply(y, 1, sum)
# Regular B Splines Create some basis functions
res <- bbase(x, xl = elevation_min, xr = elevation_max, dx = dx) # This creates the basis function matrix
B.ik <- res$B.ik
K <- dim(B.ik)[2]
D <- 1
Delta.hk <- diff(diag(K), diff = D)
Deltacomb.kh <- t(Delta.hk) %*% solve(Delta.hk %*% t(Delta.hk))
Z.ih <- B.ik %*% Deltacomb.kh
H <- dim(Z.ih)[2]
# Prior specifications
if (is.null(sigma_z_priors)) {
mean_sigma_z <- rep(0, ncol(y))
sd_sigma_z <- rep(1, ncol(y))
}
if (!is.null(sigma_z_priors)) {
species_prior <- sigma_z_priors$species
mean_sigma_z <- rep(0, ncol(y))
sd_sigma_z <- rep(2, ncol(y))
match_index <- match(species_names, species_prior)[1:length(species_prior)]
mean_sigma_z[1:length(species_prior)] <- sigma_z_priors$mean_sigma_overall[match_index]
sd_sigma_z[1:length(species_prior)] <- sigma_z_priors$sd_sigma_overall[match_index]
}
# Jags model data
pars <- c("p", "beta.j", "sigma.z", "sigma.delta", "delta.hj", "spline")
data <- list(
y = y,
n = nrow(y),
m = ncol(y),
N_count = N_count,
H = H,
Z.ih = Z.ih,
begin0 = begin0,
mean_sigma_z = mean_sigma_z,
sd_sigma_z = sd_sigma_z
)
# run the model
temp_files <- rep(NA, length(ChainNums))
for (chainNum in ChainNums) {
cat(paste("Start chain ID ", chainNum), "\n")
run <- InternalRunOneChain(
chainNum = chainNum, jags_data = data,
jags_pars = pars, n.burnin = n.burnin, n.iter = n.iter,
n.thin = n.thin
)
temp_files[chainNum] <- run$file
}
# Get model output needed for the core run
data[["x"]] <- x
data[["y_test"]] <- y_test
data[["x_test"]] <- x_test
jags_data <- list(
data = data,
pars = pars,
elevation_max = elevation_max,
elevation_min = elevation_min,
dx = dx,
species_names = species_names,
x_center = scale_att$`scaled:center`,
x_scale = scale_att$`scaled:scale`,
temp_files = temp_files
)
# create the core input
core_input <- internal_get_core_input(
ChainNums = ChainNums,
jags_data = jags_data,
scale_x = scale_x
)
# Update jags_data list
modern_out <- list(
data = data,
pars = pars,
elevation_max = elevation_max,
elevation_min = elevation_min,
dx = dx,
species_names = species_names,
delta0.hj = core_input$delta0.hj,
delta0_sd = core_input$delta0_sd,
beta0.j = core_input$beta0.j,
beta0_sd = core_input$beta0_sd,
sig0_z = core_input$sig0_z,
tau.z0 = core_input$tau.z0,
src_dat = core_input$src_dat,
scale_x = scale_x,
x_center = scale_att$`scaled:center`,
x_scale = scale_att$`scaled:scale`
)
class(modern_out) <- "BTFr"
invisible(modern_out)
}
#-----------------------------------------------------
InternalRunOneChain <- function(chainNum, jags_data, jags_pars, n.burnin,
n.iter, n.thin) {
set.seed.chain <- chainNum * 209846
jags.dir <- tempdir()
set.seed(set.seed.chain)
temp <- stats::rnorm(1)
# The model for the modern data
modernmodel <- "
model
{
for(i in 1:n)
{
for(j in begin0:m){
lambda[i,j] <- 1
}
for(j in 1:(begin0-1)){
spline[i,j] <- beta.j[j] + inprod(Z.ih[i,],delta.hj[,j])
z[i,j] ~ dnorm(spline[i,j],tau.z[j])
lambda[i,j] <- exp(z[i,j])
}#End j loop
y[i,] ~ dmulti(p[i,],N_count[i])
lambdaplus[i] <- sum(lambda[i,])
}#End i loop
###Get p's for multinomial
for(i in 1:n){
for(j in 1:m){
p[i,j] <- lambda[i,j]/lambdaplus[i]
}#End j loop
}#End i loop
#####Spline parameters#####
#Coefficients
for(j in 1:(begin0-1)){
for (h in 1:H)
{
delta.hj[h,j] ~ dnorm(0, tau.delta)
}
}
#Smoothness
tau.delta<-pow(sigma.delta,-2)
sigma.delta~dt(0, 2^-2, 1)T(0,)
###Variance parameter###
for(j in 1:(begin0-1)){
tau.z[j] <- pow(sigma.z[j],-2)
sigma.z[j] ~ dt(mean_sigma_z[j], sd_sigma_z[j]^-2, 1)T(0,)
###Intercept (species specific)
beta.j[j] ~ dt(0,100^-2,1)
}
}##End model
"
mod <- jags(
data = jags_data, parameters.to.save = jags_pars, model.file = textConnection(modernmodel),
n.chains = 1, n.iter = n.iter, n.burnin = n.burnin, n.thin = n.thin,
DIC = FALSE, jags.seed = set.seed.chain
)
mod_upd <- mod
temp.jags.file <- tempfile(paste0("jags_mod", chainNum), jags.dir, ".Rdata")
save(mod_upd, file = temp.jags.file)
cat(paste("Hooraah, Chain", chainNum, "has finished!"), "\n")
return(list(file = temp.jags.file))
}
#-----------------------------------------------------
internal_get_core_input <- function(ChainNums, jags_data, scale_x = FALSE) {
mcmc.array <- ConstructMCMCArray(
ChainIDs = ChainNums,
temp_files = jags_data$temp_files
)
n_samps <- dim(mcmc.array)[1]
######### For Splines #########
# This creates the components for the basis function matrix
xl <- jags_data$elevation_min
xr <- jags_data$elevation_max
begin0 <- jags_data$data$begin0
deg <- 3
dx <- jags_data$dx
knots <- seq(xl - deg * dx, xr + deg * dx, by = dx)
n_knots <- length(knots)
D <- diff(diag(n_knots), diff = deg + 1) / (gamma(deg + 1) * dx^deg)
K <- dim(D)[1]
Dmat <- 1
Delta.hk <- diff(diag(K), diff = Dmat) # difference matrix
Deltacomb.kh <- t(Delta.hk) %*% solve(Delta.hk %*% t(Delta.hk))
########## Get parameter estimates ##########
# Data
y <- jags_data$data$y
n <- nrow(y)
m <- ncol(y)
x <- jags_data$data$x
species_names <- jags_data$species_names
# Parameters
delta.hj_samps <- array(NA, c(n_samps, jags_data$data$H, (begin0 - 1)))
beta.j_samps <- sigma.z_samps <- array(NA, c(n_samps, (begin0 - 1)))
for (j in 1:(begin0 - 1))
{
for (h in 1:jags_data$data$H)
{
parname <- paste0("delta.hj[", h, ",", j, "]")
delta.hj_samps[, h, j] <- mcmc.array[1:n_samps, sample(ChainNums, 1), parname]
}
parname <- paste0("beta.j[", j, "]")
beta.j_samps[, j] <- mcmc.array[1:n_samps, sample(ChainNums, 1), parname]
}
for (j in 1:(begin0 - 1))
{
parname <- paste0("sigma.z[", j, "]")
sigma.z_samps[, j] <- mcmc.array[1:n_samps, sample(ChainNums, 1), parname]
}
delta0.hj <- apply(delta.hj_samps, 2:3, mean)
delta0_sd <- apply((apply(delta.hj_samps, 2:3, stats::sd)), 2, stats::median)
beta0.j <- apply(beta.j_samps, 2, mean)
beta0_sd <- apply(beta.j_samps, 2, stats::sd) %>% stats::median()
sig0_z <- apply(sigma.z_samps, 2, mean)
sigma.z0 <- rep(NA, (begin0 - 1))
for (i in 1:(begin0 - 1))
{
sigma.z0[i] <- delta0_sd[i] + sig0_z[i]
}
tau.z0 <- 1 / (sigma.z0^2)
# SRCs
p_star <- p_star_all <- spline_star <- z_star <- spline_star_all <- array(
NA,
c(n_samps, length(x), m)
)
for (i in 1:n_samps) {
for (j in begin0:m) {
spline_star_all[i, , j] <- 0
}
for (j in 1:(begin0 - 1)) {
for (k in 1:length(x)) x.index <- seq(1:length(x))
spline_star_all[i, , j] <- exp(mcmc.array[i, sample(ChainNums, 1), paste0("spline[", x.index, ",", j, "]")])
}
}
for (i in 1:n_samps) {
for (j in 1:m) {
p_star_all[i, , j] <- spline_star_all[i, , j] / apply(spline_star_all[i, , ], 1, sum)
}
}
# Get predicted values
pred_pi_mean <- apply(p_star_all, 2:3, mean)
pred_pi_high <- apply(p_star_all, 2:3, "quantile", 0.975)
pred_pi_low <- apply(p_star_all, 2:3, "quantile", 0.025)
if (scale_x) {
df <- data.frame((x * jags_data$x_scale) + jags_data$x_center, pred_pi_mean)
df_low <- data.frame((x * jags_data$x_scale) + jags_data$x_center, pred_pi_low)
df_high <- data.frame((x * jags_data$x_scale) + jags_data$x_center, pred_pi_high)
}
if (!scale_x) {
df <- data.frame(x * 100, pred_pi_mean)
df_low <- data.frame(x * 100, pred_pi_low)
df_high <- data.frame(x * 100, pred_pi_high)
}
colnames(df) <- c("SWLI", species_names)
colnames(df_low) <- c("SWLI", species_names)
colnames(df_high) <- c("SWLI", species_names)
df_long <- df %>% tidyr::pivot_longer(
names_to = "species", values_to = "proportion",
-SWLI
)
df_low_long <- df_low %>% tidyr::pivot_longer(
names_to = "species", values_to = "proportion_lwr",
-SWLI
)
df_high_long <- df_high %>% tidyr::pivot_longer(
names_to = "species", values_to = "proportion_upr",
-SWLI
)
src_dat <- df_long %>%
dplyr::mutate(proportion_lwr = df_low_long %>%
dplyr::pull(proportion_lwr), proportion_upr = df_high_long %>%
dplyr::pull(proportion_upr)) %>%
dplyr::arrange(SWLI)
return(list(
delta0.hj = delta0.hj,
delta0_sd = delta0_sd,
beta0.j = beta0.j,
beta0_sd = beta0_sd,
sig0_z = sig0_z,
tau.z0 = tau.z0,
src_dat = src_dat
))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.