#' Simulate data from the multi-species, misclassification count model for a single season.
#'
#' @description This function simulates data from he multi-species, misclassification count occupancy model first developed by
#' \href{https://besjournals.onlinelibrary.wiley.com/doi/abs/10.1111/2041-210X.13315}{Wright et al. (2020)}.
#'
#'@details This function simulates data from the single season, multiple
#' species occupancy model to estimate occupancy and relative activity for multiple species simultaneously. If `rand_visits
#' = TRUE`, each site is visited a random number of times between two and
#' `max_j`. Covariates are drawn from the uniform(0, 1.5) distribution so
#' that the effect of the direction of each regression coefficient is
#' intuitive. Note that if covariates are not desired, `beta_psi` and
#' `beta_lambda` can be set to intercepts that generate the desired derived
#' probabilities.
#'
#' @param K number of species
#' @param M number of sites
#' @param M.conf number of sites that have confirmed observations
#' @param max_j maximum number of visits to each site. If `rand_visits =
#' FALSE`, this value is the number of visits to each site. If
#' `rand_visits = TRUE`, each site is visited a random
#' (`sample(2:max_j, size = 1)`) number of times.
#' @param beta_psi vector of regression coefficients used to generate psi
#' @param beta_lambda vector of regression coefficients used to generate lambda
#' @param seed optional seed for reproducibility
#'@param rand_visits logical; should each site be visited a random number of
#' times? See details.
#' @param theta confusion matrix for automatic call classifications. Rows of theta should sum to one, diagonal elements are correct identification probabilities, first row corresponds to probabilities for calls truly from species 1.
#'
#' @return object of class `list` containing the following elements: \cr
#' * `beta_psi` vector of regression coefficients used to generate psi
#' * `beta_lambda` vector of regression coefficients used to generate lambda
#' * `psi_cov` matrix of site level covariates
#' * `lambda_cov` array of detection level covariates; each slice represents a
#' single covariate
#' * `psi` vector of derived site level occupancy probabilities
#' * `lambda` matrix of derived visit level expected count detection parameter
#' * `z` matrix of latent occupancy states for each site/species
#' * `Y` array of observed Bernoulli responses for site/visit/species
#' * `n_visits` vector of number of visits to each site
#' * `c.conf` how calls confirmed to species K were classified for confirmed visits
#' * `c.sum.a` and `c.sum.b` unconfirmed calls classified TO species K from sites with some confirmations ("a") or no confirmations ("b")
#' * `sites.conf` site locations with confirmations
#' * `sites.unconf.a` and `sites.unconf.b` site locations without confirmations ("a" indecates some observations at these sites were confirmed, "b" sites had no confirmations)
#' * `data` a data frame containing all information necessary to fit the
#' model
#' @export
#'
#' @importFrom stats runif rbinom rmultinom
#'
#' @example examples/sim_MSMocc_ex.R
#'
sim_MSMocc <- function(K = 2, M = 20, M.conf = 2, max_j = 10,
beta_psi = matrix(c(0, 1, 0, 1.2), ncol = K),
beta_lambda = matrix(c(0, 1, 0, 1.2), ncol = K),
theta = matrix(c(0.8, 0.2, 0.2, 0.8), byrow = T, ncol = K),
seed = NULL, rand_visits = TRUE) {
# check that number sites with confirmed observations is less than total number of sites
if (M.conf > M) stop("The number of sites with confirmed observations (M.conf) must be less than or equal to the total number of sites (M).")
# create out vector
out <- list()
# optional seed
if (!is.null(seed)) set.seed(seed)
# convenience
inv.logit <- function(x) exp(x) / (1 + exp(x))
# double check betas; separate into list by species
beta_psi <- as.matrix(beta_psi, ncol = K)
beta_lambda <- as.matrix(beta_lambda, ncol = K)
# dimension of regression coefficients
# assumption: all species have same number of coefficients
p_beta_psi <- nrow(beta_psi)
p_beta_lambda <- nrow(beta_lambda)
# generate covariates
## psi
if (p_beta_psi == 1) {
psi_cov <- matrix(rep(1, M), ncol = 1)
} else{
psi_cov <- cbind(
rep(1, M),
matrix(
stats::runif((p_beta_psi - 1) * M, 0, 1.5), ncol = p_beta_psi - 1
)
)
}
# column names
if (p_beta_psi == 1) {
colnames(psi_cov) <- "psi_int"
} else {
colnames(psi_cov) <- c("psi_int", paste0("psi_cov", 1:(ncol(psi_cov) - 1)))
}
# generate psi for each species (one column per species)
psi <- inv.logit(psi_cov %*% beta_psi)
# lambda
lambda_cov <- array(0, dim = c(M, max_j, p_beta_lambda))
lambda_linpred <- array(0, dim = c(M, max_j, K))
for (k in 1:p_beta_lambda) {
if (k == 1) {
lambda_cov[, , k] <- 1
} else {
lambda_cov[, , k] <- stats::runif(n = M * max_j, 0, 1.5)
}
for (i in 1:K) {
lambda_linpred[, , i] <- lambda_linpred[,, i] + beta_lambda[k, i] * lambda_cov[, , k]
}
}
# names
if (p_beta_lambda == 1) {
dimnames(lambda_cov)[[3]] <- "lambda_int"
} else {
dimnames(lambda_cov)[[3]] <- c("lambda_int", paste0("lambda_cov", 1:(dim(lambda_cov)[3] - 1)))
}
# generate lambda
lambda <- array(0, dim = c(M, max_j, K))
for (i in 1:K) {
lambda[ , , i] <- inv.logit(lambda_linpred[, , i])
}
# generate latent occupancy for each species
# z <- list()
# for (i in 1:K) {
z <- apply(psi, c(1,2), function(x) stats::rbinom(1, 1, x))
# }
# generate responses
Y <- array(0, dim = c(M, max_j, K))
for (k in 1:K) {
for (i in 1:M) {
for (j in 1:max_j) {
Y[i, j, k] <- z[i,k] * stats::rpois(1, lambda = lambda[i, j, k])
}
}
}
# add in NAs if rand_visits
if (rand_visits) {
n_visits <- sample(c(2:(max_j)), size = M, replace = TRUE)
for (i in 1:M) {
if (n_visits[i] == max_j) {
NULL
} else {
# response
Y[i, (n_visits[i] + 1):max_j, ] <- NA
# covariates
lambda_cov[i, (n_visits[i] + 1):max_j, ] <- NA
# derived parameters
lambda[i, (n_visits[i] + 1):max_j, ] <- NA
}
}
out$n_visits <- apply(Y, 1, function(x) length(which(!is.na(x)))/K)
}
if (!rand_visits) out$n_visits <- rep(max_j, M)
# generate classifications
c <- array(NA, dim = c(M * max_j, K, K))
for (k in 1:K) {
c[, ,k] <- t(sapply(as.vector(Y[, , k]),
function(x) {
if (is.na(x) == T) {
return(rep(NA, K))
}else {rmultinom(1, x, theta[k, ])}
}))
}
#total number of observations
n.obs <- sum(out$n_visits)
#total number of observations at sites with (some) confirmed observations
n.obs.conf <- sum(out$n_visits[1:M.conf])
#number of confirmed observations per site with confirmed observations
n.conf <- 2
n.unconf <- out$n_visits[1:M.conf] - n.conf
keep.conf <- vector()
for (i in 1:M.conf) {
keep.conf <- c(keep.conf, c(rep(TRUE, n.conf), rep(FALSE, n.unconf[i])))
}
# c.conf[obs, species (how classified), species (which confirmed)] is how calls confirmed to each species were classified for confirmed visits
c.conf <- c[1:n.obs.conf, , ][keep.conf, ,]
# same as above but for unconfirmed visits
c.unconf <- c[1:n.obs.conf, ,][!keep.conf, ,]
# c.sum.a[unconfirmed calls (at sites with confirmed calls), species] is the unconfirmed calls classified TO species k from sites that had some confirmations
# c.sum.b is counts of unconfirmed calls to species k from sites with no confirmations
c.sum.a <- matrix(NA, ncol = K, nrow = dim(c.unconf)[1])
c.sum.b <- matrix(NA, ncol = K, nrow = length((n.obs.conf + 1):n.obs))
for (i in 1:K) {
c.sum.a[, i] <- apply(c.unconf[, i, ], 1, sum, na.rm = T)
c.sum.b[, i] <- apply(c[(n.obs.conf + 1):n.obs, i, ], 1, sum, na.rm = T)
}
# all site variables just keep track of which sites correspond to each set of observations
sites.conf <- vector()
for (i in 1:M.conf) {
sites.conf <- sites.unconf <- c(sites.conf, rep(i, out$n_visits[i]))
}
sites.conf <- sites.conf[keep.conf]
sites.unconf.a <- sites.unconf[!keep.conf]
sites.unconf.b <- vector()
for (i in (M.conf + 1):M) {
sites.unconf.b <- c(sites.unconf.b, rep(i, out$n_visits[i]))
}
# return
out$beta_psi <- beta_psi
out$beta_lambda <- beta_lambda
out$psi_cov <- psi_cov
out$lambda_cov <- lambda_cov
out$psi <- psi
out$lambda <- lambda
out$z <- z
out$Y <- Y
out$c.conf = c.conf
out$c.sum.a = c.sum.a
out$c.sum.b = c.sum.b
out$sites.conf = sites.conf
out$sites.unconf.a = sites.unconf.a
out$sites.unconf.b = sites.unconf.b
# create data compatible with occ_mod
# data <- list(
data = data.frame(
site = rep(1:M, out$n_visits),
visit = unlist(c(sapply(out$n_visits, function(x) 1:x)))
)
# )
# multiple species data
for (i in 1:K) {
data <- cbind(data, stats::na.omit(c(t(out$Y[,,i]))))
}
names(data)[-c(1:2)] <- paste("species", 1:K)
# site covariates
tmp <- out$psi_cov
tmp[,1] <- 1:nrow(tmp);colnames(tmp)[1] <- "site"
tmp <- as.data.frame(tmp)
data <- dplyr::left_join(data, tmp, by = "site")
# detection covariates
tmp <- as.data.frame(apply(out$lambda_cov, 3, function(x) stats::na.omit(c(t(x)))))
tmp$site <- data$site
tmp$visit <- data$visit
tmp <- tmp[,-which(names(tmp) == "lambda_int")]
data <- dplyr::left_join(data, tmp, by = c("site", "visit"))
# export
out$data <- data
return(out)
}
#' Title
#'
#' @param occupancy
#' @param detection
#' @param data
#' @param nspecies
#' @param niter
#' @param nchains
#' @param seed
#' @param save_model
#' @param model_name
#'
#' @return
#' @export
#'
#' @examples
#'
#' @importFrom rlang .data
#'
MSMocc_mod <- function(occupancy, detection, data, K, species.names,
niter = 1000, nchains = 3, seed = NULL,
save_model = FALSE, model_name = paste0("MSMocc_model_", Sys.Date()),
beta_prior = "dnorm(0, 1/2)"){
# convenience
`%notin%` <- Negate("%in%")
# check names on data
if ("site" %notin% names(data)) stop("Data must contain column named 'site'")
if ("visit" %notin% names(data)) stop("Data must contain column named 'visit'")
# if ("y" %notin% names(data)) stop("Data must contain column named 'y'")
for (i in 1:K) {
if (species.names[i] %notin% names(data)) stop("Data must contain a column for each species matching provided species names.")
}
# model formulas
occupancy_mod <- stats::as.formula(occupancy)
detection_mod <- stats::as.formula(detection)
# model matrices
## occupancy
occupancy_mod_matrix <- stats::model.matrix(object = occupancy_mod, data = data)
occ_num_params <- ncol(occupancy_mod_matrix)
occupancy_mod_matrix <- cbind(
occupancy_mod_matrix, matrix(data$site, ncol = 1, dimnames = list(NULL, "site_"))
) %>% unique(.)
if (nrow(occupancy_mod_matrix) != length(unique(data$site))) stop("There are more unique site-level covariate values than there are sites.")
if (occ_num_params == 1) {
occupancy_mod_matrix <- as.matrix(occupancy_mod_matrix[,-which(colnames(occupancy_mod_matrix) == "site_")], ncol = 1)
colnames(occupancy_mod_matrix) <- "(Intercept)"
} else{
occupancy_mod_matrix <- occupancy_mod_matrix[,-which(colnames(occupancy_mod_matrix) == "site_")]
}
## detection
detection_mod_matrix <- stats::model.matrix(object = detection_mod, data = data)
## number of sites (M) and visits (n_visits)
M <- length(unique(data$site))
n_visits <- data %>%
dplyr::select(.data$site, .data$visit) %>%
dplyr::group_by(.data$site) %>%
dplyr::arrange(dplyr::desc(.data$visit)) %>%
dplyr::slice(1) %>%
dplyr::ungroup() %>%
dplyr::pull(.data$visit) %>%
as.vector()
# prep data for NIMBLE
nimble_data <- list()
# prep list for multiple species' data
Y <- array(NA, c(M, max(n_visits), K))
## responses
for (i in 1:K) {
Y[,,i] <- data %>%
dplyr::select(.data$site, .data$visit, .data[[species.names[i]]]) %>%
dplyr::group_by(.data$site) %>%
tidyr::pivot_wider(names_from = "visit",
values_from = species.names[i]) %>%
dplyr::ungroup() %>%
dplyr::select(`1`:ncol(.)) %>%
as.matrix()
}
nimble_data$Y <- Y
## occupancy level
occupancy_covariates <- list()
for (i in 1:ncol(occupancy_mod_matrix)) {
occupancy_covariates[[colnames(occupancy_mod_matrix)[i]]] <- unname(occupancy_mod_matrix[,i])
}
nimble_data$occupancy_covariates <- occupancy_covariates
## detection level
detection_covariates <- list()
for (i in 1:ncol(detection_mod_matrix)) {
detection_covariates[[colnames(detection_mod_matrix)[i]]] <- data %>%
dplyr::select(.data$site, .data$visit) %>%
dplyr::mutate(tmp = detection_mod_matrix[,i]) %>%
dplyr::group_by(.data$site) %>%
tidyr::pivot_wider(names_from = "visit", values_from = "tmp") %>%
dplyr::ungroup() %>%
dplyr::select(`1`:ncol(.)) %>%
as.matrix()
}
nimble_data$detection_covariates <- detection_covariates
## nvisits
nimble_data$nvisits <- n_visits
###### reorganize sites and visits into observations to loop through ########
obs <- 1:(sum(n_visits))
nimble_data$obs <- obs
# NIMBLE code
## priors
priors <- paste0(
"# priors\n",
"for(i in 1:", K, "){ \n",
"for(j in 1:", ncol(occupancy_mod_matrix), "){beta_psi[i,j] ~ ", beta_prior, "}",
"\n",
"for(j in 1:", ncol(detection_mod_matrix), "){beta_lambda[i,j] ~ ", beta_prior, "}",
"}",
"\n"
## should theta be a provided matrix or have a prior??
)
#--------------------------------------------------------------
# likelihoods
#--------------------------------------------------------------
likelihood_a <- paste0(
"# likelihood \n",
"# loop through species and sites \n",
"for(i in 1:",K,"){ \n",
"for(j in 1:",M,"){ \n",
" # calculate psi \n")
if (ncol(occupancy_mod_matrix) > 1) {
likelihood_b <- paste0(
" logit(psi[i,j]) <- beta_psi[i, 1] +\n",
paste0(sapply(2:ncol(occupancy_mod_matrix), function(x){
paste0(" beta_psi[i,", x, "] * ", colnames(occupancy_mod_matrix)[x], "[j]")
}), collapse = " + \n")
)
} else{
likelihood_b <- paste0(
" logit(psi[i,j]) <- beta_psi[i,1]"
)
}
likelihood_c <- paste0(
"\n\n",
" # latent occupancy state \n",
" z[i,j] ~ dbinom(psi[i,j], 1) \n\n",
" # loop through visits \n",
" for(k in 1:nvisits[j]){\n",
" # calculate lambda\n"
)
if (ncol(detection_mod_matrix) > 1) {
likelihood_d <- paste0(
" logit(lambda[i,j,k]) <- beta_lambda[i,1] +\n",
paste0(sapply(2:ncol(detection_mod_matrix), function(x){
paste0(" beta_lambda[i,", x, "] * ", colnames(detection_mod_matrix)[x], "[j,k]")
}), collapse = " + \n")
)
} else{
likelihood_d <- paste0(
" logit(p[i,j,k]) <- beta_lambda[i,1]"
)
}
likelihood_e <- paste0(
"\n\n",
" # response model\n",
" y.true[j,k, i] ~ dpois(lambda[i,j,k]*z[i,j])",
"\n }",
"\n} \n",
"y.true.obs[1:",obs, ", i] <- y.true[1:", M, ", 1:", n_visits, ", i] \n",
"y.counts[i] <- sum(y.true[1:M,1:max_visits, i])",
"\n}"
)
likelihood_f <- paste0(
"\n\n",
" #classification observation model\n",
"for (k in 1:",K, ") {\n",
"for (obs in 1:",obs, ") { \n",
"C.obs[obs,1:",K," ,k] ~ dmulti(theta[k, 1:",K, "], y.true.obs[obs, k])\n",
"\n}",
"\n}"
)
likelihood <- paste0(likelihood_a, likelihood_b, likelihood_c, likelihood_d, likelihood_e, likelihood_f)
code <- paste0(priors, "\n", likelihood)
# initialization
mod_inits <- function(x){
tmp <- x
list(
z = t(apply(nimble_data$Y[, ,], c(1, 3),
function(x) ifelse(sum(x, na.rm = T) == 0, 0, 1))),
beta_psi = matrix(stats::rnorm(K * ncol(occupancy_mod_matrix), 0, sqrt(2)),
nrow = K),
beta_lambda = matrix(stats::rnorm(K * ncol(detection_mod_matrix), 0, sqrt(2)),
nrow = K),
y.counts = apply(nimble_data$Y[, , ], c(3),
sum, na.rm = T)
)
}
# model
data_ <- c(nimble_data$occupancy_covariates, nimble_data$detection_covariates)
data_ <- data_[-c(which(names(data_) == "(Intercept)"))]
data_[["M"]] <- nrow(nimble_data$Y)
data_[["K"]] <- dim(nimble_data$Y)[3]
data_[["nvisits"]] <- nimble_data$nvisits
data_[["max_visits"]] <- max(nimble_data$nvisits)
data_[["Y"]] <- nimble_data$Y
# write model
if (save_model) message("Writing NIMBLE model to current working directory:")
cat(code)
fileConn <- file(paste0(model_name, ".txt"))
writeLines(code, fileConn)
close(fileConn)
message("\nBuilding R model")
model <- nimble::readBUGSmodel(
model = paste0(model_name, ".txt"),
data = data_,
inits = mod_inits(1)
)
if (!save_model) file.remove(paste0(model_name, ".txt"))
# fit model
message("\nCompiling R model to C")
model_c <- nimble::compileNimble(model)
message("\nBuilding R MCMC object")
mcmcinfo <- utils::capture.output(mcmc <- nimble::buildMCMC(model_c,
monitors = c("z",
"beta_psi",
"beta_lambda")))
message("\nCompiling R MCMC object to C")
mcmc_c <- nimble::compileNimble(mcmc)
message("\nRunning MCMC")
inits <- lapply(1:nchains, function(x) mod_inits(x))
samples <- nimble::runMCMC(mcmc_c, niter = niter, inits = inits, nchains = nchains)
# compute posterior likelihood
# compute posterior likelihood
message("\nComputing posterior likelihood")
multispecies_loglik <- list()
for (k in 1:K) {
multispecies_loglik[[species.names[k]]] <- list()
for (i in 1:nchains) {
samples_ <- samples[[i]]
beta_psi <- samples_[,which(colnames(samples_) %in% paste0('beta_psi[',k, ", ", 1:ncol(occupancy_mod_matrix), ']'))]
beta_lambda <- samples_[,which(colnames(samples_) %in% paste0('beta_lambda[',k, ", ", 1:ncol(detection_mod_matrix), ']'))]
z <- samples_[,which(colnames(samples_) %in% paste0('z[',k, ", ", 1:nrow(data_[["Y"]]), ']'))]
z_ <- z[,rep(1:ncol(z),data_[["nvisits"]])]
y_ <- matrix(rep(data[, species.names[k]], nrow(samples_)), nrow = nrow(samples_), byrow = TRUE)
psi_logit <- exp(beta_psi %*% t(occupancy_mod_matrix))
psi <- psi_logit / (1 + psi_logit)
lambda_logit <- exp(beta_lambda %*% t(detection_mod_matrix))
lambda <- lambda_logit / (1 + lambda_logit)
# log pointwise predictive density
site <- psi^z * (1 - psi)^(1 - z)
visit <- ((z_*lambda)^y_ * exp^(-z_ * lambda))/(factorial(y_))
ndx_upr <- cumsum(data_[["nvisits"]])
ndx_lwr <- ndx_upr - data_[["nvisits"]] + 1
tmp <- sapply(1:ncol(site), function(x){ apply(visit[,ndx_lwr[x]:ndx_upr[x]], 1, prod)})
multispecies_loglik[[species.names[k]]][[i]] <- log(site * tmp)
colnames( multispecies_loglik[[species.names[k]]][[i]]) <- paste0("site", 1:nrow(data_[[species.names[k]]]))
}
}
names( multispecies_loglik[[species.names[k]]]) <- paste0("chain", 1:nchains)
message("\nDone.\n")
# compute fitted values
message("\nComputing fitted values.\n")
psi_mcmc <- list()
lambda_mcmc <- list()
for (i in 1:nchains) {
# psi
betas <- samples[[i]][,grepl("_psi[[]", colnames(samples[[i]]))]
tmp <- exp(betas %*% t(occupancy_mod_matrix))
psi <- tmp / (1 + tmp)
colnames(psi) <- paste0("site", 1:ncol(psi))
psi_mcmc[[paste0("chain", i)]] <- psi
# detection
betas <- samples[[i]][,grepl("_lambda[[]", colnames(samples[[i]]))]
lambda_mcmc_chain <- array(0, dim = c(
nrow(psi), nrow(detection_covariates[[1]]), ncol(detection_covariates[[2]])
))
for (iter in 1:nrow(psi)) {
beta_iter <- betas[iter,]
tmp <- sapply(1:length(detection_covariates), function(x) beta_iter[x] * detection_covariates[[x]], simplify = "array")
tmp2 <- exp(apply(tmp, 2, rowSums))
lambda_mcmc_chain[iter,,] <- tmp2 / (1 + tmp2)
dimnames(lambda_mcmc_chain)[[2]] <- paste0("site", 1:(dim(lambda_mcmc_chain)[2]))
dimnames(lambda_mcmc_chain)[[3]] <- paste0("visit", 1:(dim(lambda_mcmc_chain)[3]))
}
lambda_mcmc[[paste0("chain", i)]] <- lambda_mcmc_chain
}
cat("\nDone.\n")
# return
out <- list(samples = samples, loglik = loglik,
derived = list(psi = psi_mcmc,
lambda = lambda_mcmc))
# attributes
class(out) <- c("MCMocc_mod", "list")
attr(out, "code") <- code
attr(out, "mcmcinfo") <- mcmcinfo
attr(out, "occupancy_call") <- occupancy
attr(out, "detection_call") <- detection
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.