.onAttach <- function(libname, pkgname) {
packageStartupMessage("This package relies on R INLA, you should install it manually by running: \n\n\tinstall.packages(\"INLA\", repos=c(getOption(\"repos\"), INLA=\"https://inla.r-inla-download.org/R/stable\"), dep=TRUE) \n\nFull instructions here:\n\n\thttp://www.r-inla.org/download \n\n")
}
# .onLoad <- function(libname, pkgname) {
# tryCatch(library(INLA), error = function(e) {
# # packageStartupMessage("Package INLA not found, attempting install")
# install.packages("INLA", repos=c(getOption("repos"), INLA="https://inla.r-inla-download.org/R/stable"), dep=TRUE)
# })
# }
#' @importFrom utils flush.console write.table install.packages
#' @importFrom grDevices dev.off pdf
#' @importFrom graphics abline points
#' @importFrom stats as.formula cor formula lm plogis sd
#### FUNCTION #1
#' @title Make a mesh, build an spde model, and a projector matrix
#' @description Construct a nonconvex boundary for a set of points. Create a triangle mesh based on initial point locations, specified or automatic boundaries, and mesh quality parameters. Construct observation/prediction weight matrices for models. Create an inla.spde2 model object for a Matern model.
#' @param response A data frame containing the response data including number of positive cases (n_positive), number of individuals examined (examined), and point locations (longitude and latitude).
#' @param control Parameters controlling inla.nonconvex.hull and inla.mesh.2d, including convex, concave, max_edge, and cutoff.
#' @param plot_mesh A logical argument indicating if mesh is to be plotted, Default: FALSE.
#' @return OUTPUT_DESCRIPTION
#' @examples
#' \dontrun{
#' if(interactive()){
#' #EXAMPLE1
#' }
#' }
#' @export
makeMeshSPDE <- function(response, control = list(convex = NULL, concave = NULL, max_edge = NULL, cutoff = NULL), plot_mesh = FALSE) {
coords = cbind(response$longitude, response$latitude)
boundary = INLA::inla.nonconvex.hull(points = coords, convex = control$convex, concave = control$concave)
# Make the mesh
mesh = INLA::inla.mesh.2d(loc = coords, boundary = boundary, max.edge = control$max_edge, cutoff = control$cutoff)
# Mapping between meshes and continuous space
A = INLA::inla.spde.make.A(mesh = mesh, loc = coords)
# SPDE model construction
spde = INLA::inla.spde2.matern(mesh, alpha = 2)
if(plot_mesh) {
plot(mesh)
points(coords, pch = 20, col = "red", cex = 0.2)
}
return(list(spde = spde, A_mat = A, n_points = mesh$n, mesh = mesh))
}
#### FUNCTION #2
#' @title Calculate variance inflation factor (VIF) and remove variables with VIF > threshold value
#' @description The function checks for collinearity between variables and performs stepwise VIF selection. Source: https://beckmw.wordpress.com/2013/02/05/collinearity-and-stepwise-vif-selection/.
#' @author Marcus W. Beck
#' @param response A data frame containing the response data including number of positive cases (n_positive), number of individuals examined (examined), and point locations (longitude and latitude).
#' @param raster_stack A collection of RasterLayer objects with the same spatial extent and resolution (These are the environmental covariates).
#' @param thresh The threshold value used for retaining variables, Default: 10.
#' @param trace A logical argument indicating if text output is returned as the stepwise selection progresses, Default: T.
#' @param ... Additional arguments passed to `lm'
#' @return OUTPUT_DESCRIPTION
#' @details DETAILS
#' @examples
#' \dontrun{
#' if(interactive()){
#' #EXAMPLE1
#' }
#' }
#' @export
calculateVIFandRemoveVariables <- function(response, raster_stack, thresh = 10, trace = T,...){
coords = cbind(response$longitude, response$latitude)
covariate_DF = data.frame(raster::extract(raster_stack, coords))
if(class(covariate_DF) != 'data.frame') covariate_DF <- data.frame(covariate_DF)
# get initial vif value for all comparisons of variables
vif_init <- NULL
var_names <- names(covariate_DF)
for(val in var_names){
regressors <- var_names[-which(var_names == val)]
form <- paste(regressors, collapse = '+')
form_in <- formula(paste(val, '~', form))
vif_init <- rbind(vif_init, c(val, 1/(1 - summary(lm(form_in, data = covariate_DF, ...))$r.squared)))
}
vif_max <- max(as.numeric(vif_init[ ,2]))
if(vif_max < thresh){
if(trace == T){ # print output of each iteration
prmatrix(vif_init, collab = c('var', 'vif'), rowlab=rep('', nrow(vif_init)), quote = F)
cat('\n')
cat(paste('All variables have VIF < ', thresh, ', max VIF ', round(vif_max,2), sep = ''), '\n\n')
}
return(var_names)
}
else{
in_dat <- covariate_DF
# backwards selection of explanatory variables, stops when all VIF values are below 'thresh'
while(vif_max >= thresh){
vif_vals <- NULL
var_names <- names(in_dat)
for(val in var_names){
regressors <- var_names[-which(var_names == val)]
form <- paste(regressors, collapse = '+')
form_in <- formula(paste(val, '~', form))
vif_add <- 1/(1 - summary(lm(form_in, data = in_dat, ...))$r.squared)
vif_vals <- rbind(vif_vals, c(val, vif_add))
}
max_row <- which(vif_vals[ , 2] == max(as.numeric(vif_vals[ , 2])))[1]
vif_max <- as.numeric(vif_vals[max_row, 2])
if(vif_max < thresh) break
if(trace == T){ # print output of each iteration
prmatrix(vif_vals, collab = c('var', 'vif'), rowlab = rep('', nrow(vif_vals)), quote = F)
cat('\n')
cat('removed: ', vif_vals[max_row,1], vif_max,'\n\n')
flush.console()
}
in_dat <- in_dat[ , !names(in_dat) %in% vif_vals[max_row, 1]]
}
return(names(in_dat))
}
}
#### FUNCTION #3
#' @title Find the best spatial model, i.e. the model with the smallest DIC
#' @description The function performs forward and backward elimination of variables in order to find the spatial model with a set of covariates that result in the smallest DIC.
#' @param response A data frame containing the response data including number of positive cases (n_positive), number of individuals examined (examined), and point locations (longitude and latitude).
#' @param raster_stack A collection of RasterLayer objects with the same spatial extent and resolution.
#' @param A_mat An observation/prediction weight matrix returned from makeMeshSPDE.
#' @param spde An inla.spde2 model object for a Matern model returned from makeMeshSPDE.
#' @param family A string indicating the likelihood family, Default: `binomial'.
#' @param save_output A logical argument indicating if output to be saved to a csv file, Default: TRUE.
#' @return OUTPUT_DESCRIPTION
#' @details DETAILS
#' @examples
#' \dontrun{
#' if(interactive()){
#' #EXAMPLE1
#' }
#' }
#' @export
findModelWithSmallestDIC <- function(response, raster_stack, A_mat, spde, family = "binomial", save_output = TRUE){
coords = cbind(response$longitude, response$latitude)
covariate_DF = data.frame(raster::extract(raster_stack, coords))
keep = calculateVIFandRemoveVariables(response, raster_stack, thresh = 10)
covars_keep = covariate_DF[ , keep]
# Stacking data
data.stk = INLA::inla.stack(data = list(n_positive = as.vector(response$n_positive),
examined = as.vector(response$examined)),
A = list(A_mat, 1),
effects=list(field = 1:spde$n.spde,
list(intercept = rep(1, nrow(response)),
covariate = covars_keep)))
# Model fitting
options(useFancyQuotes = FALSE)
formula = as.formula(paste("n_positive ~ -1 + intercept + f(field, model = spde) +", paste(names(covars_keep), collapse="+")))
result = INLA::inla(formula, family = family,
Ntrials = INLA::inla.stack.data(data.stk)$examined,
data = INLA::inla.stack.data(data.stk), verbose=FALSE,
control.predictor = list(compute=TRUE,
A = INLA::inla.stack.A(data.stk)),
control.fixed = list(expand.factor.strategy = 'inla'),
control.compute = list(dic = TRUE, waic = TRUE, config = TRUE),
control.inla = list(int.strategy = "eb"))
# Assess the importance of the variable one at a time
nm = noquote(names(covars_keep))
DIC = rep(NA, length(nm)+1)
DIC[1] = result$dic$dic
nmFinal <- nm
for (i in 1:length(nm)) {
j = nm[i]
covars = nmFinal[-which(nmFinal == j)]
formula = as.formula(paste("n_positive ~ -1 + intercept + f(field, model = spde) +", paste(covars, collapse="+")))
output = INLA::inla(formula, family = family,
Ntrials = INLA::inla.stack.data(data.stk)$examined,
data = INLA::inla.stack.data(data.stk), verbose = FALSE,
control.predictor = list(compute = TRUE,
A = INLA::inla.stack.A(data.stk)),
control.fixed = list(expand.factor.strategy = 'inla'),
control.compute = list(dic = TRUE, waic = TRUE, config = TRUE),
control.inla = list(int.strategy = "eb"))
if (output$dic$dic < min(DIC[1:i])){
nmFinal = covars}
DIC[i+1] = output$dic$dic
}
# Add variables back to the model one at a time
outnm = nm[which(!nm %in% nmFinal)]
if (length(outnm) > 0){
nmFinal = nmFinal
DIC2 = rep(NA, length(outnm)+1)
DIC2[1] = min(DIC)
for (i in 1:length(outnm)) {
j = outnm[i]
covars = c(nmFinal, j)
formula = as.formula(paste("n_positive ~ -1 + intercept + f(field, model = spde) +", paste(covars, collapse="+")))
output = INLA::inla(formula, family = family,
Ntrials = INLA::inla.stack.data(data.stk)$examined,
data = INLA::inla.stack.data(data.stk), verbose=FALSE,
control.predictor = list(compute = TRUE,
A = INLA::inla.stack.A(data.stk)),
control.fixed = list(expand.factor.strategy = 'inla'),
control.compute = list(dic = TRUE, waic = TRUE, config = TRUE),
control.inla = list(int.strategy = "eb"))
if (output$dic$dic < min(DIC2[1:i])){
nmFinal = covars}
DIC2[i+1] = output$dic$dic
}}
# nmFinal is the final set of cavariates with the smallest DIC
covars = nmFinal
formula = as.formula(paste("n_positive ~ -1 + intercept + f(field, model = spde) +", paste(covars, collapse="+")))
output = INLA::inla(formula, family = family,
Ntrials = INLA::inla.stack.data(data.stk)$examined,
data = INLA::inla.stack.data(data.stk), verbose = FALSE,
control.predictor = list(compute = TRUE,
A = INLA::inla.stack.A(data.stk)),
control.fixed = list(expand.factor.strategy='inla'),
control.compute = list(dic = TRUE, waic = TRUE, config = TRUE),
control.inla = list(int.strategy = "eb"))
if(save_output) {
write.table(output$summary.fixed, "Fixed_effects.txt", row.names = TRUE, col.names = TRUE)
}
return(output$summary.fixed)
}
#### FUNCTION #4
#' @title Prediction of the response on a grid/raster
#' @description The function predicts the response on target locations where data are not observed using posterior distributions.
#' @param response A data frame containing the response data including number of positive cases (n_positive), number of individuals examined (examined), and point locations (longitude and latitude).
#' @param finalmodel An object returned from the function findModelWithSmallestDIC.
#' @param A_mat An observation/prediction weight matrix returned from makeMeshSPDE.
#' @param spde An inla.spde2 model object for a Matern model returned from makeMeshSPDE,
#' @param mesh A triangle mesh created based on initial point locations and returned from makeMeshSPDE.
#' @param family A string indicating the likelihood family, Default: `binomial'.
#' @param raster_stack A collection of RasterLayer objects with the same spatial extent and resolution.
#' @param nsamp Number of samples to draw from an approximated posterior of a fitted model. Make nsamp >= 100 in order to compute mean, sd, IQR, and 95\% CI of posterior samples.
#' @param int.strategy Character. The integration strategy to use; one of `auto', `ccd', `grid', `eb' (empirical bayes), `user' or `user.std'.
#' @param write_posterior A logical argument indicating if posterior realizations are to be written into raster files, Default: TRUE.
#' @return OUTPUT_DESCRIPTION
#' @details DETAILS
#' @examples
#' \dontrun{
#' if(interactive()){
#' #EXAMPLE1
#' }
#' }
#' @export
predictionOnAGrid <- function(response, finalmodel, A_mat, spde, mesh, family = "binomial", raster_stack, nsamp, int.strategy, write_posterior = TRUE){
covar_names = rownames(finalmodel)[-1]
coords = cbind(response$longitude, response$latitude)
covariate_DF = data.frame(raster::extract(raster_stack, coords))
covars_final = subset(covariate_DF, select = as.vector(covar_names))
# Stacking data
data.stk = INLA::inla.stack(data = list(n_positive = as.vector(response$n_positive),
examined = as.vector(response$examined)),
A = list(A_mat, 1),
effects=list(field = 1:spde$n.spde,
list(intercept = rep(1, nrow(response)),
covariate = covars_final)))
options(useFancyQuotes = FALSE)
formula = as.formula(paste("n_positive ~ -1 + intercept + f(field, model=spde) +", paste(names(covars_final), collapse="+")))
result = INLA::inla(formula, family = family,
Ntrials = INLA::inla.stack.data(data.stk)$examined,
data = INLA::inla.stack.data(data.stk), verbose = F,
control.predictor = list(compute = TRUE,
A = INLA::inla.stack.A(data.stk)),
control.fixed = list(expand.factor.strategy = 'inla'),
control.compute = list(dic = TRUE, waic = TRUE, config = TRUE),
control.inla = list(int.strategy = int.strategy))
# Locations for predictions
mask <- raster_stack[[1]]
NAvalue(mask) <- -9999
pred_val <- raster::getValues(mask)
w <- is.na(pred_val)
index <- 1:length(w)
index <- index[!w]
pred_locs <- raster::xyFromCell(mask, 1:ncell(mask))
pred_locs <- pred_locs[!w, ]
colnames(pred_locs) <- c('longitude', 'latitude')
locs_pred <- pred_locs
A.pred <- INLA::inla.spde.make.A(mesh = mesh, loc = locs_pred)
covariates_pred = data.frame(raster::extract(raster_stack, locs_pred))
covar_pred = subset(covariates_pred, select = as.vector(covar_names))
## Draw from posterior samples
nsamp = nsamp
samp = INLA::inla.posterior.sample(nsamp, result)
k = dim(covar_pred)[2] # number of final covariates
test = raster_stack[[1]]
pred = matrix(NA, nrow = dim(A.pred)[1], ncol = 100)
for (i in 1:nsamp){
field = samp[[i]]$latent[grep('field', rownames(samp[[i]]$latent)), ]
intercept = samp[[i]]$latent[grep('intercept', rownames(samp[[i]]$latent)), ]
beta = NULL
for (j in 1:k){
beta[j] = samp[[i]]$latent[grep(names(covar_pred)[j], rownames(samp[[i]]$latent)), ]
}
lp = as.matrix(intercept + c(as.matrix(covar_pred)%*%beta) + drop(A.pred%*%field))
# Predicted values
if(i <= 100) {
pred[,i] = plogis(lp)
}
pred_prev = plogis(lp)
## Write the posterior surface into a raster file
if(write_posterior) {
pred_val[!w] <- round(pred_prev,3)
out_mean = raster::setValues(test, pred_val)
out_mean = raster::writeRaster(out_mean, paste0("Prevalence_posterior_realization_", i, ".tif"), overwrite = TRUE)
}
}
## Use 100 posterior samples and compute mean, median, CI, IQR etc.
pred_mean = rowMeans(pred, na.rm = T)
pred_sd = apply(pred, 1, sd, na.rm = T)
pred_quants <- apply(pred, 1, quantile, probs = c(0.025, 0.25, 0.75, 0.975), na.rm = T)
pred_IQR = pred_quants[3,] - pred_quants[2,]
pred_95CI = pred_quants[4,] - pred_quants[1,]
# Plot prevalence surfaces
pdf("Prevalence_maps.pdf")
# Write the output into a raster file
pred_val[!w] <- round(pred_mean, 3)
out_mean = raster::setValues(test, pred_val)
out_mean = raster::writeRaster(out_mean, "Prevalence_mean.tif", overwrite = TRUE)
plot(out_mean, main = "Prevalence mean")
pred_val[!w] <- round(pred_sd, 3)
out_sd = raster::setValues(test, pred_val)
out_sd = raster::writeRaster(out_sd, "Prevalence_sd.tif", overwrite = TRUE)
plot(out_sd, main = "Prevalence sd")
pred_val[!w] <- round(pred_95CI, 3)
out_CI = raster::setValues(test, pred_val)
out_CI = raster::writeRaster(out_CI, "Prevalence_95CI.tif", overwrite = TRUE)
plot(out_CI, main = "Prevalence 95%_CI")
pred_val[!w] <- round(pred_IQR, 3)
out_IQR = raster::setValues(test, pred_val)
out_IQR = raster::writeRaster(out_IQR, "Prevalence_IQR.tif", overwrite = TRUE)
plot(out_IQR, main = "Prevalence interquartile range")
pred_val[!w] <- round(pred_quants[4,], 3)
out_975 = raster::setValues(test, pred_val)
out_975 = raster::writeRaster(out_975, "Prevalence_upperbound_975.tif", overwrite = TRUE)
plot(out_975, main = "Prevalence (upper bound)")
pred_val[!w] <- round(pred_quants[1,], 3)
out_025 = raster::setValues(test, pred_val)
out_025 = raster::writeRaster(out_025, "Prevalence_lowerbound_025.tif", overwrite = TRUE)
plot(out_025, main = "Prevalence (lower bound)")
dev.off()
# Validation plot
pdf("In_sample_fit.pdf")
PR = response$n_positive / response$examined
coords = cbind(response$longitude, response$latitude)
pred = raster::extract(out_mean, coords)
PR = PR[which(is.na(pred) == FALSE)]
pred = pred[which(is.na(pred) == FALSE)]
plot(PR, pred, main=paste("Cor = ", round(cor(PR, pred),2)), xlab = "Observed", ylab = "Predicted", cex = 0.8, mgp = c(2,1,0), xlim = c(0,1), ylim = c(0,1))
abline(0, 1, col = "red")
dev.off()
return(invisible())
}
#### FUNCTION #5
#' @title Fit the final spatial model using a subset of data
#' @description Fit the final spatial model using a subset of data for the purpose of cross-validation.
#' @param response A data frame containing the response data including number of positive cases (n_positive), number of individuals examined (examined), and point locations (longitude and latitude).
#' @param val_DAT Subset of data for cross-validation.
#' @param finalmodel An object returned from the function findModelWithSmallestDIC.
#' @param A_mat An observation/prediction weight matrix returned from makeMeshSPDE.
#' @param spde An inla.spde2 model object for a Matern model returned from makeMeshSPDE.
#' @param family A string indicating the likelihood family, Default: `binomial'.
#' @param raster_stack A collection of RasterLayer objects with the same spatial extent and resolution.
#' @param int.strategy Character. The integration strategy to use; one of `auto', `ccd', `grid', `eb' (empirical bayes), `user' or `user.std'.
#' @return OUTPUT_DESCRIPTION
#' @details DETAILS
#' @examples
#' \dontrun{
#' if(interactive()){
#' #EXAMPLE1
#' }
#' }
#' @export
#' @rdname fitFinalModel
#'
#' @import raster
fitFinalModel = function(response, val_DAT, finalmodel, A_mat, spde, family = "binomial", raster_stack, int.strategy){
aa = which(is.na(val_DAT$n_positive) == TRUE)
covar_names = rownames(finalmodel)[-1]
coords = cbind(val_DAT$longitude, val_DAT$latitude)
covariate_DF = data.frame(raster::extract(raster_stack, coords))
covars_final = subset(covariate_DF, select = as.vector(covar_names))
# Stacking data
data.stk = INLA::inla.stack(data = list(n_positive = as.vector(val_DAT$n_positive),
examined = as.vector(val_DAT$examined)),
A = list(A_mat, 1),
effects=list(field = 1:spde$n.spde,
list(intercept = rep(1, nrow(val_DAT)),
covariate = covars_final)))
options(useFancyQuotes = FALSE)
formula = as.formula(paste("n_positive ~ -1 + intercept + f(field, model=spde) +", paste(names(covars_final), collapse="+")))
result = INLA::inla(formula, family = family,
Ntrials = INLA::inla.stack.data(data.stk)$examined,
data = INLA::inla.stack.data(data.stk), verbose = F,
control.predictor = list(link=1, compute = TRUE, A = INLA::inla.stack.A(data.stk)),
control.fixed = list(expand.factor.strategy = 'inla'),
control.compute = list(dic = TRUE, waic = TRUE, config = TRUE),
control.inla = list(int.strategy = int.strategy))
fitted = result$summary.fitted.values$mean[1:nrow(val_DAT)]
obs = response$n_positive / response$examined
out = c(round(cor(fitted, obs), 4), round(cor(fitted, obs)^2, 4))
return(out)
}
#### FUNCTION #6
#' @title Performs cross-validation
#' @description Performs cross-validation for a subset of data to assess predictive performance of the final model.
#' @param response A data frame containing the response data including number of positive cases (n_positive), number of individuals examined (examined), and point locations (longitude and latitude).
#' @param finalmodel An object returned from the function findModelWithSmallestDIC.
#' @param A_mat An observation/prediction weight matrix returned from makeMeshSPDE.
#' @param spde An inla.spde2 model object for a Matern model returned from makeMeshSPDE.
#' @param family A string indicating the likelihood family, Default: `binomial'.
#' @param raster_stack A collection of RasterLayer objects with the same spatial extent and resolution.
#' @param int.strategy Character. The integration strategy to use; one of `auto', `ccd', `grid', `eb' (empirical bayes), `user' or `user.std'.
#' @param n_reps Number of replicates of subsets of data for cross-validation, Default: `100'.
#' @param pct_out Percentage of data to be used as a test set.
#' @return OUTPUT_DESCRIPTION
#' @details DETAILS
#' @examples
#' \dontrun{
#' if(interactive()){
#' #EXAMPLE1
#' }
#' }
#' @export
crossValidation <- function(response, finalmodel, A_mat, spde, family = "binomial", raster_stack, int.strategy, n_reps = 100, pct_out){
i = 1:n_reps
cor_mat <- parallel::mclapply(X = i, FUN = function(...) {
requireNamespace("INLA")
requireNamespace("raster")
aa = sample(1:nrow(response), size = floor(nrow(response)*pct_out), replace = FALSE)
DAT = response
DAT$n_positive[aa] = NA
fitFinalModel(response = response, val_DAT = DAT, finalmodel = finalmodel, A_mat = A_mat, spde = spde, family = family, raster_stack = raster_stack, int.strategy = int.strategy)
}, mc.cores = ifelse(tolower(Sys.info()[["sysname"]] == "windows"), 1, parallel::detectCores()))
cor_mat <- t(as.data.frame(cor_mat))
colnames(cor_mat) = c("Correlation mean", "Cross-validated R-squared")
rownames(cor_mat) <- NULL
return(cor_mat)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.