#' SpatialModelEstimator
#'
#' @details
#' Return Estimates and objects from a model based GMRF approach
#'
#' @param spatial_df data.frame
#' @param formula class(formula) e.g. formula = y ~ 1
#' @param mesh an inla.mesh object that has been created before this function is applied
#' @param extrapolation_grid data.frame we expect columns names "x", "y", "area"
#' @param family 0 = Poisson, 1 = Negative Binomial, 2 = Gaussian, 3 = Gamma
#' @param link link function 0 = log, 1 = logit, 2 = probit, 3 = inverse, 4 = identity
#' @param bias_correct <bool> whether bias correction is needed in deriving expectations and standard errors for
#' @param control list of settings for nlminb
#' @param convergence list of convergence setting to check
#' @param start_par_list list of inital values for optimisaton, if you have conversion issues can be useful to look at alternative starting values
#' @param trace_level 'none' don't print any information, 'low' print steps in the function 'medium' print gradients of TMB optimisation, 'high' print parameter candidates as well as gradients during oprimisation.
#' @export
#' @importFrom sp coordinates
#' @importFrom INLA inla.mesh.projector inla.spde2.matern inla.spde.make.A
#' @importFrom RANN nn2
#' @importFrom TMB MakeADFun
#' @importFrom stats model.matrix nlminb optimHess poisson rnorm sd terms var
#' @return: list of estimated objects and data objects
SpatialModelEstimator = function(spatial_df, formula, mesh, extrapolation_grid, family, link, bias_correct = F, control = list(eval.max = 10000, iter.max = 10000), convergence = list(grad_tol = 0.001), start_par_list = NULL, trace_level = "none") {
if(!trace_level %in% c("none", "low", "medium","high"))
stop(paste0("trace_level needs to be 'none', 'low', 'medium','high'"))
if(class(spatial_df) != "SpatialPointsDataFrame")
stop(paste0("spatial_df needs to be of class SpatialPointsDataFrame, see the example for more information"))
if(class(mesh) != "inla.mesh")
stop(paste0("mesh needs to be of class inla.mesh, see the example for more information"))
if(!family %in% c(0:3))
stop(paste0("family needs to be a value from 0 to 3, for a valid distribution"))
if(!link %in% c(0:4))
stop(paste0("link needs to be a value from 0 to 4, for a valid link function"))
if(class(formula) != "formula")
stop(paste0("formula not of class formula, often due to wrapping as string, i.e. having quotation marks."))
# get response variable
repsons_var_label = all.vars(formula)[attr(terms(formula), "response")]
if(!all(c("x","y","area") %in% colnames(extrapolation_grid)))
stop(paste0("extrapolation_grid: needs colnames 'x', 'y', 'area'"))
if(!"area" %in% colnames(spatial_df@data))
stop(paste0("spatial_df@data: needs colnames 'area'"))
if(!repsons_var_label %in% colnames(extrapolation_grid))
stop(paste0("extrapolation_grid: needs colname ", repsons_var_label, ", this is a dummy variable just for creating extrapolation matrix."))
## map mesh to observations
A = inla.spde.make.A(mesh, loc = cbind(coordinates(spatial_df[, 1]), coordinates(spatial_df[, 2])))
## map mesh to extrapolation grid
Proj <- inla.mesh.projector(mesh, loc = cbind(extrapolation_grid$x, extrapolation_grid$y))
P = Proj$proj$A
## Create Sparse Matern objects to pass to TMB
spde = inla.spde2.matern(mesh, alpha = 2)
## model formula
model_matrix = model.matrix(object = formula, data = spatial_df@data)
## will need to create a dummy variable for the response variable in the extrapolation grid.
proj_model_matrix = model.matrix(object = formula, data = extrapolation_grid)
## Index prediction sampled locations, these wont be included as estimates in our variance caluclations these squares = constants
proj_indicator = rep(1,nrow(extrapolation_grid))
proj_ndx = nn2(query = coordinates(spatial_df), data = cbind(extrapolation_grid$x, extrapolation_grid$y), k = 1)
proj_indicator[proj_ndx$nn.idx] = 0
## create TMB data object
data_tmb <- list(y = spatial_df@data[, repsons_var_label],
area = spatial_df@data$area,
spde = spde$param.inla[c("M0","M1","M2")],
Proj = P,
A = A,
model_matrix = model_matrix,
family = family,
link = link,
proj_area = extrapolation_grid$area,
proj_model_matrix = proj_model_matrix,
pred_indicator = proj_indicator,
simulate_state = 0
)
## create TMB param object
range_domain = diff(range(coordinates(spatial_df)))
params_tmb = list()
if(is.null(start_par_list)) {
params_tmb$omega = rep(0, spde$n.spde)
params_tmb$betas = c(mean(data_tmb$y / data_tmb$area), rep(0, ncol(model_matrix) - 1))
## get range of domain, based on mesh
params_tmb$ln_kappa = log(sqrt(8) / (range_domain * 0.25)) ## start by distance at which spatial autocorrelation (rho) = 0.1. based on 25% of domain
## variance terms half for phi, half for GMRF.
params_tmb$ln_tau = log(2) ## tay = precision inverse of marginal variance. starting value set to 0.1 of variation in
params_tmb$ln_phi = log(0.5 * sd(data_tmb$y / data_tmb$area))
if(family == 1) {
params_tmb$ln_tau = 0 ## phi -> 0 = Poission
} else if (family == 2) {
params_tmb$ln_phi = log(0.5 * sd(data_tmb$y / data_tmb$area))
} else if (family == 3) {
params_tmb$ln_phi = log(0.3) # coeffecient of variation
}
} else {
if(class(start_par_list) != "list")
stop(paste0("If you supply start values it needs to be of class 'list'"))
if(!all(names(start_par_list) %in% c("omega", "betas", "ln_kappa", "ln_tau", "ln_phi")))
stop(paste0("you supplied start_par_list, needs to have all these parameters 'omega', 'betas', 'ln_kappa', 'ln_tau', 'ln_phi'"))
params_tmb = start_par_list
}
## optimise - model with no GMRF
fixed_pars_no_gmrf = list(ln_kappa = as.factor(NA), ln_tau = as.factor(NA), omega = as.factor(rep(NA, spde$n.spde)))
if(!family %in% c(1, 2, 3))
fixed_pars_no_gmrf$ln_phi = as.factor(NA)
## Estiamte initial model without GMRF
data_tmb$model = "GMRFModel"
if(trace_level != "none")
print(paste0("set up data and parameters"))
obj_init = tryCatch (
expr <- TMB::MakeADFun(data = data_tmb, parameters = params_tmb, map = fixed_pars_no_gmrf, DLL = "systematicsurvey_TMBExports", silent = T),
error = function(e) {e}
)
if(inherits(obj_init, "error"))
stop(paste0("Failed to create obj_init, issue with MakeADFun() call error = ", obj_init$message))
opt_init = tryCatch (
expr = nlminb(obj_init$par, obj_init$fn, obj_init$gr, control = control),
error = function(e) {e}
)
if(inherits(opt_init, "error"))
stop(paste0("Failed to optimise opt_init, issue with nlminb() call. Error = ", opt_init$message))
if(trace_level != "none")
print(paste0("Finished optimisation of simple non-GMRF model"))
## check convergence
max_grad_this = max(abs(obj_init$gr(opt_init$par)));
grad_tol = 0.001 ## default value
if(!is.null(convergence))
grad_tol = convergence$grad_tol
## check hessian invertable and gradient above some satisfied
init_convergence = "There is no evidence that the model is not converged"
if (is.character(try(chol(optimHess(opt_init$par, fn = obj_init$fn, gr = obj_init$gr)), silent = TRUE)) | (max_grad_this > grad_tol))
init_convergence = "Failed conergence for non GMRF model";
## pull out estimates
sd_report_init = TMB::sdreport(obj = obj_init)
est_init = sd_report_init$value["fitted_non_sample_Total"]
sd_init = sd_report_init$sd[names(sd_report_init$value) == "fitted_non_sample_Total"]
est_ln_init = sd_report_init$value["log_fitted_non_sample_Total"]
sd_ln_init = sd_report_init$sd[names(sd_report_init$value) == "log_fitted_non_sample_Total"]
MLE_init_report = obj_init$report(obj_init$env$last.par.best)
params_tmb$betas = as.numeric(opt_init$par[names(opt_init$par) %in% "betas"])
if (any(names(opt_init$par) %in% "ln_phi"))
params_tmb[["ln_phi"]] = opt_init$par[names(opt_init$par) %in% "ln_phi"]
if(trace_level != "none")
print(paste0("Finished saving non-GMRF info."))
## Now do the GMRF model and estimate
obj_gmrf = NULL
if(family %in% c(1, 2, 3)) {
obj_gmrf = tryCatch(
expr <- TMB::MakeADFun(data = data_tmb, random = c("omega"), parameters = params_tmb , DLL = "systematicsurvey_TMBExports", silent = T),
error = function(e) {e}
)
} else {
obj_gmrf = tryCatch(
expr <- TMB::MakeADFun(data = data_tmb, random = c("omega"), parameters = params_tmb, map = list(ln_phi = as.factor(NA)), DLL = "systematicsurvey_TMBExports", silent = T),
error = function(e) {e}
)
}
if(trace_level %in% c("medium", "high"))
obj_gmrf$env$silent = FALSE
if(trace_level == "high")
obj_gmrf$env$tracepar = TRUE
if(inherits(obj_gmrf, "error"))
stop(paste0("Failed to create obj_gmrf, issue with MakeADFun() call error = ", obj_gmrf$message))
## Optimise
opt_gmrf = tryCatch(
expr = nlminb(obj_gmrf$par, obj_gmrf$fn, obj_gmrf$gr, control = control),
error = function(e) {e}
)
if(inherits(opt_gmrf, "error"))
stop(paste0("Failed to optimise opt_gmrf, issue with nlminb() call. Error = ", opt_gmrf$message))
## Check convergence
temp_pars = opt_gmrf$par
max_grad_this = max(abs(obj_gmrf$gr(temp_pars)));
## start building the return object
return_list = list(init_convergence = init_convergence, MLE_init_report = MLE_init_report)
return_list$start_vals = params_tmb;
return_list$sd_report_init = sd_report_init; ## no random effects so don't need to bias correct.
return_list$init_opt = opt_init
if (is.character(try(chol(optimHess(temp_pars, fn = obj_gmrf$fn, gr = obj_gmrf$gr)), silent = TRUE)) | (max_grad_this > grad_tol)) {
## try a different kappa
temp_pars["ln_kappa"] = log(sqrt(8) / (range_domain * 0.1)) ## 10% of domain range
temp_pars["ln_tau"] = log(10)
if(trace_level != "none")
print(paste0("Failed first optimisation, trying different starting values betas = ", paste0(temp_pars["betas"], collapse = ",")," for kappa = ", exp(temp_pars["ln_kappa"] ),", tau = ", exp(temp_pars["ln_tau"]) ," for GF"))
temp_opt = tryCatch(
expr = nlminb(temp_pars, obj_gmrf$fn, obj_gmrf$gr, control = control),
error = function(e) {e}
)
if(trace_level != "none")
print(paste0("fixed effect MLE pars = ", paste0(temp_opt$par, collapse = ", ")))
temp_pars <- temp_opt$par
opt_gmrf$iterations = opt_gmrf$iterations + temp_opt$iterations
if (is.character(try(chol(optimHess(temp_pars, fn = obj_gmrf$fn, gr = obj_gmrf$gr)), silent = TRUE))) {
temp_pars["ln_kappa"] = log(sqrt(8) / (range_domain * 0.5)) ## 50% of domain range
temp_pars["ln_tau"] = log(5)
if(trace_level != "none")
print(paste0("Failed first optimisation, trying different starting values betas = ", paste0(temp_pars["betas"], collapse = ",")," for kappa = ", exp(temp_pars["ln_kappa"] ),", tau = ", exp(temp_pars["ln_tau"]) ," for GF"))
temp_opt = tryCatch(
expr = nlminb(temp_pars, obj_gmrf$fn, obj_gmrf$gr, control = control),
error = function(e) {e}
)
temp_pars <- temp_opt$par
if(trace_level != "none")
print(paste0("fixed effect MLE pars = ", paste0(temp_opt$par, collapse = ", ")))
opt_gmrf$iterations = opt_gmrf$iterations + temp_opt$iterations
}
}
if(trace_level != "none")
print(paste0("Finished GMRF optimisation fixed effect pars = ", paste0(temp_pars, collapse = ", ")))
## final convergence parameters
max_grad_this = max(abs(obj_gmrf$gr(temp_pars)));
hess = optimHess(temp_pars, fn = obj_gmrf$fn, gr = obj_gmrf$gr)
gmrf_convergence = ""
if (is.character(try(chol(hess), silent = TRUE))) {
print("Hessian is not positive definite, so standard errors are not available for GMRF use init model");
gmrf_convergence = "Hessian is not positive definite, so standard errors are not available. Look at eigen values from hession, to see problem params."
return_list$hess_eigen_vals = eigen(hess)
return_list$gmrf_failed_opt = opt_gmrf
} else {
if (max_grad_this > grad_tol) {
gmrf_convergence = paste0("Gradient of MLE params > grad_tol = '", grad_tol, "standard errors are probably useable, but this does indicate non convergence")
} else {
gmrf_convergence = "There is no evidence that the model is not converged"
}
gmrf_sd_report = NULL
if (bias_correct) {
gmrf_sd_report = TMB::sdreport(obj_gmrf, bias.correct= TRUE, bias.correct.control = list(vars_to_correct = c("fitted_non_sample_Total", "log_fitted_non_sample_Total"), sd = TRUE), getJointPrecision = T)
} else {
gmrf_sd_report = TMB::sdreport(obj_gmrf, getJointPrecision = T)
}
return_list$gmrf_sd_report = gmrf_sd_report
return_list$gmrf_max_grad = max_grad_this
return_list$MLE_gmrf = obj_gmrf$report(obj_gmrf$env$last.par.best)
return_list$MLE_par = obj_gmrf$env$last.par.best ## opt_gmrf won't have random effects, this will have bayes estiamtes for random effects.
return_list$gmrf_opt = opt_gmrf
}
if(trace_level != "none")
print(paste0("Finished saving GMRF information"))
return_list$gmrf_convergence = gmrf_convergence
return_list$tmb_data = data_tmb ## with this and MLE pars can generate thier on obj and play around.
return(return_list)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.