#' @title R6 Class for Bayesian Linear modeling of crop responses
#'
#' @description R6 class using Bayesian Linear Regression to fit a crop
#' response model with the experimental variable and remotely sensed covariate
#' data. This class is initialized with a named list of training (named 'trn')
#' and validation (named 'val') datasets, the response variable, the experimental
#' variable, and the means of the centered data.
#'
#' The initialization creates a data frame ('parm_df') containing the parameter names,
#' the mean and standard deviation, and whether it meets
#' the criteria to be omitted from the model, making it a 'bad_parm'. The criteria for
#' this is over 30% of data for a given year missing for a parameter or a standard
#' deviation of zero, indicating singularity.
#'
#' The process then creates a formula for a final model with all parameters that
#' are not considered 'bad'. This is used to fit the final model that is returned
#' to the user for use in the simulation to predict the response under varying
#' rates of the experimental variable.
#'
#' The 'saveDiagnostics' method include residuals vs. fitted. The fitting process
#' also prepares data for validation plots in the
#' 'ModClass' R6 class. This includes predicting observations in the validation
#' dataset, making a unique id using the year and fieldname, uncentering data, and
#' identifying a field name to use for plotting that reflects all fields in the
#' dataset.
#' @seealso \code{\link{ModClass}} for the class that calls the ModClass interface,
#' \code{\link{NonLinear_Logistic}}, \code{\link{GAM}}, and \code{\link{RF}}
#' for alternative model classes.
#' @export
BayesLinear <- R6::R6Class(
"BayesLinear",
public = list(
#' @field dat Named list of traning (named 'trn') and validation (named 'val')
#' datasets with the response, experimental, and remotely sensed variables.
dat = NULL,
#' @field respvar Character, the response variable of interest.
respvar = NULL,
#' @field expvar Character, the experimental variable of interest.
expvar = NULL,
#' @field covars Character vector of covariates to use for training the model.
covars = NULL,
#' @field m Fitted RF model.
m = NULL,
#' @field form Final RF formula.
form = NULL,
#' @field parm_df Data frame of parameter names, and a column
#' named 'bad_parms' to indicate whether to include in the model formula. Also includes
#' columns for the mean and standard deviation of each parameter.
parm_df = NULL,
#' @field fieldname Unique name for the field(s) analyzed. If multiple fields are used
#' they are separated by an ampersand, otherwise the singular field name is used. This
#' is used for plotting.
fieldname = NULL,
#' @field mod_type Name of the model of this class, used for plotting.
mod_type = "BayesLinear",
#' @description
#' The initialization creates a data frame ('parm_df') containing the parameter names,
#' the mean and standard deviation, and whether it meets
#' the criteria to be omitted from the model, making it a 'bad_parm'. The criteria for
#' this is over 30% of data for a given year missing for a parameter or a standard
#' deviation of zero, indicating singularity.
#' @param dat Named list of training (named 'trn') and validation (named 'val')
#' datasets with the response, experimental, and remotely sensed variables.
#' @param respvar Character, the response variable of interest.
#' @param expvar Character, the experimental variable of interest.
#' @param covars Character vector of covariates to use for training the model.
#' @return A instantiated 'RF' object.
initialize = function(dat, respvar, expvar, covars) {
stopifnot(any(grepl("trn", names(dat)), grepl("val", names(dat))),
is.character(respvar),
is.character(expvar),
is.character(covars)
)
for (i in 1:length(dat)) {
stopifnot(any(grepl(paste0("^", respvar, "$"), names(dat[[i]]))),
any(grepl(paste0("^", expvar, "$"), names(dat[[i]]))),
all(covars %in% names(dat[[i]])))
}
self$dat <- dat
self$respvar <- respvar
self$expvar <- expvar
self$covars <- covars
if (self$expvar %in% self$covars) {
covars <- self$covars[-grep(self$expvar, self$covars)]
}
self$parm_df <- data.frame(
parms = c(self$expvar, covars),
bad_parms = FALSE,
means = NA,
sd = NA
)
self$parm_df <- OFPE::findBadParms(self$parm_df, self$dat$trn)
self$dat <- lapply(self$dat,
OFPE::removeNAfromCovars,
self$parm_df$parms[!self$parm_df$bad_parms])
},
#' @description
#' Method for fitting the Bayesian linear model to response variables using
#' experimental and covariate data.
#'
#' The fitting begins by taking the data frame ('parm_df') containing the parameter names,
#' the mean and standard deviation, and identifying whether
#' each parameter meets the criteria to be omitted from the model, making it a 'bad_parm'.
#' The criteria for this is over 30% of data for a given year missing for a parameter or a
#' standard deviation of zero, indicating singularity.
#'
#' OLD: A subset of 1000 observations is taken to perform top-down feature
#' selection based on the RMSE from predictions to a holdout set. If a covariate
#' lowers the RMSE when removed, it is flagged as 'bad'.
#' NEW: No feature selection is performed due to computing time.
#'
#' The process then creates a formula for a final model with all parameters that
#' are not considered 'bad'. This is used to fit the final model that is returned
#' to the user for use in the simulation to predict the response under varying
#' rates of the experimental variable.
#'
#' Finally, this method prepares the validation data for plotting by using the model to predict
#' the response for each of the observations in the validation dataset, uncentering data if
#' necessary, and identifying a unique field name from the data.
#' @param None Parameters provided upon class instantiation.
#' @return A fitted BayesLinear model.
fitMod = function() {
# subdat <- lapply(self$dat, OFPE::takeSubset, self$respvar, 500)
#
# form <- private$.makeFormula(parms = self$parm_df[!self$parm_df$bad_parms, "parms"],
# respvar = self$respvar)
# ## simultaneous autoregressive model
# xy_sub <- subdat$trn[, c("x", "y")]
# xy_sf <- sf::st_as_sf(xy_sub, coords = c("x", "y"))
# nn <- suppressWarnings(spdep::knn2nb(spdep::knearneigh(x = xy_sf, k = 4)))
#
# m <- invisible(suppressWarnings(suppressMessages(brms::brm(
# brms::brmsformula(as.formula(form),
# nl = FALSE,
# autocor = ~ brms::sar(nn)),
# data = subdat$trn, family = gaussian(),
# control = list(adapt_delta = 0.99),
# iter = 6000,
# warmup = 2000
# ))))
# subdat$val$preds <- predict(m, subdat$val)[,1]
# get_cols <- c(self$respvar, "preds")
# preds <- subdat$val[!is.na(subdat$val$preds), get_cols, with = FALSE]
# init_rmse <- Metrics::rmse(preds[[1]], preds$preds) %>% round(4)
#
# ## do top-down RMSE based feature selection
# for (i in 1:nrow(self$parm_df)) {
# if (self$parm_df$parms[i] != self$expvar) {
# self$parm_df$bad_parms[i] <- TRUE
#
# form <- private$.makeFormula(parms = self$parm_df$parms[!self$parm_df$bad_parms],
# respvar = self$respvar)
# m <- invisible(suppressWarnings(suppressMessages(brms::brm(
# brms::brmsformula(as.formula(form),
# nl = FALSE,
# autocor = ~ brms::sar(nn)),
# data = subdat$trn, family = gaussian(),
# control = list(adapt_delta = 0.99),
# iter = 6000,
# warmup = 2000
# ))))
# subdat$val$preds <- predict(m, subdat$val)[,1]
# get_cols <- c(self$respvar, "preds")
# preds <- subdat$val[!is.na(subdat$val$preds), get_cols, with = FALSE]
# diff_rmse <- Metrics::rmse(preds[[1]], preds$preds) %>% round(4) - init_rmse
# if (diff_rmse < 0) {
# init_rmse <- Metrics::rmse(preds[[1]], preds$preds) %>% round(4)
# } else {
# self$parm_df$bad_parms[i] <- FALSE
# }
# }
# rm(m)
# }
# gc()
## simultaneous autoregressive model
xy_sub <- self$dat$trn[, c("x", "y")]
xy_sf <- sf::st_as_sf(xy_sub, coords = c("x", "y"))
nn <- suppressWarnings(spdep::knn2nb(spdep::knearneigh(x = xy_sf, k = 4)))
self$form <- private$.makeFormula(parms = self$parm_df$parms[!self$parm_df$bad_parms],
respvar = self$respvar)
self$m <- invisible(suppressWarnings(suppressMessages(brms::brm(
brms::brmsformula(as.formula(self$form),
nl = FALSE,
autocor = ~ brms::sar(nn),
decomp = "QR"),
data = self$dat$trn, family = Gamma(link = "log"),
control = list(adapt_delta = 0.99),
iter = 6000,
warmup = 2000,
normalize = FALSE,
cores = ifelse(parallel::detectCores() > 4, 4, parallel::detectCores())
))))
self$dat$val$pred <- self$predResps(self$dat$val, self$m)
self$dat$val <- OFPE::valPrep(self$dat$val,
self$respvar,
self$expvar)
self$fieldname <- OFPE::uniqueFieldname(self$dat$val)
return(self$m)
},
#' @description
#' Method for predicting response variables using data and a model.
#' @param dat Data for predicting response variables for.
#' @param m The fitted model to use for predicting the response
#' variable for each observation in 'dat'.
#' @return Vector of predicted values for each location in 'dat'.
predResps = function(dat, m) {
pred_df <- predict(m, dat, ndraws = 100)
pred <- apply(pred_df, 1, function(x) rgamma(1, (x[1] / x[2])^2, (x[1] / x[2]^2)))
gc()
return(pred)
},
#' @description
#' Method for saving diagnostic plots of the fitted model. This is
#' simply the residuals vs fitted values.
#' @param out_path The path to the folder in which to store and
#' save outputs from the model fitting process
#' @param SAVE Whether to save diagnostic plots.
#' @return Diagnostic plots.
saveDiagnostics = function(out_path, SAVE) {
if (SAVE) {
try({dev.off()}, silent = TRUE)
## general diagnostics
resp_col <- grep(paste0("^", self$respvar, "$"), names(self$dat$val))
obs <- self$dat$val[, resp_col, with = FALSE][[1]]
mod_diagnostics <- data.frame(
fitted = self$dat$val$pred,
residuals = obs - self$dat$val$pred
)
png(paste0(out_path, "/Outputs/Diagnostics/", self$respvar, "_",
self$fieldname, "_BayesLinear_diagnostics.png"),
width = 7.5, height = 5, units = 'in', res = 100)
plot(mod_diagnostics$fitted,
mod_diagnostics$residuals,
ylab = "Residuals", xlab = "Fitted")
dev.off()
}
}
),
private = list(
.makeFormula = function(parms = NULL, respvar = NULL) {
if (is.null(parms)) {
parms <- self$parm_df$parms[!self$parm_df$bad_parms]
}
fxn <- paste(
parms,
collapse = " + ")
fxn <- paste0(self$respvar, " ~ ", fxn)
return(fxn)
}
)
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.