Nothing
#'
#' @title rSPDE inlabru mapper
#' @name bru_get_mapper.inla_rspde
#' @param model An `inla_rspde` for which to construct or extract a mapper
#' @param \dots Arguments passed on to other methods
#' @rdname bru_get_mapper.inla_rspde
#' @rawNamespace if (getRversion() >= "3.6.0") {
#' S3method(inlabru::bru_get_mapper, inla_rspde)
#' S3method(inlabru::ibm_n, bru_mapper_inla_rspde)
#' S3method(inlabru::ibm_values, bru_mapper_inla_rspde)
#' S3method(inlabru::ibm_jacobian, bru_mapper_inla_rspde)
#' }
#'
#' @examples
#' \donttest{ #devel version
#' if (requireNamespace("INLA", quietly = TRUE) &&
#' requireNamespace("inlabru", quietly = TRUE)){
#' library(INLA)
#' library(inlabru)
#'
#' set.seed(123)
#' m <- 100
#' loc_2d_mesh <- matrix(runif(m * 2), m, 2)
#' mesh_2d <- inla.mesh.2d(
#' loc = loc_2d_mesh,
#' cutoff = 0.05,
#' max.edge = c(0.1, 0.5)
#' )
#' sigma <- 1
#' range <- 0.2
#' nu <- 0.8
#' kappa <- sqrt(8 * nu) / range
#' op <- matern.operators(
#' mesh = mesh_2d, nu = nu,
#' range = range, sigma = sigma, m = 2,
#' parameterization = "matern"
#' )
#' u <- simulate(op)
#' A <- inla.spde.make.A(
#' mesh = mesh_2d,
#' loc = loc_2d_mesh
#' )
#' sigma.e <- 0.1
#' y <- A %*% u + rnorm(m) * sigma.e
#' y <- as.vector(y)
#'
#' data_df <- data.frame(y=y, x1 = loc_2d_mesh[,1],
#' x2 = loc_2d_mesh[,2])
#' coordinates(data_df) <- c("x1", "x2")
#' rspde_model <- rspde.matern(
#' mesh = mesh_2d,
#' nu_upper_bound = 2
#' )
#'
#' cmp <- y ~ Intercept(1) +
#' field(coordinates, model = rspde_model)
#'
#'
#' rspde_fit <- bru(cmp, data = data_df)
#' summary(rspde_fit)
#' }
#' #devel.tag
#' }
bru_get_mapper.inla_rspde <- function(model,...) {
mapper <- list(model = model)
inlabru::bru_mapper_define(mapper, new_class = "bru_mapper_inla_rspde")
}
#' @param mapper A `bru_mapper_inla_rspde` object
#' @rdname bru_get_mapper.inla_rspde
ibm_n.bru_mapper_inla_rspde <- function(mapper, ...) {
model <- mapper[["model"]]
integer_nu <- model$integer.nu
rspde_order <- model$rspde.order
if(integer_nu){
factor_rspde <- 1
} else{
factor_rspde <- rspde_order + 1
}
factor_rspde*model$n.spde
}
#' @rdname bru_get_mapper.inla_rspde
ibm_values.bru_mapper_inla_rspde <- function(mapper, ...) {
seq_len(inlabru::ibm_n(mapper))
}
#' @param input The values for which to produce a mapping matrix
#' @rdname bru_get_mapper.inla_rspde
ibm_jacobian.bru_mapper_inla_rspde <- function(mapper, input, ...) {
model <- mapper[["model"]]
if (!is.null(input) && !is.matrix(input) && !inherits(input, "Spatial")) {
input <- as.matrix(input)
}
if(model$est_nu){
nu <- NULL
} else{
nu <- model$nu
}
rspde_order <- model$rspde.order
rSPDE::rspde.make.A(mesh = model$mesh, loc=input,
rspde.order = rspde_order,
nu=nu)
}
#' @noRd
# Function to process bru's formula
process_formula <- function(bru_result){
form <- bru_result$bru_info$model$formula[3]
form <- as.character(form)
form <- strsplit(form, "f\\(")
form <- form[[1]]
form <- form[-1]
form_proc <- sub(",.*", "",strsplit(form, "f\\(")[1])
if(length(form)>1){
for(i in 2:(length(form))){
form_proc <- paste(form_proc, " + ", sub(",.*", "",strsplit(form, "f\\(")[i]))
}
}
form_proc <- paste("~", "linkfuninv(", form_proc, ")")
return(stats::as.formula(form_proc))
}
#' @noRd
# Function to process the link function
process_link <- function(link_name){
return_link <- switch(link_name,
"log" = function(x){INLA::inla.link.log(x, inverse=TRUE)},
"invlog" = function(x){INLA::inla.link.invlog(x, inverse=TRUE)},
"logit" = function(x){INLA::inla.link.logit(x, inverse=TRUE)},
"invlogit" = function(x){INLA::inla.link.invlogit(x, inverse=TRUE)},
"probit" = function(x){INLA::inla.link.probit(x, inverse=TRUE)},
"invprobit" = function(x){INLA::inla.link.invprobit(x, inverse=TRUE)},
"cloglog" = function(x){INLA::inla.link.cloglog(x, inverse=TRUE)},
"invcloglog" = function(x){INLA::inla.link.invcloglog(x, inverse=TRUE)},
"tan" = function(x){INLA::inla.link.tan(x, inverse=TRUE)},
"invtan" = function(x){INLA::inla.link.invtan(x, inverse=TRUE)},
"identity" = function(x){INLA::inla.link.identity(x, inverse=TRUE)},
"invidentity" = function(x){INLA::inla.link.invidentity(x, inverse=TRUE)}
)
return(return_link)
}
#' @noRd
bru_rerun_with_data <- function(result, idx_data, true_CV, fit_verbose) {
stopifnot(inherits(result, "bru"))
if(!true_CV){
options <- list(control.mode = list(restart = FALSE,
theta=result$mode$theta, fixed = TRUE))
} else{
options <- list()
}
if(fit_verbose){
options$verbose <- TRUE
} else{
options$verbose <- FALSE
}
info <- result[["bru_info"]]
info[["options"]] <- inlabru::bru_call_options(
inlabru::bru_options(
info[["options"]],
inlabru::as.bru_options(options)
)
)
original_timings <- result[["bru_timings"]]
lhoods_tmp <- info[["lhoods"]]
lhoods_tmp[[1]]$response_data$BRU_response <- lhoods_tmp[[1]]$response_data$BRU_response[idx_data]
# lhoods_tmp[[1]]$data <- lhoods_tmp[[1]]$data[idx_data,]
lhoods_tmp[[1]]$data <- select_indexes(lhoods_tmp[[1]]$data, idx_data)
if(length(lhoods_tmp[[1]]$E)>1){
lhoods_tmp[[1]]$E <- lhoods_tmp[[1]]$E[idx_data]
}
if(length(lhoods_tmp[[1]]$Ntrials)>1){
lhoods_tmp[[1]]$Ntrials <- lhoods_tmp[[1]]$Ntrials[idx_data]
}
if(length(lhoods_tmp[[1]]$weights)>1){
lhoods_tmp[[1]]$weights <- lhoods_tmp[[1]]$weights[idx_data]
}
if(isS4(lhoods_tmp[[1]]$data)){
lhoods_tmp[[1]]$drange <- lapply(lhoods_tmp[[1]]$data@data, function(i){range(i)})
} else{
lhoods_tmp[[1]]$drange <- lapply(lhoods_tmp[[1]]$data, function(i){range(i)})
}
# Get the components list
list_of_components <- names(info[["model"]][["effects"]])
backup_list <- list()
total_length <- NULL
small_length <- length(idx_data)
for(comp in list_of_components){
name_input_group <- info[["model"]][["effects"]][[comp]][["group"]][["input"]][["input"]]
if(!is.null(name_input_group)){
name_input_group <- as.character(name_input_group)
comp_group_tmp <- info[["model"]][["effects"]][[comp]][["env"]][[name_input_group]]
if(is.null(total_length) && !is.null(comp_group_tmp)){
total_length <- length(comp_group_tmp)
if(length(comp_group_tmp) == total_length){
backup_list[[comp]][["group_val"]] <- info[["model"]][["effects"]][[comp]][["env"]][[name_input_group]]
comp_group_tmp <- comp_group_tmp[idx_data]
assign(name_input_group, comp_group_tmp, envir = info[["model"]][["effects"]][[comp]][["env"]])
}
}
}
name_input_repl <- info[["model"]][["effects"]][[comp]][["replicate"]][["input"]][["input"]]
if(!is.null(name_input_repl)){
name_input_repl <- as.character(name_input_repl)
comp_repl_tmp <- info[["model"]][["effects"]][[comp]][["env"]][[name_input_repl]]
if(is.null(total_length) && !is.null(comp_repl_tmp)){
total_length <- length(comp_repl_tmp)
if(length(comp_repl_tmp) == total_length){
backup_list[[comp]][["repl_val"]] <- info[["model"]][["effects"]][[comp]][["env"]][[name_input_repl]]
comp_repl_tmp <- comp_repl_tmp[idx_data]
assign(name_input_repl, comp_repl_tmp, envir = info[["model"]][["effects"]][[comp]][["env"]])
}
}
}
}
if(!true_CV){
result <- inlabru::iinla(
model = info[["model"]],
lhoods = lhoods_tmp,
options = info[["options"]]
)
} else{
result <- inlabru::iinla(
model = info[["model"]],
lhoods = lhoods_tmp,
initial = result,
options = info[["options"]]
)
}
# Assigning back:
for(comp in list_of_components){
name_input_group <- info[["model"]][["effects"]][[comp]][["group"]][["input"]][["input"]]
if(!is.null(name_input_group)){
name_input_group <- as.character(name_input_group)
if(!is.null(backup_list[[comp]][["group_val"]])){
assign(name_input_group, backup_list[[comp]][["group_val"]], envir = info[["model"]][["effects"]][[comp]][["env"]])
}
}
name_input_repl <- info[["model"]][["effects"]][[comp]][["replicate"]][["input"]][["input"]]
if(!is.null(name_input_repl)){
name_input_repl <- as.character(name_input_repl)
if(!is.null(backup_list[[comp]][["repl_val"]])){
assign(name_input_repl, backup_list[[comp]][["repl_val"]], envir = info[["model"]][["effects"]][[comp]][["env"]])
}
}
}
timing_end <- Sys.time()
result$bru_timings <-
rbind(
original_timings[1, , drop = FALSE],
result[["bru_iinla"]][["timings"]]
)
# Add bru information to the result
result$bru_info <- info
class(result) <- c("bru", class(result))
return(result)
}
#' @noRd
get_post_var <- function(density_df){
min_x <- min(density_df[, "x"])
max_x <- max(density_df[, "x"])
denstemp <- function(x) {
dens <- sapply(x, function(z) {
if (z < min_x) {
return(0)
} else if (z > max_x) {
return(0)
} else {
return(approx(x = density_df[, "x"], y = density_df[, "y"], xout = z)$y)
}
})
return(dens)
}
post_var <- stats::integrate(
f = function(z) {
denstemp(z) * 1/z
}, lower = min_x, upper = max_x,
subdivisions = nrow(density_df),
stop.on.error = FALSE
)$value
return(post_var)
}
#' @noRd
prepare_df_pred <- function(df_pred, result, idx_test){
info <- result[["bru_info"]]
list_of_components <- names(info[["model"]][["effects"]])
for(comp in list_of_components){
name_input_group <- info[["model"]][["effects"]][[comp]][["group"]][["input"]][["input"]]
if(!is.null(name_input_group)){
name_input_group <- as.character(name_input_group)
comp_group_tmp <- info[["model"]][["effects"]][[comp]][["env"]][[name_input_group]]
comp_group_tmp <- comp_group_tmp[idx_test]
df_pred[[name_input_group]] <- comp_group_tmp
}
name_input_repl <- info[["model"]][["effects"]][[comp]][["replicate"]][["input"]][["input"]]
if(!is.null(name_input_repl)){
name_input_repl <- as.character(name_input_repl)
comp_repl_tmp <- info[["model"]][["effects"]][[comp]][["env"]][[name_input_repl]]
comp_repl_tmp <- comp_repl_tmp[idx_test]
df_pred[[name_input_repl]] <- comp_repl_tmp
}
}
return(df_pred)
}
#' @name cross_validation
#' @title Perform cross-validation on a list of fitted models.
#' @description Obtain several scores for a list of fitted models according
#' to a folding scheme.
#' @param models A fitted model obtained from calling the `bru()` function or a list of models fitted with the `bru()` function.
#' @param model_names A vector containing the names of the models to appear in the returned `data.frame`. If `NULL`, the names will be of the form `Model 1`, `Model 2`, and so on. By default, it will try to obtain the name from the models list.
#' @param scores A vector containing the scores to be computed. The options are "mse", "crps", "scrps" and "dss". By default, all scores are computed.
#' @param cv_type The type of the folding to be carried out. The options are `k-fold` for `k`-fold cross-validation, in which case the parameter `k` should be provided,
#' `loo`, for leave-one-out and `lpo` for leave-percentage-out, in this case, the parameter `percentage` should be given, and also the `number_folds`
#' with the number of folds to be done. The default is `k-fold`.
#' @param k The number of folds to be used in `k`-fold cross-validation. Will only be used if `cv_type` is `k-fold`.
#' @param percentage The percentage (from 1 to 99) of the data to be used to train the model. Will only be used if `cv_type` is `lpo`.
#' @param number_folds Number of folds to be done if `cv_type` is `lpo`.
#' @param n_samples Number of samples to compute the posterior statistics to be used to compute the scores.
#' @param return_scores_folds If `TRUE`, the scores for each fold will also be returned.
#' @param orientation_results character vector. The options are "negative" and "positive". If "negative", the smaller the scores the better. If "positive", the larger the scores the better.
#' @param include_best Should a row indicating which model was the best for each score be included?
#' @param train_test_indexes A list containing two entries `train`, which is a list whose elements are vectors of indexes of the training data, and `test`, which is a list whose elements are vectors of indexes of the test data.
#' Typically this will be returned list obtained by setting the argument `return_train_test` to `TRUE`.
#' @param return_train_test Logical. Should the training and test indexes be returned? If 'TRUE' the train and test indexes will the 'train_test' element of the returned list.
#' @param parallelize_RP Logical. Should the computation of CRPS and SCRPS be parallelized?
#' @param n_cores_RP Number of cores to be used if `parallelize_rp` is `TRUE`.
#' @param true_CV Should a `TRUE` cross-validation be performed? If `TRUE` the models will be fitted on the training dataset. If `FALSE`, the parameters will be kept fixed at the ones obtained in the result object.
#' @param save_settings Logical. If `TRUE`, the settings used in the cross-validation will also be returned.
#' @param print Should partial results be printed throughout the computation?
#' @param fit_verbose Should INLA's run during cross-validation be verbose?
#' @return A data.frame with the fitted models and the corresponding scores.
#' @export
cross_validation <- function(models, model_names = NULL, scores = c("mse", "crps", "scrps", "dss"),
cv_type = c("k-fold", "loo", "lpo"),
k = 5, percentage = 20, number_folds = 10,
n_samples = 1000, return_scores_folds = FALSE,
orientation_results = c("negative", "positive"),
include_best = TRUE,
train_test_indexes = NULL,
return_train_test = FALSE,
parallelize_RP = FALSE, n_cores_RP = parallel::detectCores()-1,
true_CV = FALSE, save_settings = FALSE, print = TRUE,
fit_verbose = FALSE){
orientation_results <- orientation_results[[1]]
if(!(orientation_results %in% c("positive", "negative"))){
stop("orientation_results must be either 'positive' or 'negative'!")
}
scores <- intersect(scores, c("mse", "crps", "scrps", "dss"))
cv_type <- cv_type[[1]]
if(!(cv_type %in% c("k-fold", "loo", "lpo"))){
stop("The possible options for cv_type are 'k-fold', 'loo' or 'lpo'!")
}
if(!is.numeric(percentage)){
stop("percentage must be numeric!")
}
if(percentage %%1 != 0){
warning("Non-integer percentage given, it will be rounded to an integer number.")
percentage <- round(percentage)
}
if(percentage <= 0 || percentage >= 100){
stop("percentage must be a number between 1 and 99!")
}
if(!is.numeric(number_folds)){
stop("number_folds must be numeric!")
}
if(number_folds %% 1 != 0){
warning("Non-integer number_folds given, it will be rounded to an integer number.")
number_folds <- round(number_folds)
}
if(number_folds <= 0){
stop("number_folds must be positive!")
}
if(!is.numeric(n_samples)){
stop("n_samples must be numeric!")
}
if(n_samples %% 1 != 0){
warning("Non-integer n_samples given, it will be rounded to an integer number.")
n_samples <- round(n_samples)
}
if(n_samples <= 0){
stop("n_samples must be positive!")
}
if(!is.list(models)){
stop("models should either be a result from a bru() call or a list of results from bru() calls!")
}
if(parallelize_RP){
cluster_tmp <- parallel::makeCluster(n_cores_RP)
doParallel::registerDoParallel(cluster_tmp)
}
if(inherits(models, "bru")){
models <- list(models)
} else{
for(i in 1:length(models)){
if(!inherits(models[[i]],"bru")){
stop("models must be either a result from a bru call or a list of results from bru() calls!")
}
}
}
if(is.null(model_names) && is.list(models)){
model_names <- names(models)
}
if(!is.null(model_names)){
if(!is.character(model_names)){
stop("model_names must be a vector of strings!")
}
if(length(models)!= length(model_names)){
stop("model_names must contain one name for each model!")
}
} else{
model_names <- vector(mode = "character", length(models))
for(i in 1:length(models)){
model_names[i] <- paste("Model",i)
}
}
# Getting the data if NULL
data <- models[[1]]$bru_info$lhoods[[1]]$data
if(is.vector(data)){
data <- as.data.frame(data)
}
# Creating lists of train and test datasets
if(is.null(train_test_indexes)){
if(inherits(data, "metric_graph_data")){
idx <- seq_len(nrow(as.data.frame(data)))
} else{
idx <- seq_len(nrow(data))
}
if(inherits(data, "SpatialPointsDataFrame")){
data_tmp <- data@data
data_nonNA <- !is.na(data_tmp)
} else if(inherits(data, "metric_graph_data")){
data_nonNA <- !is.na(as.data.frame(data))
} else {
data_nonNA <- !is.na(data)
}
idx_nonNA <- sapply(1:length(idx), function(i){all(data_nonNA[i,])})
idx <- idx[idx_nonNA]
if(cv_type == "k-fold"){
# split idx into k
folds <- cut(sample(idx), breaks = k, label = FALSE)
test_list_idx <- lapply(1:k, function(i) {which(folds == i, arr.ind = TRUE)})
test_list <- lapply(test_list_idx, function(idx_test){idx[idx_test]})
train_list <- lapply(1:k, function(i){
idx[-test_list_idx[[i]]]
})
} else if (cv_type == "loo"){
train_list <- lapply(1:length(idx), function(i){
idx[-i]
})
# test_list <- lapply(1:length(idx), function(i){idx[i]})
test_list <- as.list(idx)
} else if (cv_type == "lpo"){
test_list_idx <- list()
n_Y <- length(idx)
for (i in number_folds:1) {
test_list_idx[[i]] <- sample(1:length(idx), size = (1-percentage/100) * n_Y)
}
train_list <- lapply(1:number_folds, function(i){
idx[-test_list_idx[[i]]]
})
test_list <- lapply(test_list_idx, function(idx_test){idx[idx_test]})
}
} else{
if(!is.list(train_test_indexes)){
stop("train_test_indexes should be a list!")
}
if(is.null(train_test_indexes[["train"]])){
stop("train_test_indexes must contain a 'train' element.")
}
if(is.null(train_test_indexes[["test"]])){
stop("train_test_indexes must contain a 'test' element.")
}
if(!is.list(train_test_indexes[["train"]])){
stop("train_test_indexes$train must be a list!")
}
if(!is.list(train_test_indexes[["test"]])){
stop("train_test_indexes$test must be a list!")
}
train_list <- train_test_indexes[["train"]]
test_list <- train_test_indexes[["test"]]
}
# Perform the cross-validation
result_df <- data.frame(Model = model_names)
dss <- matrix(numeric(length(train_list)*length(models)), ncol = length(models))
mse <- matrix(numeric(length(train_list)*length(models)), ncol = length(models))
crps <- matrix(numeric(length(train_list)*length(models)), ncol = length(models))
scrps <- matrix(numeric(length(train_list)*length(models)), ncol = length(models))
# Get the formulas for the models
formula_list <- lapply(models, function(model){process_formula(model)})
for(fold in 1:length(train_list)){
for(model_number in 1:length(models)){
if(print){
cat(paste("Fold:",fold,"/",length(train_list),"\n"))
if(!is.null(model_names)){
cat(paste("Model:",model_names[[model_number]],"\n"))
} else{
cat(paste("Model:", model_number,"\n"))
}
}
# Generate posterior samples of the mean
if(is.null(models[[model_number]]$.args)){
stop("There was a problem with INLA's fit. Please, check your model specifications carefully and re-fit the model.")
}
df_train <- select_indexes(data, train_list[[fold]])
df_pred <- select_indexes(data, test_list[[fold]])
if(models[[model_number]]$.args$family == "gaussian"){
link_name <- models[[model_number]]$.args$control.family[[1]]$link
if(link_name == "default"){
linkfuninv <- function(x){x}
} else {
linkfuninv <- process_link(link_name)
}
df_pred <- prepare_df_pred(df_pred, models[[model_number]], test_list[[fold]])
new_model <- bru_rerun_with_data(models[[model_number]], train_list[[fold]], true_CV = true_CV, fit_verbose = fit_verbose)
resp_var <- as.character(models[[model_number]]$bru_info$lhoods[[1]]$formula[2])
formula_tmp <- formula_list[[model_number]]
env_tmp <- environment(formula_tmp)
assign("linkfuninv", linkfuninv, envir = env_tmp)
if(print){
cat("Generating samples...\n")
}
if(("crps" %in% scores) || ("scrps" %in% scores)){
posterior_samples <- inlabru::generate(new_model, newdata = df_pred, formula = formula_tmp, n.samples = 2 * n_samples)
} else {
posterior_samples <- inlabru::generate(new_model, newdata = df_pred, formula = formula_tmp, n.samples = n_samples)
}
if(print){
cat("Samples generated!\n")
}
test_data <- data[[resp_var]][test_list[[fold]]]
if(nrow(posterior_samples) == 1){
posterior_samples <- matrix(rep(posterior_samples, length(test_data)),ncol=ncol(posterior_samples), byrow = TRUE)
}
posterior_mean <- rowMeans(posterior_samples)
if("dss" %in% scores){
density_df <- new_model$marginals.hyperpar$`Precision for the Gaussian observations`
Expect_post_var <- tryCatch(get_post_var(density_df), error = function(e) NA)
if(is.na(Expect_post_var)){
Expect_post_var <- 1/new_model$summary.hyperpar["Precision for the Gaussian observations","mean"]
}
posterior_variance_of_mean <- rowMeans(posterior_samples^2) - posterior_mean^2
post_var <- Expect_post_var + posterior_variance_of_mean
dss[fold, model_number] <- mean((test_data - posterior_mean)^2/post_var + log(post_var))
if(orientation_results == "positive"){
dss[fold, model_number] <- - dss[fold, model_number]
}
if(print){
cat(paste("DSS:", dss[fold, model_number],"\n"))
}
}
if("mse" %in% scores){
mse[fold, model_number] <- mean((test_data - posterior_mean)^2)
if(orientation_results == "positive"){
mse[fold, model_number] <- - mse[fold, model_number]
}
if(print){
cat(paste("MSE:",mse[fold, model_number],"\n"))
}
}
if(("crps" %in% scores) || ("scrps" %in% scores)){
hyper_sample <- INLA::inla.hyperpar.sample(n_samples, new_model, improve.marginals=TRUE)
# phi_sample <- INLA::inla.rmarginal(n_samples, new_model$marginals.hyperpar$`Precision for the Gaussian observations`)
phi_sample_1 <- as.vector(hyper_sample[,"Precision for the Gaussian observations"])
sd_sample_1 <- 1/sqrt(phi_sample_1)
hyper_sample2 <- INLA::inla.hyperpar.sample(n_samples, new_model, improve.marginals=TRUE)
phi_sample_2 <- as.vector(hyper_sample2[,"Precision for the Gaussian observations"])
sd_sample_2 <- 1/sqrt(phi_sample_2)
if(parallelize_RP){
Y1_sample <- foreach::`%dopar%`(foreach::foreach(i = 1:length(test_data)), {
posterior_samples[i,1:n_samples] + sd_sample_1 * rnorm(n_samples)
})
Y2_sample <- foreach::`%dopar%`(foreach::foreach(i = 1:length(test_data)), {
posterior_samples[i,(n_samples+1):(2*n_samples)] + sd_sample_2 * rnorm(n_samples)
})
E1_tmp <- foreach::`%dopar%`(foreach::foreach(i = 1:length(test_data)), {
mean(abs(Y1_sample[[i]]-test_data[i]))
})
E2_tmp <- foreach::`%dopar%`(foreach::foreach(i = 1:length(test_data)), {
mean(abs(Y1_sample[[i]]-Y2_sample[[i]]))
})
} else{
Y1_sample <- lapply(1:length(test_data), function(i){
posterior_samples[i,1:n_samples] + sd_sample_1 * rnorm(n_samples)
})
Y2_sample <- lapply(1:length(test_data), function(i){
posterior_samples[i,(n_samples+1):(2*n_samples)] + sd_sample_2 * rnorm(n_samples)
})
E1_tmp <- lapply(1:length(test_data), function(i){
mean(abs(Y1_sample[[i]]-test_data[i]))
})
E2_tmp <- lapply(1:length(test_data), function(i){
mean(abs(Y1_sample[[i]]-Y2_sample[[i]]))
})
}
if("crps" %in% scores){
crps_temp <- lapply(1:length(test_data), function(i){
return(E1_tmp[[i]] - 0.5*E2_tmp[[i]])
})
crps_temp <- unlist(crps_temp)
crps[fold, model_number] <- mean(crps_temp)
if(orientation_results == "negative"){
crps[fold, model_number] <- crps[fold, model_number]
}
if(print){
cat(paste("CRPS:",crps[fold, model_number],"\n"))
}
}
if("scrps" %in% scores){
scrps_temp <- lapply(1:length(test_data), function(i){
return(-E1_tmp[[i]]/E2_tmp[[i]] - 0.5*log(E2_tmp[[i]]))
})
scrps_temp <- unlist(scrps_temp)
scrps[fold, model_number] <- mean(scrps_temp)
if(orientation_results == "negative"){
scrps[fold, model_number] <- - scrps[fold, model_number]
}
if(print){
cat(paste("SCRPS:",scrps[fold, model_number],"\n"))
}
}
}
} else if (models[[model_number]]$.args$family == "gamma"){
link_name <- models[[model_number]]$.args$control.family[[1]]$link
if(link_name == "default"){
linkfuninv <- function(x){exp(x)}
} else {
linkfuninv <- process_link(link_name)
}
formula_tmp <- formula_list[[model_number]]
env_tmp <- environment(formula_tmp)
assign("linkfuninv", linkfuninv, envir = env_tmp)
df_pred <- prepare_df_pred(df_pred, models[[model_number]], test_list[[fold]])
new_model <- bru_rerun_with_data(models[[model_number]], train_list[[fold]], true_CV = true_CV, fit_verbose = fit_verbose)
resp_var <- as.character(models[[model_number]]$bru_info$lhoods[[1]]$formula[2])
if(print){
cat("Generating samples...\n")
}
if(("crps" %in% scores) || ("scrps" %in% scores)){
posterior_samples <- inlabru::generate(new_model, newdata = df_pred, formula = formula_tmp, n.samples = 2 * n_samples)
} else {
posterior_samples <- inlabru::generate(new_model, newdata = df_pred, formula = formula_tmp, n.samples = n_samples)
}
if(print){
cat("Samples generated!\n")
}
test_data <- models[[model_number]]$bru_info$lhoods[[1]]$response_data$BRU_response[test_list[[fold]]]
if(nrow(posterior_samples) == 1){
posterior_samples <- matrix(rep(posterior_samples, length(test_data)),ncol=ncol(posterior_samples), byrow = TRUE)
}
posterior_mean <- rowMeans(posterior_samples)
if("dss" %in% scores){
Expected_post_var <- new_model$summary.hyperpar["Precision parameter for the Gamma observations","mean"]/(posterior_mean^2)
posterior_variance_of_mean <- rowMeans(posterior_samples^2) - posterior_mean^2
post_var <- Expected_post_var + posterior_variance_of_mean
dss[fold, model_number] <- mean((test_data - posterior_mean)^2/post_var + log(post_var))
if(orientation_results == "positive"){
dss[fold, model_number] <- - dss[fold, model_number]
}
if(print){
cat(paste("DSS:", dss[fold, model_number],"\n"))
}
}
if("mse" %in% scores){
mse[fold, model_number] <- mean((test_data - posterior_mean)^2)
if(orientation_results == "positive"){
mse[fold, model_number] <- - mse[fold, model_number]
}
if(print){
cat(paste("MSE:",mse[fold, model_number],"\n"))
}
}
if(("crps" %in% scores) || ("scrps" %in% scores)){
hyper_sample <- INLA::inla.hyperpar.sample(n_samples, new_model, improve.marginals=TRUE) # INLA::inla.rmarginal(n_samples, new_model$marginals.hyperpar$`Precision parameter for the Gamma observations`)
phi_sample_1 <- as.vector(hyper_sample[,"Precision parameter for the Gamma observations"])
hyper_sample2 <- INLA::inla.hyperpar.sample(n_samples, new_model, improve.marginals=TRUE)
phi_sample_2 <- as.vector(hyper_sample[,"Precision parameter for the Gamma observations"])
if(parallelize_RP){
Y1_sample <- foreach::`%dopar%`(foreach::foreach(i = 1:length(test_data)), {
scale_temp <- posterior_samples[i,1:n_samples] / phi_sample_1
stats::rgamma(n_samples, shape = phi_sample_1, scale = scale_temp)
})
Y2_sample <- foreach::`%dopar%`(foreach::foreach(i = 1:length(test_data)), {
scale_temp <- posterior_samples[i,(n_samples+1):(2*n_samples)] / phi_sample_2
stats::rgamma(n_samples, shape = phi_sample_2, scale = scale_temp)
})
E1_tmp <- foreach::`%dopar%`(foreach::foreach(i = 1:length(test_data)), {
mean(abs(Y1_sample[[i]]-test_data[i]))
})
E2_tmp <- foreach::`%dopar%`(foreach::foreach(i = 1:length(test_data)), {
mean(abs(Y1_sample[[i]]-Y2_sample[[i]]))
})
} else{
Y1_sample <- lapply(1:length(test_data), function(i){
scale_temp <- posterior_samples[i,1:n_samples] / phi_sample_1
stats::rgamma(n_samples, shape = phi_sample_1, scale = scale_temp)
})
Y2_sample <- lapply(1:length(test_data), function(i){
scale_temp <- posterior_samples[i,(n_samples+1):(2*n_samples)] / phi_sample_2
stats::rgamma(n_samples, shape = phi_sample_2, scale = scale_temp)
})
E1_tmp <- lapply(1:length(test_data), function(i){
mean(abs(Y1_sample[[i]]-test_data[i]))
})
E2_tmp <- lapply(1:length(test_data), function(i){
mean(abs(Y1_sample[[i]]-Y2_sample[[i]]))
})
}
if("crps" %in% scores){
crps_temp <- lapply(1:length(test_data), function(i){
return(E1_tmp[[i]] - 0.5*E2_tmp[[i]])
})
crps_temp <- unlist(crps_temp)
crps[fold, model_number] <- mean(crps_temp)
if(orientation_results == "negative"){
crps[fold, model_number] <- crps[fold, model_number]
}
if(print){
cat(paste("CRPS:",crps[fold, model_number],"\n"))
}
}
if("scrps" %in% scores){
scrps_temp <- lapply(1:length(test_data), function(i){
return(-E1_tmp[[i]]/E2_tmp[[i]] - 0.5*log(E2_tmp[[i]]))
})
scrps_temp <- unlist(scrps_temp)
scrps[fold, model_number] <- mean(scrps_temp)
if(orientation_results == "negative"){
scrps[fold, model_number] <- - scrps[fold, model_number]
}
}
}
} else if (models[[model_number]]$.args$family == "poisson"){
link_name <- models[[model_number]]$.args$control.family[[1]]$link
if(link_name == "default"){
linkfuninv <- function(x){exp(x)}
} else {
linkfuninv <- process_link(link_name)
}
formula_tmp <- formula_list[[model_number]]
env_tmp <- environment(formula_tmp)
assign("linkfuninv", linkfuninv, envir = env_tmp)
df_pred <- prepare_df_pred(df_pred, models[[model_number]], test_list[[fold]])
new_model <- bru_rerun_with_data(models[[model_number]], train_list[[fold]], true_CV = true_CV, fit_verbose = fit_verbose)
resp_var <- as.character(models[[model_number]]$bru_info$lhoods[[1]]$formula[2])
if(print){
cat("Generating samples...\n")
}
if(("crps" %in% scores) || ("scrps" %in% scores)){
posterior_samples <- inlabru::generate(new_model, newdata = df_pred, formula = formula_tmp, n.samples = 2 * n_samples)
} else {
posterior_samples <- inlabru::generate(new_model, newdata = df_pred, formula = formula_tmp, n.samples = n_samples)
}
if(print){
cat("Samples generated!\n")
}
test_data <- models[[model_number]]$bru_info$lhoods[[1]]$response_data$BRU_response[test_list[[fold]]]
if(nrow(posterior_samples) == 1){
posterior_samples <- matrix(rep(posterior_samples, length(test_data)),ncol=ncol(posterior_samples), byrow = TRUE)
}
posterior_mean <- rowMeans(posterior_samples)
if("dss" %in% scores){
posterior_variance_of_mean <- rowMeans(posterior_samples^2) - posterior_mean^2
post_var <- posterior_mean + posterior_variance_of_mean
dss[fold, model_number] <- mean((test_data - posterior_mean)^2/post_var + log(post_var))
if(orientation_results == "positive"){
dss[fold, model_number] <- - dss[fold, model_number]
}
if(print){
cat(paste("DSS:", dss[fold, model_number],"\n"))
}
}
if("mse" %in% scores){
mse[fold, model_number] <- mean((test_data - posterior_mean)^2)
if(orientation_results == "positive"){
mse[fold, model_number] <- - mse[fold, model_number]
}
if(print){
cat(paste("MSE:",mse[fold, model_number],"\n"))
}
}
if(("crps" %in% scores) || ("scrps" %in% scores)){
if(parallelize_RP){
Y1_sample <- foreach::`%dopar%`(foreach::foreach(i = 1:length(test_data)), {
stats::rpois(n_samples, posterior_samples[i,1:n_samples])
})
Y2_sample <- foreach::`%dopar%`(foreach::foreach(i = 1:length(test_data)), {
stats::rpois(n_samples, posterior_samples[i,(n_samples+1):(2*n_samples)])
})
E1_tmp <- foreach::`%dopar%`(foreach::foreach(i = 1:length(test_data)), {
mean(abs(Y1_sample[[i]]-test_data[i]))
})
E2_tmp <- foreach::`%dopar%`(foreach::foreach(i = 1:length(test_data)), {
mean(abs(Y1_sample[[i]]-Y2_sample[[i]]))
})
} else{
Y1_sample <- lapply(1:length(test_data), function(i){
stats::rpois(n_samples, posterior_samples[i,1:n_samples])
})
Y2_sample <- lapply(1:length(test_data), function(i){
stats::rpois(n_samples, posterior_samples[i,(n_samples+1):(2*n_samples)])
})
E1_tmp <- lapply(1:length(test_data), function(i){
mean(abs(Y1_sample[[i]]-test_data[i]))
})
E2_tmp <- lapply(1:length(test_data), function(i){
mean(abs(Y1_sample[[i]]-Y2_sample[[i]]))
})
}
if("crps" %in% scores){
crps_temp <- lapply(1:length(test_data), function(i){
return(E1_tmp[[i]] - 0.5*E2_tmp[[i]])
})
crps_temp <- unlist(crps_temp)
crps[fold, model_number] <- mean(crps_temp)
if(orientation_results == "negative"){
crps[fold, model_number] <- crps[fold, model_number]
}
if(print){
cat(paste("CRPS:",crps[fold, model_number],"\n"))
}
}
if("scrps" %in% scores){
scrps_temp <- lapply(1:length(test_data), function(i){
return(-E1_tmp[[i]]/E2_tmp[[i]] - 0.5*log(E2_tmp[[i]]))
})
scrps_temp <- unlist(scrps_temp)
scrps[fold, model_number] <- mean(scrps_temp)
if(orientation_results == "negative"){
scrps[fold, model_number] <- - scrps[fold, model_number]
}
}
}
}
}
}
if("dss" %in% scores){
dss_mean <- colMeans(dss)
result_df <- data.frame(result_df, dss = dss_mean)
}
if("mse" %in% scores){
mse_mean <- colMeans(mse)
result_df <- data.frame(result_df, mse = mse_mean)
}
if("crps" %in% scores){
crps_mean <- colMeans(crps)
result_df <- data.frame(result_df, crps = crps_mean)
}
if("scrps" %in% scores){
scrps_mean <- colMeans(scrps)
result_df <- data.frame(result_df, scrps = scrps_mean)
}
if(save_settings){
settings_list <- list(n_samples = n_samples, cv_type = cv_type, true_CV = true_CV,
orientation_results = orientation_results)
if(cv_type == "k-fold"){
settings_list[["k"]] <- k
} else if(cv_type == "lpo"){
settings_list[["percentage"]] <- percentage
settings_list[["number_folds"]] <- number_folds
}
}
if(include_best){
n_fit_scores <- ncol(result_df)-1
final_row <- c("Best")
for(j in 2:ncol(result_df)){
if(orientation_results == "negative"){
best_tmp <- which.min(result_df[,j])
final_row <- c(final_row, model_names[best_tmp])
} else{
best_tmp <- which.max(result_df[,j])
final_row <- c(final_row, model_names[best_tmp])
}
}
result_df <- rbind(result_df, final_row)
row.names(result_df)[nrow(result_df)] <- ""
}
if(parallelize_RP){
parallel::stopCluster(cluster_tmp)
}
if(!return_scores_folds){
if(save_settings){
out <- list(scores_df = result_df,
settings = settings_list)
if(return_train_test){
out[["train_test"]] <- list(train = train_list, test = test_list)
}
} else if(return_train_test){
out <- list(scores_df = result_df, train_test = list(train = train_list, test = test_list))
} else{
out <- result_df
}
} else{
colnames(dss) <- model_names
colnames(mse) <- model_names
colnames(crps) <- model_names
colnames(scrps) <- model_names
out <- list(scores_df = result_df,
scores_folds = list(dss = dss, mse = mse, crps = crps, scrps = scrps))
if(save_settings){
out[["settings"]] <- settings_list
}
if(return_train_test){
out[["train_test"]] <- list(train = train_list, test = test_list)
}
}
return(out)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.