#' Generate model names / IDs
#'
#' @param params Original list of parameters for this model (outcome variable name, predictor names, stratification criteria and model ID)
#' @param H2O.model.object H2O model object (if used)
#' @param model_algorithm The name of the modeling algorithm
#' @param name Additional name previously specified in \code{model_contrl} list
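#' @examples
#' \dontrun{
#' ## Illustrative sketch with hypothetical inputs (no H2O model object supplied):
#' assign_model_name_id(params = list(Model_idx = 2),
#'                      model_algorithm = "gbm",
#'                      name = "depth5")
#' ## returns list("m.2.gbm.depth5" = "gbm"): the value is the model ID, while the
#' ## name encodes the model index, the algorithm and the user-supplied label
#' }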
#' @export
assign_model_name_id <- function(params, H2O.model.object, model_algorithm, name = NULL) {
if (!missing(H2O.model.object) && inherits(H2O.model.object, "H2OModel")) {
model_ids <- list(H2O.model.object@model_id)
} else {
model_ids <- list(model_algorithm)
}
new_names <- model_algorithm
if (!is.null(name)) new_names <- new_names %+% "." %+% name
if (!is.null(params[["Model_idx"]])) new_names <- "m." %+% params[["Model_idx"]] %+% "." %+% new_names
names(model_ids) <- new_names
return(model_ids)
}
#' Create a model fit list
#'
#' @param model.fit Model fit object
#' @param model_alg Name of the model algorithm
#' @param fitfunname The name of the main function used for fitting (e.g., "glm")
#' @param params List of main model parameters
#' @param coef Fitted glm coefficients, if used
#' @param nobs Number of observations used for fitting the model
#' @param model_contrl Full list of model control parameters used
#' @param fitclass The name of the R6 object class used for fitting
#' @param ... Additional objects that will be included in the final fit list
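#' @examples
#' \dontrun{
#' ## Minimal sketch wrapping a plain stats::glm fit into the standardized fit list;
#' ## the inputs below (fitclass "GLMmodel", model label "mainGLM") are hypothetical:
#' glm.fit <- glm(mpg ~ cyl + wt, data = mtcars)
#' fit <- create_fit_object(glm.fit, model_alg = "glm", fitfunname = "glm",
#'                          params = list(Model_idx = 1L),
#'                          coef = coef(glm.fit), nobs = nrow(mtcars),
#'                          model_contrl = list(name = "mainGLM"),
#'                          fitclass = "GLMmodel")
#' names(fit$modelfits_all) ## "m.1.glm.mainGLM"
#' }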
#' @export
create_fit_object <- function(model.fit, model_alg, fitfunname, params, coef, nobs, model_contrl, fitclass = "H2Omodel", ...) {
modelfits_all <- vector(mode = "list", length = 1)
modelfits_all[[1]] <- model.fit
model_ids <- assign_model_name_id(params, model.fit, model_alg, model_contrl$name)
names(modelfits_all) <- names(model_ids)
extra_params <- list(...)
fit <- list(
params = params,
coef = coef,
fitfunname = fitfunname,
model_algorithms = list(model_alg),
nobs = nobs,
model_ids = model_ids,
modelfits_all = modelfits_all)
if (length(extra_params) > 0) fit <- c(fit, extra_params)
class(fit) <- c(class(fit)[1], fitclass)
return(fit)
}
#' Create a list with main model parameters
#'
#' @param reg A \code{RegressionClass} object
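#' @examples
#' \dontrun{
#' ## Illustrative sketch, assuming a RegressionClass instance can be constructed
#' ## directly with just these arguments:
#' reg <- RegressionClass$new(outvar = "Y", predvars = c("x1", "x2"))
#' create_fit_params(reg)
#' ## list(outvar = "Y", predvars = c("x1", "x2"), stratify = ..., Model_idx = ...)
#' }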
#' @export
create_fit_params <- function(reg) {
return(list(outvar = reg$outvar, predvars = reg$predvars, stratify = reg$subset_exprs[[1]], Model_idx = reg$Model_idx))
}
#----------------------------------------------------------------------------------
# Class that defines models for the regression problem E[Y|X]
#----------------------------------------------------------------------------------
#' S3 methods for printing model fit summary for PredictionModel R6 class object
#'
#' Prints the modeling summaries
#' @param x The model fit object produced by the function \code{get_fit}.
#' @param model_stats Also print some model summaries?
#' @param all_fits Print all of the modeling fits contained in this object? Warning: this may produce a lot of output!
#' @param ... Additional options passed on to \code{print.PredictionModel}.
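#' @examples
#' \dontrun{
#' ## Assuming `mfit` is a fitted PredictionModel object (e.g., as returned by get_fit):
#' print(mfit, model_stats = TRUE)
#' }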
#' @export
print.PredictionModel <- function(x, model_stats = FALSE, all_fits = FALSE, ...) {
x$show(model_stats = model_stats, all_fits = all_fits)
return(invisible(NULL))
}
## ---------------------------------------------------------------------
## R6 class for fitting and making predictions with a single regression model or a grid of regression models E(outvar | predvars)
## The class has methods that perform queries to data storage R6 class DataStorageClass
## to get appropriate data columns & row subsets
##
## @docType class
## @format An \code{\link{R6Class}} generator object
## @keywords R6 class
## @details
## \itemize{
## \item{ModelFitObject} - Pointer to an instance of \code{ModelFitObject} class that contains the data.
## }
## @section Methods:
## \describe{
## \item{\code{new(reg)}}{Uses \code{reg} R6 \code{RegressionClass} object to instantiate a new model.}
## \item{\code{show()}}{Print information on outcome and predictor names used in this regression model}
## \item{\code{fit()}}{...}
## \item{\code{copy.fit()}}{...}
## \item{\code{predict()}}{...}
## \item{\code{copy.predict()}}{...}
## \item{\code{predictAeqa()}}{...}
## }
#' @importFrom assertthat assert_that is.flag
PredictionModel <- R6Class(classname = "PredictionModel",
cloneable = TRUE,
portable = TRUE,
class = TRUE,
public = list(
# classify = FALSE,
reg = NULL,
Model_idx = integer(),
outvar = character(), # outcome name(s)
predvars = character(), # names of predictor vars
runCV = logical(),
is.fitted = FALSE,
nodes = NULL,
OData_train = NULL, # object of class DataStorageClass used for training
OData_valid = NULL, # object of class DataStorageClass used for scoring models (contains validation data)
ModelFitObject = NULL, # object of class ModelFitObject that is used in fitting / prediction
# best_refit_only = FALSE,
BestModelFitObject = NULL,
fit.package = character(),
fit.algorithm = character(),
grid.algorithm = character(),
method = NA, # model selection method used
n_obs_fit = NA_integer_, # total number of observations used for fitting the model
model_contrl = list(),
useH2Oframe = FALSE,
# subset_vars = NULL, # THE VAR NAMES WHICH WILL BE TESTED FOR MISSINGNESS AND WILL DEFINE SUBSETTING
subset_exprs = NULL, # THE (SINGLE) LOGICAL SUBSETTING EXPRESSION, WHICH WILL BE EVALUATED IN THE ENVIRONMENT OF THE data
# subset_idx = NULL, # Logical vector of length n (TRUE = include the obs)
# subset_train = NULL,
ReplMisVal0 = logical(),
initialize = function(reg, useH2Oframe = FALSE, ...) {
self$method <- gvars$method # record the model selection method used ("none", "cv", "holdout")
self$model_contrl <- reg$model_contrl
self$useH2Oframe <- useH2Oframe
self$reg <- reg
self$runCV <- reg$runCV
self$Model_idx <- reg$Model_idx
self$fit.package <- reg$model_contrl$fit.package[1L]
self$fit.algorithm <- reg$model_contrl$fit.algorithm[1L]
self$grid.algorithm <- reg$model_contrl$grid.algorithm[1L]
assert_that(is.string(reg$outvar))
assert_that(is.character(reg$predvars))
if ("x" %in% names(self$model_contrl)) {
new.x <- self$model_contrl[["x"]]
if (gvars$verbose) message("overriding default predictors with new ones: " %+% paste0(new.x, collapse=","))
assert_that(is.character(new.x))
reg$predvars <- new.x
self$model_contrl[["x"]] <- NULL
reg$model_contrl[["x"]] <- NULL
}
self$outvar <- reg$outvar
self$predvars <- reg$predvars
# self$subset_vars <- reg$subset_vars[[1]]
self$subset_exprs <- reg$subset_exprs[[1]]
assert_that(length(self$subset_exprs) <= 1)
self$ReplMisVal0 <- reg$ReplMisVal0
# if (is.null(reg$subset_vars)) {self$subset_vars <- TRUE}
# assert_that(is.logical(self$subset_vars) || is.character(self$subset_vars)) # is.call(self$subset_vars) ||
self$ModelFitObject <- self$define_model_fit_object(self$fit.package, self$fit.algorithm, reg, self$useH2Oframe, ...)
# if (gvars$verbose) { print("New instance of " %+% class(self)[1] %+% " :"); self$show() }
invisible(self)
},
define_model_fit_object = function(fit.package, fit.algorithm, reg, useH2Oframe, ...) {
# ***************************************************************************
# Add any additional options passed on to modeling functions as extra args
# Calling the constructor for the fitting model class, dispatching on class name stored in fit.package
# ***************************************************************************
class(fit.package) <- fit.package
ModelFitObject <- newFitModel(fit.package, fit.algorithm, reg, useH2Oframe, ...)
return(ModelFitObject)
},
fit = function(overwrite = FALSE, data, validation_data = NULL, subset_exprs = NULL, ...) { # Move overwrite to a field? ... self$overwrite
if (!overwrite) assert_that(!self$is.fitted) # do not allow overwriting a previously fitted model unless explicitly requested
## save a pointer to training data class used for fitting
self$OData_train <- data
self$nodes <- self$OData_train$nodes
## save a pointer to validation data class used for scoring
## **** NOTE THAT AUTOMATIC SUBSETTING WILL NOT WORK FOR VALIDATION DATA ->
## **** VALIDATION DATA NEEDS TO BE ALREADY SPLIT APPROPRIATELY IF DOING SUBSETS
if (!is.null(validation_data)) self$OData_valid <- validation_data
if (is.null(subset_exprs)) subset_exprs <- self$subset_exprs
subset_idx <- data$evalsubst(subset_exprs = subset_exprs)
self$n_obs_fit <- length(subset_idx)
if (gvars$verbose) { print("gridisl is fitting the model: "); self$show() }
model.fit <- self$ModelFitObject$fit(data, subset_idx = subset_idx, validation_data = validation_data, ...)
# **********************************************************************
# This should not be a fatal error, especially if we are doing a stack / ensemble and only some models have failed
# Should be able to continue, after removing the failed models
# **********************************************************************
if (inherits(model.fit, "try-error")) {
stop("...error while fitting estimator: " %+% self$reg$estimator)
# self$ModelFitObject <- glmModelClass$new(fit.algorithm = "GLM", fit.package = "speedglm", reg = reg, ...)
# model.fit <- self$ModelFitObject$fit(data, subset_idx = subset_idx, ...)
}
self$is.fitted <- TRUE
# **********************************************************************
# to save RAM space when doing many stacked regressions wipe out all internal data:
# **********************************************************************
self$wipe.alldat
return(invisible(self))
},
refit_best_model = function(data, subset_exprs = NULL, ...) {
expand.dots <- list(...)
if (is.null(subset_exprs)) subset_exprs <- self$subset_exprs
if ("subset_idx" %in% names(expand.dots)) {
subset_idx <- data$evalsubst(subset_exprs = expand.dots[["subset_idx"]])
} else {
subset_idx <- data$evalsubst(subset_exprs = subset_exprs)
}
top_model_params <- self$get_best_model_params()
top_model_name <- self$get_best_model_names(1)
best_reg <- RegressionClass$new(outvar = self$outvar, predvars = self$predvars, model_contrl = top_model_params)
self$BestModelFitObject <- self$define_model_fit_object(top_model_params$fit.package, top_model_params$fit.algorithm, best_reg, useH2Oframe = self$useH2Oframe)
self$n_obs_fit <- length(subset_idx)
if (gvars$verbose) { print("refitting the best model: "); self$show() }
model.fit <- try(self$BestModelFitObject$fit(data, subset_idx = subset_idx, destination_frame = "alldata_H2Oframe"))
if (inherits(model.fit, "try-error")) stop("...error while refitting the best model for estimator: " %+% self$reg$estimator)
# **********************************************************************
# to save RAM space when doing many stacked regressions wipe out all internal data:
# **********************************************************************
self$wipe.alldat
return(invisible(model.fit))
},
# Predict the response E[Y|newdata];
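# (when best_refit_only = TRUE and a re-trained best model exists in BestModelFitObject, predictions are made with that model)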
predict = function(newdata, subset_exprs = NULL, predict_model_names = NULL, best_refit_only = FALSE, convertResToDT = TRUE, ...) {
if (!self$is.fitted) stop("Please fit the model prior to attempting to make predictions.")
if (is.null(subset_exprs) && !missing(newdata)) subset_exprs <- self$subset_exprs
if (!missing(newdata)) subset_idx <- newdata$evalsubst(subset_exprs = subset_exprs)
## When missing newdata the predictions are for the training frame.
## No subset re-evaluation is needed (training frame was already subsetted by self$subset_exprs)
if (best_refit_only && !is.null(self$BestModelFitObject)) {
probA1 <- self$BestModelFitObject$predictP1(newdata, subset_idx = subset_idx)
} else {
probA1 <- self$ModelFitObject$predictP1(newdata, subset_idx = subset_idx, predict_model_names = predict_model_names)
}
if (convertResToDT) probA1 <- as.data.table(probA1)
return(probA1)
},
# Predict the response E[Y|newdata] for out of sample observations (validation set / holdouts);
predict_out_of_sample = function(newdata, subset_exprs = NULL, predict_model_names, convertResToDT = TRUE, ...) {
if (!self$is.fitted) stop("Please fit the model prior to attempting to make predictions.")
if (is.null(subset_exprs)) subset_exprs <- self$subset_exprs
if (missing(newdata) && self$runCV) {
## When newdata is missing under CV, use the default h2o / xgboost out-of-sample (holdout) predictions
probA1 <- self$ModelFitObject$predictP1_out_of_sample_cv(predict_model_names = predict_model_names)
} else if (missing(newdata) && !self$runCV) {
## For holdout validation use the validation data (if it is available)
newdata <- self$OData_valid
if (is.null(newdata)) stop("Must supply the validation data for making holdout predictions")
subset_idx <- newdata$evalsubst(subset_exprs = subset_exprs) # self$define.subset.idx(newdata)
probA1 <- self$predict(newdata, subset_exprs, predict_model_names, ...)
} else {
subset_idx <- newdata$evalsubst(subset_exprs = subset_exprs) # self$define.subset.idx(newdata)
if (self$runCV) {
probA1 <- self$ModelFitObject$predictP1_out_of_sample_cv(validation_data = newdata, subset_idx = subset_idx, predict_model_names = predict_model_names)
} else {
probA1 <- self$ModelFitObject$predictP1(data = newdata, subset_idx = subset_idx, predict_model_names = predict_model_names)
}
}
if (convertResToDT) probA1 <- as.data.table(probA1)
return(probA1)
},
# Score models (so far only MSE) based on either out of sample CV model preds or validation data preds;
score_models = function(validation_data, subset_exprs = NULL, OData_train = NULL, OData_valid = NULL, ...) {
if (!self$is.fitted) stop("Please fit the model prior to scoring models.")
if (is.null(subset_exprs)) subset_exprs <- self$subset_exprs
## New validation dataset has been supplied. Use that to obtain out-of-sample preds and test values
if (!missing(validation_data)) {
out_of_sample_preds_DT <- self$predict_out_of_sample(validation_data, subset_exprs, ...)
subset_idx <- validation_data$evalsubst(subset_exprs = subset_exprs)
test_values <- validation_data$get.outvar(subset_idx, var = self$outvar)
IDs <- validation_data$get.outvar(subset_idx, var = validation_data$nodes$IDnode)
## Validation dataset has been previously provided during model training. Use that.
} else if (!is.null(OData_valid)) {
out_of_sample_preds_DT <- self$predict_out_of_sample(newdata = OData_valid, subset_exprs = subset_exprs, ...)
subset_idx <- OData_valid$evalsubst(subset_exprs = subset_exprs)
test_values <- OData_valid$get.outvar(subset_idx, var = self$outvar)
IDs <- OData_valid$get.outvar(subset_idx, var = OData_valid$nodes$IDnode)
## No trace of validation dataset -- can score models only if doing CV. Use old training data for scoring.
} else if (self$runCV) {
out_of_sample_preds_DT <- self$predict_out_of_sample(subset_exprs = subset_exprs, ...)
subset_idx <- OData_train$evalsubst(subset_exprs = subset_exprs)
test_values <- OData_train$get.outvar(subset_idx, var = self$outvar)
IDs <- OData_train$get.outvar(subset_idx, var = OData_train$nodes$IDnode)
} else {
stop("Model re-scoring not possible. Must either provide validation_data or use method = 'cv'")
}
private$MSE <- self$evalMSE_byID(out_of_sample_preds_DT, test_values, IDs)
private$MSE_bysubj <- self$evalMSE_byID(out_of_sample_preds_DT, test_values, IDs, bysubj = TRUE)
## save out of sample CV predictions for the best model
private$out_of_sample_preds <- out_of_sample_preds_DT[, self$get_best_model_names(K = 1), with = FALSE]
return(invisible(self))
},
# First evaluate the empirical loss by subject. Then average that loss across subjects
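# (per-subject loss for subject i over its person-time rows t: MSE_i = mean_t (yhat_it - y_it)^2;
#  the across-subject summary is then the average of MSE_i over all subjects i)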
evalMSE_byID = function(predsDT, test_values, IDs, bysubj = FALSE) {
loss_fun_MSE <- function(yhat, y0) (yhat - y0)^2
if (!self$is.fitted) stop("Please fit the model prior to evaluating MSE.")
if (!is.vector(test_values)) stop("test_values must be a vector of outcomes.")
## 1. Evaluate the empirical loss at each person-time prediction (apply loss function to each row):
sqresid_preds <- as.data.table(predsDT)[, lapply(.SD, loss_fun_MSE, test_values)][, ("subjID") := IDs]
NA_predictions <- sqresid_preds[, lapply(.SD, function(x) any(is.na(x)))]
nNA_predictions <- sqresid_preds[, lapply(.SD, function(x) sum(is.na(x)))]
# system.time(sqresid_preds2 <- as.data.table(private$probA1[, ] - test_values)[, ("subjID") := IDs])
setkeyv(sqresid_preds, cols = "subjID")
if (bysubj) {
## 2. Evaluate the average loss for each person (average loss by rows within each subject)
## 3A. Evaluate the mean, var, sd loss averaging at the subject level first, then averaging across subjects
# browser()
MSE_mean <- sqresid_preds[, lapply(.SD, mean, na.rm = TRUE), by = subjID]
setnames(MSE_mean, "subjID", self$nodes$IDnode)
# mean_bysubj[, subjID := NULL]
# n <- nrow(mean_bysubj)
# MSE_mean <- as.list(mean_bysubj[, lapply(.SD, mean, na.rm = TRUE)])
RMSE_mean <- NA
MSE_var <- NA
MSE_sd <- NA
} else {
# 3B. Evaluate the mean, var, SD loss averaging across all rows of the data
sqresid_preds[, subjID := NULL]
n <- nrow(sqresid_preds)
MSE_mean <- as.list(sqresid_preds[, lapply(.SD, mean, na.rm = TRUE)])
RMSE_mean <- lapply(MSE_mean, sqrt)
MSE_var <- as.list(sqresid_preds[, lapply(.SD, var, na.rm = TRUE)])
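## MSE_sd is the standard error of the mean MSE: the SD of the per-row squared residuals scaled by 1 / sqrt(n)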
MSE_sd <- as.list(sqresid_preds[, lapply(.SD, sd, na.rm = TRUE)] * (1 / sqrt(n)))
}
if (any(as.logical(NA_predictions)))
  warning("Some of the test set predictions from the following model fits were missing (NA) and were hence excluded from the MSE evaluation. " %+%
    "Note that this may lead to a misleading or erroneous assessment of the model performance. " %+%
    "Models with missing predictions: " %+%
    paste0(names(NA_predictions)[as.logical(NA_predictions)], collapse = ",") %+%
    ". Number of missing (NA) test set predictions per model: " %+%
    paste0(as.integer(nNA_predictions)[as.logical(NA_predictions)], collapse = ","))
return(list(MSE_mean = MSE_mean, RMSE_mean = RMSE_mean, MSE_var = MSE_var, MSE_sd = MSE_sd))
},
reassignMSEs = function(sqresid_preds) {
# cat("re-assigning MSEs to new values in \n"); print(sqresid_preds[])
model_names <- names(self$getmodel_ids)
n <- nrow(sqresid_preds)
MSE_mean <- as.list(sqresid_preds[, lapply(.SD, mean, na.rm = TRUE)])
RMSE_mean <- lapply(MSE_mean, sqrt)
MSE_var <- as.list(sqresid_preds[, lapply(.SD, var, na.rm = TRUE)])
MSE_sd <- as.list(sqresid_preds[, lapply(.SD, sd, na.rm = TRUE)] * (1 / sqrt(n)))
private$MSE <- list(MSE_mean = MSE_mean[model_names],
RMSE_mean = RMSE_mean[model_names],
MSE_var = MSE_var[model_names],
MSE_sd = MSE_sd[model_names])
return(invisible(NULL))
},
## ------------------------------------------------------------------------------
## return a model object by name / ID
## ------------------------------------------------------------------------------
getmodel_byname = function(model_names, model_IDs) {
return(self$ModelFitObject$getmodel_byname(model_names, model_IDs))
},
## ------------------------------------------------------------------------------
## return top K models based on smallest validation / test MSE
## ------------------------------------------------------------------------------
get_best_MSEs = function(K = 1) {
if (!self$is.fitted) stop("Please fit the model prior to calling get_best_MSEs()")
if (!is.integerish(K)) stop("K argument must be an integer <= the total number of models in this ensemble")
if (K > length(self$getmodel_ids)) {
message("K value exceeds the total number of models; K is being truncated to " %+% length(self$getmodel_ids))
K <- length(self$getmodel_ids)
}
if (is.null(self$getMSE)) stop("The validation / holdout MSE has not been evaluated, making model ranking impossible. " %+%
  "Please call evalMSE_byID() and provide a vector of validation / test values.")
## ***********************************
## This can throw everything off, since a model may not be uniquely identified by its name.
## Need to use the MSE values themselves to locate the addresses of the best K models.
## ***********************************
return(sort(unlist(self$getMSE))[1:K])
},
## ------------------------------------------------------------------------------
## return top K model object names
## ------------------------------------------------------------------------------
get_best_model_names = function(K = 1) {
if (length(self$getmodel_ids) == 1) {
return(names(self$getmodel_ids))
} else {
return(names(self$get_best_MSEs(K)))
}
},
## ------------------------------------------------------------------------------
## return top K model objects ranked by prediction MSE on a holdout (CV) fold
## ------------------------------------------------------------------------------
get_best_models = function(K = 1) {
top_model_names <- self$get_best_model_names(K)
# if (gvars$verbose) message("fetching top " %+% K %+% " models ranked by the smallest holdout / validation MSE")
return(self$getmodel_byname(top_model_names))
},
# ------------------------------------------------------------------------------
# return the parameters of the top K models as a list (ranked by prediction MSE on a holdout (CV) fold)
# ------------------------------------------------------------------------------
get_best_model_params = function(K = 1) {
top_model_names <- self$get_best_model_names(K)
return(self$ModelFitObject$get_best_model_params(top_model_names))
},
# ------------------------------------------------------------------------------
# return a data.table with best mean MSEs, including SDs & corresponding model names
# ------------------------------------------------------------------------------
get_best_MSE_table = function(K = 1) {
top_MSE_CV <- self$get_best_MSEs(K)
top_model_names <- names(top_MSE_CV)
top_model_pos <- unlist(lapply(top_model_names, function(model_n) which(names(self$getmodel_ids) %in% model_n)))
top_model_ids <- unlist(self$getmodel_ids[top_model_names])
if (is.null(top_model_ids)) top_model_ids <- rep.int(NA, length(top_MSE_CV))
## switch to data.table::data.table:
datMSE <- data.table::data.table(
model = names(self$getmodel_ids[top_model_pos]),
algorithm = unlist(self$getmodel_algorithms[top_model_pos]),
MSE = unlist(self$getMSE[top_model_pos]),
RMSE = unlist(self$getRMSE[top_model_pos]),
MSE.sd = unlist(self$getMSEsd[top_model_pos]),
model.id = top_model_ids,
model.pos = top_model_pos
)
datMSE[["CIlow"]] <- datMSE[["MSE"]] - 1.96 * datMSE[["MSE.sd"]]
datMSE[["CIhi"]] <- datMSE[["MSE"]] + 1.96 * datMSE[["MSE.sd"]]
datMSE[["model"]] <- factor(datMSE[["model"]], levels = datMSE[["model"]][order(datMSE[["MSE"]])])
# rownames(datMSE) <- NULL
return(datMSE)
},
# ------------------------------------------------------------------------------
# return a data.table of grid model fits with parameters, sorted by internal test MSE
# ------------------------------------------------------------------------------
get_modelfits_grid = function() {
return(self$ModelFitObject$get_modelfits_grid())
},
# Output info on the general type of regression being fitted:
show = function(print_format = TRUE, model_stats = FALSE, all_fits = FALSE) {
if (print_format) {
cat("model: E[" %+% self$outvar %+% "|" %+% paste(self$predvars, collapse=", ") %+% "]\n")
if (!is.null(self$subset_exprs) && (self$subset_exprs != "") ) cat("strata: " %+% self$subset_exprs %+% "\n")
cat("estimator: " %+% self$reg$estimator %+% "\n")
cat("method: " %+% self$method %+%"\n")
# cat("fit.package: " %+% self$fit.package %+% "\n")
# cat("fit.algorithm: " %+% self$fit.algorithm %+%"\n")
# cat("grid.algorithm: " %+% self$grid.algorithm %+%"\n")
if (!is.na(self$n_obs_fit)) cat("N: " %+% self$n_obs_fit %+%"\n")
if (self$is.fitted && model_stats) self$ModelFitObject$show(all_fits = all_fits)
return(invisible(NULL))
} else {
return(list(outvar = self$outvar, predvars = self$predvars, stratify = self$subset_exprs, fit.package = self$fit.package, fit.algorithm = self$fit.algorithm))
}
},
summary = function(all_fits = FALSE) {
return(self$ModelFitObject$summary(all_fits = all_fits))
}
),
active = list(
wipe.alldat = function() {
private$probA1 <- NULL
self$ModelFitObject$emptydata
self$ModelFitObject$emptyY
if (!is.null(self$BestModelFitObject)) {
self$BestModelFitObject$emptydata
self$BestModelFitObject$emptyY
}
return(self)
},
wipe.allmodels = function() {
self$ModelFitObject$wipe.allmodels
},
wipe.allOData = function() {
self$OData_train <- NULL # object of class DataStorageClass used for training
self$OData_valid <- NULL # object of class DataStorageClass used for scoring models (contains validation data)
},
emptymodelfit = function() { self$ModelFitObject$emptymodelfit },
getprobA1 = function() { private$probA1 },
get_out_of_sample_preds = function() { private$out_of_sample_preds },
# getsubset = function() { self$subset_idx },
getoutvarnm = function() { self$outvar },
getoutvarval = function() { self$ModelFitObject$getY },
getMSEtab = function() {
MSE_list <- self$getMSE
RMSE_list <- self$getRMSE
MSE.sd_list <- self$getMSEsd
if (length(MSE_list) == 0L) {
warning("It looks like the CV/holdout MSEs have not been evaluated for the model calling order: " %+% self$Model_idx %+%
". Cannot make prediction or select the best model unless some model selection criteria is specified during fit() call.
Please make sure the argument 'method' is set to either 'cv' or 'holdout'.")
MSE_list <- MSE.sd_list <- RMSE_list <- rep.int(list(NA), length(self$getmodel_ids))
names(MSE_list) <- names(MSE.sd_list) <- names(RMSE_list) <- names(self$getmodel_ids)
}
data.table::data.table(
MSE = unlist(MSE_list),
MSE.sd = unlist(MSE.sd_list),
RMSE = unlist(RMSE_list),
model = names(MSE_list),
Model_idx = self$Model_idx,
order = seq_along(MSE_list)
)
},
getMSE_bysubj = function() {
sel_cols <- c(self$nodes$IDnode, self$get_best_model_names())
MSEdat <- private$MSE_bysubj[["MSE_mean"]][, sel_cols, with = FALSE]
setnames(MSEdat, self$get_best_model_names(), "MSE")
MSEdat
},
getMSE = function() { private$MSE[["MSE_mean"]] },
getRMSE = function() { private$MSE[["RMSE_mean"]] },
getMSEvar = function() { private$MSE[["MSE_var"]] },
getMSEsd = function() { private$MSE[["MSE_sd"]] },
getfit = function() { self$ModelFitObject$model.fit },
getRetrainedfit = function() { self$BestModelFitObject$model.fit },
getmodel_ids = function() { self$ModelFitObject$getmodel_ids },
getmodel_algorithms = function() { self$ModelFitObject$getmodel_algorithms }
),
private = list(
# model.fit = list(), # the model fit (either coefficients or the model fit object)
MSE = list(),
MSE_bysubj = list(),
probA1 = NULL, # Predicted probA^s=1 conditional on Xmat
out_of_sample_preds = NULL,
probAeqa = NULL # Likelihood of observing a particular value A^s=a^s conditional on Xmat
)
)
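## ---------------------------------------------------------------------
## Illustrative usage sketch (hypothetical object names; assumes `reg` is a
## RegressionClass instance and `OData`, `OData_val` are DataStorageClass instances):
##   m <- PredictionModel$new(reg)
##   m$fit(data = OData, validation_data = OData_val)
##   preds <- m$predict(OData)
##   m$score_models(validation_data = OData_val)
##   m$get_best_MSE_table(K = 3)
## ---------------------------------------------------------------------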