#' Builds predictive model based GLM.
#'
#' The method provides main functionality on building GLM models with automatic variables transformation.
#' The transformations are based on specified single variable responses for selected black-box model.
#' See details in \code{vignette("xspliner")}.
#'
#' model_surrogate_xspliner is a wrapper of xspline method to assure consistency with https://github.com/ModelOriented/DrWhy tools
#'
#' @param object Predictive model, formula or explainer (see DALEX) object.
#' @param model When \code{object} is formula - predictive model. Basic model used for extracting predictors transformation.
#' @param lhs Left-hand side of model formula. Can be transformed response.
#' @param response Name of response variable of \code{model}.
#' @param predictors Predictor values that should be used in final model.
#' @param data Training data of \code{model}.
#' @param form Can be 'additive' (default) or 'multiplicative'. Specifies formula form in final model.
#' @param bare Variable names that mustn't be transformed in final model.
#' @param env Environment in which optional variables passed into parameters are stored.
#' variables transformation. See vignette("xspliner") for details.
#' @param consider One of \code{c("specials", "all")}. If "specials", only components with xs or xf
#' call are considered in transition.
#' @param ... Other arguments passed to \code{xspline} methods or \link{build_xspliner}.
#'
#' @return GLM object of class 'xspliner'.
#'
#' @examples
#' # preparing blackbox model
#' library(randomForest)
#' rf_iris <- randomForest(
#' Petal.Width ~ Sepal.Length + Petal.Length + Species,
#' data = iris)
#'
#' # formula based xspliner
#' xs_iris <- xspline(
#' Petal.Width ~ xs(Sepal.Length) + xs(Petal.Length) + xf(Species),
#' model = rf_iris)
#' summary(xs_iris)
#' plot(xs_iris, "Sepal.Length")
#'
#' # passing just the model
#' xs_iris <- xspline(rf_iris)
#' summary(xs_iris)
#' plot(xs_iris, "Sepal.Length")
#'
#' # using DALEX
#' library(DALEX)
#' xs_iris_explainer <- explain(rf_iris)
#' xs_iris <- xspline(rf_iris)
#' summary(xs_iris)
#' plot(xs_iris, "Sepal.Length")
#'
#' @export
xspline <- function(object, ...) {
UseMethod("xspline", object)
}
#' @rdname xspline
#' @export
model_surrogate_xspliner <- xspline
#' @rdname xspline
#' @export
xspline.default <- function(object, lhs = NULL, response = NULL, predictors = NULL, data = NULL,
form = "additive", bare = NULL, env = parent.frame(), ...) {
data <- get_model_data(object, data, env)
lhs <- get_model_lhs(object, lhs)
predictors <- get_model_predictors(object, data, predictors, get_model_response(object, data, response))
classes <- get_predictors_classes(data[, predictors])
formula <- as.formula(
build_predictor_based_formula(lhs, predictors, classes, bare, form),
env = env)
build_xspliner(formula, object, data, env = env, ...)
}
#' @rdname xspline
#' @export
xspline.formula <- function(object, model, data = NULL, consider = "specials", env = parent.frame(), ...) {
data <- get_model_data(model, data, env)
formula_lhs <- get_formula_lhs(object)
model_lhs <- get_model_lhs(model, NULL)
if (model_lhs != formula_lhs) {
message("Model and formula lhs's must be the same. Using lhs from model.")
object[[2]] <- model_lhs
}
model_predictors <- get_model_predictors(model, data, NULL, get_model_response(model, data, NULL))
if (get_formula_rhs(object) == ".") {
lhs <- get_formula_lhs(object)
xspline.default(model, lhs, data = data, env = env, ...)
} else {
formula_predictors <- get_formula_predictors(object, data, NULL, get_formula_response(object, data, NULL))
if (!(all(formula_predictors %in% model_predictors))) {
stop("Not all variables from formula are included in model.")
}
if (consider == "specials") {
local_build_xspliner <- function(object, model, data, env, ..., bare) {
build_xspliner(object, model, data, env = env, ...)
}
local_build_xspliner(object, model, data, env = env, ...)
} else {
object[[3]] <- add_specials_to_formula(object[[3]], data)
xspline.formula(object, model, data = NULL, consider = "specials", env = env, ...)
}
}
}
#' @rdname xspline
#' @export
xspline.explainer <- function(object, env = parent.frame(), ...) {
xspline.default(object$model, NULL, NULL, NULL, env = env, ...)
}
#' Helper function for building GLM object with transformed variables.
#'
#' @param formula xspliner-specific formula object. Check vignette("xspliner") for more details.
#' @param model Predictive model. Basic model used for extracting predictors transformation.
#' @param data Training data of \code{model}.
#' @param xf_opts Formula parameters used for factor variable transformations inherited from factorMerger package.
#' @param xs_opts Predictive model response method and approximation parameters used for quantitative.
#' @param link Link function that should be used in final model. The passed is used when cannot be extracted from
#' model. By default 'identity'. See \link[stats]{family} for possibilities.
#' @param family Family of response variable that should be used in final model. The passed is used when cannot
#' be extracted from model. By default 'gaussian'. See \link[stats]{family} for possibilities.
#' @param env Environment in which optional variables passed into parameters are stored.
#' @param compare_stat Function of linear model (lm function output). Statistic that measures if linear model is better
#' that transformed one. See \link{stats}.
#' @param control Fitting settings. See \link{glm.control}.
#' @param ... Another parameters passed from chosen method. Not used.
#'
build_xspliner <- function(formula, model, data, xf_opts = xf_opts_default, xs_opts = xs_opts_default, link = "identity",
family = "gaussian", env = parent.frame(), compare_stat = aic, control, ...) {
formula_environment <- new.env(parent = env)
attr(formula, ".Environment") <- formula_environment
formula_metadata <- get_formula_metadata(formula, extract_formula_var_names(formula, data))
type <- get_model_type(model, data, formula_metadata$response)
model_family <- get_model_family(model, family, type)
model_link <- get_model_link(model, link, type)
family <- model_family(link = model_link)
cleared_formula <- transformed_formula_object(formula_metadata, model, data, family, xs_opts, xf_opts, compare_stat)
if (!missing(control)) {
glm_model <- stats::glm(cleared_formula, data = data, family = family, control = control)
} else {
glm_model <- stats::glm(cleared_formula, data = data, family = family)
}
environment(glm_model) <- attr(cleared_formula, ".Environment")
class(glm_model) <- c("xspliner", class(glm_model))
glm_model$call[[2]] <- cleared_formula
attr(glm_model, "type") <- type
glm_model
}
#' Default parameters for transition methods
#'
#' While constructing formula interpreted by xspliner package, some parameters may be specified within xs(..) or xf(..) symbols.
#' Below are default parameters. See details in \code{vignette("xspliner")}
#'
#' @export
xf_opts_default = list(
effect = list(type = "ice"),
transition = list(alter = "always", stat = "GIC", value = 3)
)
#' @rdname xf_opts_default
#' @export
xs_opts_default = list(
effect = list(type = "pdp"),
transition = list(alter = "always", monotonic = "not")
)
#' Predict xspliner method
#'
#' @param object Object of class 'xspliner'.
#' @param newdata Data that should be prediction based on.
#' @param ... Another arguments passed into \link[stats]{predict.glm} method.
#' @export
predict.xspliner <- function(object, newdata, ...) {
predict.glm(object, newdata = newdata, ...)
}
#' Statistics used for better linear model selection
#'
#' Used as \code{compare_stat} parameter in \code{xspline} method.
#' Each function has attribute "higher-better".
#' If "higher-better" is TRUE then model with higher statistic value is treated as better one.
#'
#' @param glm_model Linear model - \code{glm} function output.
#' @name stats
NULL
#' Calculate AIC for glm model.
#'
#' @rdname stats
#' @export
aic <- function(glm_model) {
summary(glm_model)$aic
}
attr(aic, "higher-better") <- FALSE
#' Calculate Hosmer-Lemeshow Goodness of Fit for glm model.
#'
#' @rdname stats
#' @export
hoslem <- function(glm_model) {
if (glm_model$family$family != "binomial") {
stop("Not classification model.")
}
ResourceSelection::hoslem.test(glm_model$model[[1]], fitted(glm_model))$statistic
}
attr(hoslem, "higher-better") <- TRUE
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.