Nothing
#' Catalytic Generalized Linear Models (GLMs) Fitting Function by Tuning tau from a Sequence of tau Values
#'
#' This function tunes a catalytic catalytic Generalized Linear Models (GLMs) by performing specified risk estimate method
#' to estimate the optimal value of the tuning parameter `tau`. The resulting `cat_glm_tune` object
#' encapsulates the fitted model, including estimated coefficients and family information, facilitating further analysis.
#'
#' @param formula A formula specifying the GLMs. Should at least include response variables (e.g. \code{~ .}).
#' @param cat_init A list generated from `cat_glm_initialization`.
#' @param risk_estimate_method Method for risk estimation, chosen from "parametric_bootstrap",
#' "cross_validation", "mallows_estimate", "steinian_estimate". Depends on the size of the data if not provided.
#' @param discrepancy_method Method for discrepancy calculation, chosen from "mean_square_error",
#' "mean_classification_error", "logistic_deviance". Depends on the family if not provided.
#' @param tau_seq Vector of numeric values for down-weighting synthetic data.
#' Defaults to a sequence around one fourth of the number of predictors for gaussian and
#' the number of predictors for binomial.
#' @param tau_0 Initial `tau` value used for discrepancy calculation in risk estimation.
#' Defaults to one fourth of the number of predictors for binomial and 1 for gaussian.
#' @param parametric_bootstrap_iteration_times Number of bootstrap iterations for "parametric_bootstrap" risk estimation. Defaults to 100.
#' @param cross_validation_fold_num Number of folds for "cross_validation" risk estimation.. Defaults to 5.
#'
#' @return A list containing the values of all the arguments and the following components:
#' \item{tau}{Optimal `tau` value determined through tuning.}
#' \item{model}{Fitted GLM model object with the optimal `tau` value.}
#' \item{coefficients}{Estimated coefficients from the `model` fitted by the optimal `tau` value.}
#' \item{risk_estimate_list}{Collected risk estimates for each `tau`.}
#'
#' @examples
#' gaussian_data <- data.frame(
#' X1 = stats::rnorm(10),
#' X2 = stats::rnorm(10),
#' Y = stats::rnorm(10)
#' )
#'
#' cat_init <- cat_glm_initialization(
#' formula = Y ~ 1, # formula for simple model
#' data = gaussian_data,
#' syn_size = 100, # Synthetic data size
#' custom_variance = NULL, # User customized variance value
#' gaussian_known_variance = TRUE, # Indicating whether the data variance is known
#' x_degree = c(1, 1), # Degrees for polynomial expansion of predictors
#' resample_only = FALSE, # Whether to perform resampling only
#' na_replace = stats::na.omit # How to handle NA values in data
#' )
#'
#' cat_model <- cat_glm_tune(
#' formula = ~.,
#' cat_init = cat_init, # Only accept object generated from `cat_glm_initialization`
#' risk_estimate_method = "parametric_bootstrap",
#' discrepancy_method = "mean_square_error",
#' tau_seq = c(1, 2), # Weight for synthetic data
#' tau_0 = 2,
#' parametric_bootstrap_iteration_times = 20, # Number of bootstrap iterations
#' cross_validation_fold_num = 5 # Number of folds
#' )
#' cat_model
#' @export
cat_glm_tune <- function(
formula,
cat_init,
risk_estimate_method = c(
"parametric_bootstrap",
"cross_validation",
"mallowian_estimate",
"steinian_estimate"
),
discrepancy_method = c(
"mean_square_error",
"mean_classification_error",
"logistic_deviance"
),
tau_seq = NULL,
tau_0 = NULL,
parametric_bootstrap_iteration_times = 100,
cross_validation_fold_num = 5) {
if (missing(risk_estimate_method)) {
risk_estimate_method <- ifelse(
cat_init$obs_size > 200,
"cross_validation",
"parametric_bootstrap"
)
}
if (missing(discrepancy_method)) {
discrepancy_method <- ifelse(
cat_init$family == "gaussian",
"mean_square_error",
"logistic_deviance"
)
}
risk_estimate_method <- match.arg(risk_estimate_method)
discrepancy_method <- match.arg(discrepancy_method)
# Validate Input Parameters
validate_glm_input(
formula = formula,
cat_init = cat_init,
tau_seq = tau_seq,
tau_0 = tau_0,
parametric_bootstrap_iteration_times = parametric_bootstrap_iteration_times,
cross_validation_fold_num = cross_validation_fold_num,
risk_estimate_method = risk_estimate_method,
discrepancy_method = discrepancy_method
)
# Update cat_init with adjusted data based on the formula's right-hand side
f_rhs <- get_formula_rhs(formula, with_tilde = TRUE)
full_formula <- stats::as.formula(paste0(cat_init$y_col_name, f_rhs))
cat_init <- get_adjusted_cat_init(
cat_init = cat_init,
formula_rhs = f_rhs
)
if (is.null(tau_seq)) {
tau_seq <- if (cat_init$family == "gaussian") {
seq(max(0.01, ncol(cat_init$adj_x) / 4 - 2), ncol(cat_init$adj_x) / 4 + 2, 0.5)
} else {
seq(max(0.01, ncol(cat_init$adj_x) - 2), ncol(cat_init$adj_x) + 2, 0.5)
}
}
tau_seq <- sort(unique(tau_seq))
if ((0 %in% tau_seq) & (dim(cat_init$adj_obs_x)[1] < dim(cat_init$adj_obs_x)[2])) {
warning(
paste0(
"Found 0 in input `tau_seq`, and the number of columns in the observation data exceeds its data size. \n",
"To avoid issues, 0 in `tau_seq` will be replaced with a small positive value."
),
call. = FALSE
)
tau_seq[1] <- tau_seq[2] / 2
}
if (is.null(tau_0)) {
tau_0 <- ifelse(
cat_init$family == "gaussian",
1,
ncol(cat_init$adj_x) / 4
)
}
# Estimate Risk Using the Specified Method and Processed Parameters
risk_estimate_list <- do.call(
risk_estimate_method,
list(
formula = full_formula,
cat_init = cat_init,
discrepancy_method = discrepancy_method,
tau_seq = tau_seq,
tau_0 = tau_0,
parametric_bootstrap_iteration_times = parametric_bootstrap_iteration_times,
cross_validation_fold_num = cross_validation_fold_num
)
)
# Fit the Optimal Model Using the Optimal tau
optimal_tau <- tau_seq[which.min(risk_estimate_list)]
## Suppress warning for `binomial` family when weights contains value < 1
optimal_model <- suppressWarnings(
do.call(
stats::glm,
list(
formula = full_formula,
family = cat_init$family,
data = cat_init$data,
weights = c(
rep(1, cat_init$obs_size),
rep(optimal_tau / cat_init$syn_size, cat_init$syn_size)
)
)
)
)
# Finalize Setup and Output
cat_model <- list(
function_name = "cat_glm_tune",
## Input/Processed parameters
formula = full_formula,
cat_init = cat_init,
risk_estimate_method = risk_estimate_method,
discrepancy_method = if (risk_estimate_method == "mallowian_estimate") {
"mean_square_error"
} else if (risk_estimate_method == "steinian_estimate") {
"logistic_deviance"
} else {
discrepancy_method
},
tau_seq = tau_seq,
tau_0 = tau_0,
parametric_bootstrap_iteration_times = parametric_bootstrap_iteration_times,
cross_validation_fold_num = cross_validation_fold_num,
## Result
tau = optimal_tau,
model = optimal_model,
coefficients = stats::coef(optimal_model),
risk_estimate_list = risk_estimate_list
)
class(cat_model) <- c(cat_model$class, "cat_tune", "cat_glm")
return(cat_model)
}
#' Perform Parametric Bootstrap for Model Risk Estimation
#'
#' This function performs parametric bootstrapping to estimate model risk. It fits a sequence
#' of Generalized Linear Models (GLMs) with different values of `tau`, calculates the in-sample
#' prediction error, and incorporates deviations from the bootstrap response samples. The final
#' risk estimate is obtained by combining the in-sample error and the covariance penalty derived
#' from the bootstrap samples.
#'
#' @param formula A formula specifying the GLMs. Should at least include response variables.
#' @param cat_init A list generated from `cat_glm_initialization`.
#' @param tau_seq A sequence of tuning parameter values (`tau`) over which
#' the model risk will be estimated. Each `tau` value is used to weight the synthetic data during model fitting.
#' @param tau_0 A reference value for `tau` used in the preliminary estimate model and variance calculation.
#' @param discrepancy_method The method used to calculate the discrepancy (e.g., logistic deviance).
#' @param parametric_bootstrap_iteration_times The number of bootstrap iterations to perform.
#' @param ... Other arguments passed to other internal functions.
#'
#' @details
#' 1. **Preliminary Estimate Model**: The function first fits a GLM model using the observed
#' and synthetic data with an initial value of `tau_0` for the synthetic data weights.
#'
#' 2. **Bootstrap Samples**: The function generates bootstrap response samples based on the
#' mean and standard deviation of the preliminary estimate model, using parametric bootstrapping.
#'
#' 3. **In-sample Prediction Error**: For each value of `tau` in `tau_seq`, the function computes
#' the in-sample prediction error (e.g., using logistic deviance).
#'
#' 4. **Bootstrap Models**: For each bootstrap iteration, the function fits a GLM using the
#' bootstrap response samples and calculates the corresponding lambda values.
#'
#' 5. **Covariance Penalty**: The function approximates the covariance penalty using the weighted
#' deviations across all bootstrap iterations.
#'
#' 6. **Final Risk Estimate**: The final model risk estimate is calculated by summing the in-sample
#' prediction error and the average weighted deviations from the bootstrap response samples.
#'
#' @return
#' A numeric vector containing the risk estimates for each `tau` in `tau_seq`.
parametric_bootstrap <- function(
formula,
cat_init,
tau_seq,
tau_0,
discrepancy_method,
parametric_bootstrap_iteration_times,
...) {
# The Preliminary Estimate Model from Using Initial Value tau_0
## Suppress warning for `binomial` family when weights contains value < 1
tau_0_model <- suppressWarnings(
do.call(
stats::glm,
list(
formula = formula,
family = cat_init$family,
data = cat_init$data,
weights = c(
rep(1, cat_init$obs_size),
rep(tau_0 / cat_init$syn_size, cat_init$syn_size)
)
)
)
)
# The Mean from the Preliminary Estimate Model for Generating Bootstrap Response Samples
boots_mean_Y <- get_glm_mean(
family_string = cat_init$family,
X = cat_init$adj_obs_x,
coefs = stats::coef(tau_0_model)
)
# The Standard Deviance for Generating Bootstrap Response Samples
boots_sd_Y <- if (cat_init$family == "gaussian") {
sqrt(
ifelse(
(is.null(cat_init$custom_variance)),
get_glm_custom_var(
formula = formula,
cat_init = cat_init,
tau = ncol(cat_init$adj_x)
),
cat_init$custom_variance
)
)
} else {
0
}
# The data.frame that Contains All Vectors of Bootstrap Response Samples for Each Bootstrap Iterations
boots_Y_df <- as.data.frame(
lapply(
1:parametric_bootstrap_iteration_times,
function(.) {
get_glm_sample_data(
family_string = cat_init$family,
n = cat_init$obs_size,
mean = boots_mean_Y,
sd = boots_sd_Y
)
}
)
)
# The Deviations of Bootstrap Response Samples from Their Means
boots_dev_df <- sweep(boots_Y_df, 1, rowMeans(boots_Y_df))
risk_estimate_list <- c()
for (tau in tau_seq) {
# The tau Model
## Suppress warning for `binomial` family when weights contains value < 1
tau_model <- suppressWarnings(
do.call(
stats::glm,
list(
formula = formula,
family = cat_init$family,
data = cat_init$data,
weights = c(
rep(1, cat_init$obs_size),
rep(tau / cat_init$syn_size, cat_init$syn_size)
)
)
)
)
# The In-sample Prediction Error from the tau Model
D <- get_discrepancy(
discrepancy_method = discrepancy_method,
family_string = cat_init$family,
X = cat_init$adj_obs_x,
Y = cat_init$obs_y,
coefs = stats::coef(tau_model)
)
boots_lambda_mu_df <- do.call(
cbind,
lapply(
1:parametric_bootstrap_iteration_times,
function(boots_i) {
# The Bootstrap Model from Replacing the Observation Response by Bootstrap Response Samples
## Suppress warning for `binomial` family when weights contains value < 1
boots_model <- suppressWarnings(
do.call(
stats::glm, list(
formula = formula,
family = cat_init$family,
data = cbind(
cat_init$x,
stats::setNames(
data.frame(c(
boots_Y_df[[boots_i]],
cat_init$syn_y
)),
cat_init$y_col_name
)
),
weights = c(
rep(1, cat_init$obs_size),
rep(tau / cat_init$syn_size, cat_init$syn_size)
)
)
)
)
# Return Lambda(mu) values
return(get_glm_lambda(
discrepancy_method = discrepancy_method,
X = cat_init$adj_obs_x,
coefs = stats::coef(boots_model)
))
}
)
)
risk_estimate_list <- c(
risk_estimate_list,
# Covariance Penalty Approximation Using Weighted Deviations Across Bootstrap Iterations
D + sum(boots_dev_df * boots_lambda_mu_df) / parametric_bootstrap_iteration_times
)
}
return(risk_estimate_list)
}
#' Perform Steinian Estimate for Model Risk (Only Applicable for Binomial Family)
#'
#' This function computes the Steinian estimate for model risk by fitting a sequence of
#' Generalized Linear Models (GLMs) with varying values of `tau`. It combines the preliminary
#' estimate from a model fitted with an initial `tau_0` value with a penalty term that incorporates
#' the in-sample prediction error and a covariance penalty, which is based on models fitted by inverting
#' the response of individual observations.
#'
#' @param formula A formula specifying the GLMs. Should at least include response variables.
#' @param cat_init A list generated from `cat_glm_initialization`.
#' @param tau_seq A sequence of tuning parameter values (`tau`) over which
#' the Steinian estimate will be computed. Each value of `tau` is used to weight the
#' synthetic data during model fitting.
#' @param tau_0 A reference value for `tau` that is used in the calculation of the
#' preliminary estimate model and the variance term.
#' @param ... Other arguments passed to other internal functions.
#'
#' @details
#' 1. **Preliminary Estimate Model**: The function first fits a GLM model using the observed and
#' synthetic data with an initial value of `tau_0` for the synthetic data weights.
#'
#' 2. **In-sample Prediction Error**: For each value of `tau` in `tau_seq`, the function computes
#' the in-sample prediction error (logistic deviance).
#'
#' 3. **Steinian Penalty**: The function calculates the Steinian covariance penalty for each observation
#' by fitting a modified model that inverts one observation at a time. The penalty is added to the
#' in-sample prediction error to obtain the final risk estimate.
#'
#' 4. **Steinian Risk Estimate**: The final Steinian risk estimate is calculated by summing the
#' in-sample prediction error and the Steinian penalty term for each value of `tau` in `tau_seq`.
#'
#' @return
#' A numeric vector of Steinian risk estimates, one for each value of `tau` in `tau_seq`.
steinian_estimate <- function(
formula,
cat_init,
tau_seq,
tau_0,
...) {
# The Preliminary Estimate Model from Using Initial Value tau_0
## Suppress warning for `binomial` family when weights contains value < 1
tau_0_model <- suppressWarnings(
do.call(
stats::glm,
list(
formula = formula,
family = "binomial",
data = cat_init$data,
weights = c(
rep(1, cat_init$obs_size),
rep(tau_0 / cat_init$syn_size, cat_init$syn_size)
)
)
)
)
# The Mean from the Preliminary Estimate Model for Generating Bootstrap Response Samples
tau_0_mean <- get_glm_mean(
family_string = "binomial",
X = cat_init$adj_obs_x,
coefs = stats::coef(tau_0_model)
)
# The Variance Estimates for the Preliminary Estimate Model
tau_0_varience <- tau_0_mean * (1 - tau_0_mean)
risk_estimate_list <- c()
for (tau in tau_seq) {
# The tau Model
## Suppress warning for `binomial` family when weights contains value < 1
tau_model <- suppressWarnings(
do.call(
stats::glm,
list(
formula = formula,
family = "binomial",
data = cat_init$data,
weights = c(
rep(1, cat_init$obs_size),
rep(tau / cat_init$syn_size, cat_init$syn_size)
)
)
)
)
# The In-sample Prediction Error from the tau Model
D <- get_discrepancy(
discrepancy_method = "logistic_deviance",
family_string = "binomial",
X = cat_init$adj_obs_x,
Y = cat_init$obs_y,
coefs = stats::coef(tau_model)
)
lambda_mu_tau_df <- get_glm_lambda(
discrepancy_method = "logistic_deviance",
X = cat_init$adj_x,
coefs = stats::coef(tau_model)
)
steinian_penalty_sum <- 0
for (obs_i in 1:cat_init$obs_size) {
# The Triangle Response by Inverting One Observation
triangle_obs_y <- cat_init$obs_y
triangle_obs_y[obs_i] <- 1 - cat_init$obs_y[obs_i]
# The Triangle Model Fitted from Triangle Response
## Suppress warning for `binomial` family when weights contains value < 1
triangle_tau_model <- suppressWarnings(
do.call(
stats::glm,
list(
formula = formula,
family = "binomial",
data = cbind(
cat_init$adj_x,
stats::setNames(
data.frame(c(
triangle_obs_y,
cat_init$syn_y
)),
cat_init$y_col_name
)
),
weights = c(
rep(1, cat_init$obs_size),
rep(tau / cat_init$syn_size, cat_init$syn_size)
)
)
)
)
# Steinian Covariance Penalty
steinian_penalty_sum <- steinian_penalty_sum +
tau_0_varience[obs_i] * (
2 * cat_init$obs_y[obs_i] - 1) * (
lambda_mu_tau_df[obs_i] - get_glm_lambda(
discrepancy_method = "logistic_deviance",
X = cat_init$adj_x[obs_i, ],
coefs = stats::coef(triangle_tau_model)
)
)
}
risk_estimate_list <- c(risk_estimate_list, D + steinian_penalty_sum)
}
return(risk_estimate_list)
}
#' Perform Mallowian Estimate for Model Risk (Only Applicable for Gaussian Family)
#'
#' This function calculates the Mallowian estimate for model risk by fitting a sequence of
#' Generalized Linear Models (GLMs) with varying values of `tau`. It uses the in-sample prediction
#' error along with a regularized projection matrix to estimate the model risk. The `tau` parameter
#' influences the weighting of synthetic data during model fitting.
#'
#' @param formula A formula specifying the GLMs. Should at least include response variables.
#' @param cat_init A list generated from `cat_glm_initialization`.
#' @param tau_seq A sequence of tuning parameter values (`tau`) over which
#' the Mallowian estimate will be computed. Each value of `tau` is used to weight the
#' synthetic data during model fitting.
#' @param ... Other arguments passed to other internal functions.
#'
#' @details
#' 1. **Model Fitting**: For each value of `tau` in `tau_seq`, the function fits a GLM model
#' using the observed and synthetic data. The synthetic data is weighted by the corresponding
#' `tau` value during the fitting process.
#'
#' 2. **In-sample Prediction Error**: After fitting the model, the function computes the
#' in-sample prediction error (Mean Squared Error) to assess the model's performance.
#'
#' 3. **Regularized Projection Matrix**: The function calculates a regularized projection matrix
#' using the observed and synthetic data, which influences the covariance matrix used in risk estimation.
#'
#' 4. **Mallowian Risk Estimate**: The final Mallowian risk estimate is computed by combining the
#' in-sample prediction error with a penalty term involving the projection matrix and a variance term.
#' This estimate is calculated for each value of `tau` in `tau_seq`.
#'
#' @return
#' A numeric vector of Mallowian risk estimates, one for each value of `tau` in `tau_seq`.
mallowian_estimate <- function(
formula,
cat_init,
tau_seq,
...) {
var_Y <- ifelse(
(is.null(cat_init$custom_variance)),
get_glm_custom_var(
formula = formula,
cat_init = cat_init,
tau = ncol(cat_init$adj_x)
),
cat_init$custom_variance
)
risk_estimate_list <- c()
for (tau in tau_seq) {
# The tau Model
tau_model <- do.call(
stats::glm,
list(
formula = formula,
family = "gaussian",
data = cat_init$data,
weights = c(
rep(1, cat_init$obs_size),
rep(tau / cat_init$syn_size, cat_init$syn_size)
)
)
)
# The In-sample Prediction Error from the tau Model
D <- get_discrepancy(
discrepancy_method = "mean_square_error",
family_string = "gaussian",
X = cat_init$adj_obs_x,
Y = cat_init$obs_y,
coefs = stats::coef(tau_model)
)
obs_x_matrix <- as.matrix(cat_init$adj_obs_x)
syn_x_matrix <- as.matrix(cat_init$adj_syn_x)
solved_obs_syn_matrix <- solve(
crossprod(obs_x_matrix) + (tau / cat_init$syn_size) * crossprod(syn_x_matrix)
)
weighted_covariance_matrix <- t(obs_x_matrix) + (
tau / ((cat_init$syn_size * cat_init$obs_size))
) * t(syn_x_matrix) %*% matrix(1, nrow = cat_init$syn_size, ncol = cat_init$obs_size)
# The Regularized Projection Matrix
H_tau_matrix <- obs_x_matrix %*% solved_obs_syn_matrix %*% weighted_covariance_matrix
risk_estimate_list <- c(
risk_estimate_list,
D + (2 * var_Y * sum(diag(H_tau_matrix))) / cat_init$obs_size
)
}
return(risk_estimate_list)
}
#' Perform Cross-Validation for Model Estimation
#'
#' This function performs cross-validation for estimating risk over a sequence
#' of tuning parameters (`tau_seq`) by fitting a Generalized Linear Model (GLM) to the data.
#' It evaluates model performance by splitting the dataset into multiple folds, training
#' the model on a subset of the data, and testing it on the remaining portion.
#'
#' @param formula A formula specifying the GLMs. Should at least include response variables.
#' @param cat_init A list generated from `cat_glm_initialization`.
#' @param tau_seq A sequence of tuning parameter values (`tau`) over which
#' cross-validation will be performed. Each value of `tau` is used to weight the
#' synthetic data during model fitting.
#' @param discrepancy_method A function used to calculate the discrepancy (error) between
#' model predictions and actual values.
#' @param cross_validation_fold_num The number of folds to use in cross-validation.
#' The dataset will be randomly split into this number of subsets, and the model will be trained and tested on different combinations of these subsets.
#' @param ... Other arguments passed to other internal functions.
#'
#' @details
#' 1. **Randomization of the Data**: The data is randomly shuffled into `cross_validation_fold_num`
#' subsets to ensure that the model is evaluated across different splits of the dataset.
#'
#' 2. **Model Training and Prediction**: For each fold, a training set is used to fit
#' a GLM with varying values of `tau` (from `tau_seq`), and the model is evaluated on a test set.
#' The training data consists of both the observed and synthetic data, with synthetic data weighted by `tau`.
#'
#' 3. **Risk Estimation**: After fitting the model, the `discrepancy_method` is used to calculate the
#' prediction error for each combination of fold and `tau`. These errors are accumulated for each `tau`.
#'
#' 4. **Average Risk Estimate**: After completing all folds, the accumulated prediction errors
#' are averaged over the number of folds to provide a final risk estimate for each value of `tau`.
#'
#' @return
#' A numeric vector of averaged risk estimates, one for each value of `tau` in `tau_seq`.
cross_validation <- function(
formula,
cat_init,
tau_seq,
discrepancy_method,
cross_validation_fold_num,
...) {
random_idxs <- sample(1:cat_init$obs_size)
risk_estimate_list <- rep(0, length(tau_seq))
# The perform of cross-validation
for (fold in seq_len(cross_validation_fold_num)) {
test_idx <- random_idxs[
(cat_init$obs_size * (fold - 1) / cross_validation_fold_num + 1):
(cat_init$obs_size * fold / cross_validation_fold_num)
]
test_x <- cat_init$adj_obs_x[test_idx, , drop = FALSE]
test_y <- cat_init$obs_y[test_idx]
train_x <- cat_init$adj_obs_x[-test_idx, , drop = FALSE]
train_y <- cat_init$obs_y[-test_idx]
risk_estimate_list_fold <- c()
# The iteration over the tau sequence for estimation
for (tau in tau_seq) {
## Suppress warning for `binomial` family when weights contains value < 1
cv_tau_model <- suppressWarnings(
do.call(
stats::glm,
list(
formula = formula,
family = cat_init$family,
data = cbind(
rbind(train_x, cat_init$adj_syn_x),
stats::setNames(
data.frame(c(train_y, cat_init$syn_y)),
cat_init$y_col_name
)
),
weights = c(
rep(1, nrow(train_x)),
rep(tau / cat_init$syn_size, cat_init$syn_size)
)
)
)
)
prediction_error <- get_discrepancy(
discrepancy_method = discrepancy_method,
family_string = cat_init$family,
X = test_x,
Y = test_y,
coefs = stats::coef(cv_tau_model)
)
risk_estimate_list_fold <- c(
risk_estimate_list_fold,
prediction_error
)
}
risk_estimate_list <- risk_estimate_list + risk_estimate_list_fold
}
return(risk_estimate_list / cross_validation_fold_num)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.