Nothing
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
fig.align = "center",
fig.width = 7,
fig.height = 6,
cache = TRUE
)
## ----eval=FALSE---------------------------------------------------------------
# library(catalytic)
#
# set.seed(1)
#
# # Function for calculating the mean of logarithmic error between true response and estimated response
# get_mean_logarithmic_error <- function(Y, est_Y) {
# Y <- pmax(0.0001, pmin(0.9999, Y))
# est_Y <- pmax(0.0001, pmin(0.9999, est_Y))
# return(mean(Y * log(Y / est_Y) + (1 - Y) * log((1 - Y) / (1 - est_Y))))
# }
#
# # Load swim dataset for binomial analysis
# data("swim")
# swim_data <- cbind(swim$x, swim$y)
#
# # Seperate observation data into train and test data
# n <- 5 * ncol(swim$x + 1) # Size for training data
# train_idx <- sample(1:nrow(swim_data), n)
# train_data <- swim_data[train_idx, ]
# test_data <- swim_data[-train_idx, ]
#
# dim(train_data)
## ----eval=FALSE---------------------------------------------------------------
# # Fit a logistic regression model (GLM)
# glm_model <- stats::glm(
# formula = empyr1 ~ .,
# family = binomial,
# data = train_data
# )
#
# predicted_y <- predict(
# glm_model,
# newdata = test_data,
# type = "response"
# )
#
# cat(
# "MLE GLM Binomial Model - Logarithmic Error:",
# get_mean_logarithmic_error(
# Y = test_data$empyr1,
# est_Y = predicted_y
# )
# )
## ----eval=FALSE---------------------------------------------------------------
# roc_curve <- pROC::roc(test_data$empyr1, predicted_y)
# plot(roc_curve, main = "ROC Curve (MLE)", col = "blue", lwd = 2)
## ----eval=FALSE---------------------------------------------------------------
# cat_init <- cat_glm_initialization(
# formula = empyr1 ~ 1,
# family = binomial, # Default: gaussian
# data = train_data,
# syn_size = 50, # Default: 4 times the number of predictors
# x_degree = NULL, # Default: NULL, which means degrees of all predictors are 1
# resample_only = FALSE, # Default: FALSE
# na_replace = mean # Default: stats::na.omit
# )
#
# cat_init
## ----eval=FALSE---------------------------------------------------------------
# names(cat_init)
## ----eval=FALSE---------------------------------------------------------------
# # The number of observations (rows) in the original dataset (`obs_data`)
# cat_init$obs_size
#
# # The information detailing the process of resampling synthetic data
# cat_init$syn_x_resample_inform
## ----eval=FALSE---------------------------------------------------------------
# cat_glm_model <- cat_glm(
# formula = empyr1 ~ female + agege35, # Same as `~ female + agege35`
# cat_init = cat_init,
# tau = 10 # Default: number of predictors / 4
# )
#
# cat_glm_model
## ----eval=FALSE---------------------------------------------------------------
# cat_glm_predicted_y <- predict(
# cat_glm_model,
# newdata = test_data,
# type = "response"
# )
#
# cat(
# "Catalytic `cat_glm` - Logarithmic Error:",
# get_mean_logarithmic_error(
# Y = test_data$empyr1,
# est_Y = cat_glm_predicted_y
# )
# )
## ----eval=FALSE---------------------------------------------------------------
# roc_curve <- pROC::roc(test_data$empyr1, cat_glm_predicted_y)
# plot(roc_curve, main = "ROC Curve (cat_glm)", col = "blue", lwd = 2)
## ----eval=FALSE---------------------------------------------------------------
# names(cat_glm_model)
## ----eval=FALSE---------------------------------------------------------------
# # The formula used for modeling
# cat_glm_model$formula
#
# # The fitted GLM model object obtained from `stats::glm` with `tau`
# cat_glm_model$coefficients
## ----eval=FALSE---------------------------------------------------------------
# cat_glm_tune_cv <- cat_glm_tune(
# formula = empyr1 ~ ., # Same as `~ .`
# cat_init = cat_init,
# risk_estimate_method = "cross_validation", # Default auto-select based on the data size
# discrepancy_method = "logistic_deviance", # Default auto-select based on family
# tau_seq = seq(0.1, 5.1, 0.5), # Default is a numeric sequence around the number of predictors / 4. Do not recommand to including 0 for cross validation.
# cross_validation_fold_num = 3 # Default: 5
# )
#
# cat_glm_tune_cv
## ----eval=FALSE---------------------------------------------------------------
# plot(cat_glm_tune_cv)
## ----eval=FALSE---------------------------------------------------------------
# cat_glm_tune_boots <- cat_glm_tune(
# formula = ~., # Same as `empyr1 ~ .`
# cat_init = cat_init,
# risk_estimate_method = "parametric_bootstrap", # Default auto-select based on the data size
# discrepancy_method = "logistic_deviance", # Default auto-select based on family
# tau_0 = 2, # Default: number of predictors / 4
# parametric_bootstrap_iteration_times = 10, # Default: 100
# )
#
# cat_glm_tune_boots
## ----eval=FALSE---------------------------------------------------------------
# cat_glm_tune_stein <- cat_glm_tune(
# formula = ~., # Same as `empyr1 ~ .`
# cat_init = cat_init,
# risk_estimate_method = "steinian_estimate", # Default auto-select based on the data size
# discrepancy_method = "logistic_deviance", # default auto select based on family
# )
#
# cat_glm_tune_stein
## ----eval=FALSE---------------------------------------------------------------
# cat_glm_tune_auto <- cat_glm_tune(
# formula = ~.,
# cat_init = cat_init
# )
#
# cat_glm_tune_auto
## ----eval=FALSE---------------------------------------------------------------
# cat_glm_tune_predicted_y <- predict(
# cat_glm_tune_auto,
# newdata = test_data,
# type = "response"
# )
#
# cat(
# "Catalytic `cat_glm_tune` - Logarithmic Error:",
# get_mean_logarithmic_error(
# Y = test_data$empyr1,
# est_Y = cat_glm_tune_predicted_y
# )
# )
## ----eval=FALSE---------------------------------------------------------------
# roc_curve <- pROC::roc(test_data$empyr1, cat_glm_tune_predicted_y)
# plot(roc_curve, main = "ROC Curve (cat_glm_tune)", col = "blue", lwd = 2)
## ----eval=FALSE---------------------------------------------------------------
# names(cat_glm_tune_auto)
## ----eval=FALSE---------------------------------------------------------------
# # The method used for risk estimation
# cat_glm_tune_auto$risk_estimate_method
#
# # Selected optimal tau value from `tau_seq` that minimizes discrepancy error
# cat_glm_tune_auto$tau
## ----eval = FALSE-------------------------------------------------------------
# cat_glm_bayes_model <- cat_glm_bayes(
# formula = empyr1 ~ ., # Same as `~ .`
# cat_init = cat_init,
# tau = 50, # Default: number of predictors / 4
# chains = 1, # Default: 4
# iter = 100, # Default: 2000
# warmup = 50, # Default: 1000
# algorithm = "NUTS" # Default: NUTS
# )
#
# cat_glm_bayes_model
## ----eval = FALSE-------------------------------------------------------------
# traceplot(cat_glm_bayes_model)
## ----eval = FALSE-------------------------------------------------------------
# traceplot(cat_glm_bayes_model, inc_warmup = TRUE)
## ----eval = FALSE-------------------------------------------------------------
# cat_glm_bayes_predicted_y <- predict(
# cat_glm_bayes_model,
# newdata = test_data,
# type = "response"
# )
#
# cat(
# "MLE GLM Binomial Model - Logarithmic Error:",
# get_mean_logarithmic_error(
# Y = test_data$empyr1,
# est_Y = cat_glm_bayes_predicted_y
# )
# )
## ----eval = FALSE-------------------------------------------------------------
# roc_curve <- pROC::roc(test_data$empyr1, cat_glm_bayes_predicted_y)
# plot(roc_curve, main = "ROC Curve (cat_glm_bayes)", col = "blue", lwd = 2)
## ----eval = FALSE-------------------------------------------------------------
# names(cat_glm_bayes_model)
## ----eval = FALSE-------------------------------------------------------------
# # The number of Markov chains used during MCMC sampling in `rstan`.
# cat_glm_bayes_model$chain
#
# # The mean estimated coefficients from the Bayesian GLM model
# cat_glm_bayes_model$coefficients
## ----eval = FALSE-------------------------------------------------------------
# cat_glm_bayes_joint_model <- cat_glm_bayes_joint(
# formula = empyr1 ~ ., # Same as `~ .`
# cat_init = cat_init,
# chains = 1, # Default: 4
# iter = 100, # Default: 2000
# warmup = 50, # Default: 1000
# algorithm = "NUTS", # Default: NUTS
# tau_alpha = 2, # Default: 2
# tau_gamma = 1 # Default: 1
# )
#
# cat_glm_bayes_joint_model
## ----eval = FALSE-------------------------------------------------------------
# traceplot(cat_glm_bayes_joint_model)
## ----eval = FALSE-------------------------------------------------------------
# traceplot(cat_glm_bayes_joint_model, inc_warmup = TRUE)
## ----eval = FALSE-------------------------------------------------------------
# cat_glm_bayes_joint_predicted_y <- predict(
# cat_glm_bayes_joint_model,
# newdata = test_data,
# type = "response"
# )
#
# cat(
# "Catlytic `cat_glm_bayes_joint` - Logarithmic Error:",
# get_mean_logarithmic_error(
# Y = test_data$empyr1,
# est_Y = cat_glm_bayes_joint_predicted_y
# )
# )
## ----eval = FALSE-------------------------------------------------------------
# roc_curve <- pROC::roc(test_data$empyr1, cat_glm_bayes_joint_predicted_y)
# plot(roc_curve, main = "ROC Curve (cat_glm_bayes_joint)", col = "blue", lwd = 2)
## ----eval = FALSE-------------------------------------------------------------
# names(cat_glm_bayes_joint_model)
## ----eval = FALSE-------------------------------------------------------------
# # The estimated tau parameter from the MCMC sampling `rstan::sampling`,
# cat_glm_bayes_joint_model$tau
#
# # The mean estimated coefficients from the Bayesian GLM model
# cat_glm_bayes_joint_model$coefficients
## ----eval = FALSE-------------------------------------------------------------
# cat_glm_bayes_joint_gibbs_model <- cat_glm_bayes_joint_gibbs(
# formula = empyr1 ~ ., # Same as `~ .`
# cat_init = cat_init,
# iter = 100, # Default: 1000
# warmup = 50, # Default: 500
# coefs_iter = 2, # Default: 5
# tau_0 = 1, # Default: number of predictors / 4
# tau_alpha = 2, # Default: 2
# tau_gamma = 1, # Default: 1
# refresh = TRUE # Default: TRUE
# )
#
# cat_glm_bayes_joint_gibbs_model
## ----eval = FALSE-------------------------------------------------------------
# traceplot(cat_glm_bayes_joint_gibbs_model)
## ----eval = FALSE-------------------------------------------------------------
# traceplot(cat_glm_bayes_joint_gibbs_model, pars = c("female", "hsdip", "numchild"))
## ----eval = FALSE-------------------------------------------------------------
# cat_glm_bayes_joint_gibbs_predicted_y <- predict(
# cat_glm_bayes_joint_gibbs_model,
# newdata = test_data,
# type = "response"
# )
#
# cat(
# "Catalytic `cat_glm_bayes_joint_gibbs` - Logarithmic Error:",
# get_mean_logarithmic_error(
# Y = test_data$empyr1,
# est_Y = cat_glm_bayes_joint_gibbs_predicted_y
# )
# )
## ----eval = FALSE-------------------------------------------------------------
# roc_curve <- pROC::roc(test_data$empyr1, cat_glm_bayes_joint_gibbs_predicted_y)
# plot(roc_curve, main = "ROC Curve (cat_glm_bayes_joint_gibbs)", col = "blue", lwd = 2)
## ----eval = FALSE-------------------------------------------------------------
# names(cat_glm_bayes_joint_gibbs_model)
## ----eval = FALSE-------------------------------------------------------------
# # The Date and time when sampling ended.
# cat_glm_bayes_joint_gibbs_model$sys_time
#
# # The summary statistics
# cat_glm_bayes_joint_gibbs_model$inform_df
## ----eval = FALSE-------------------------------------------------------------
# cat_glm_bayes_joint_tau_lower <- cat_glm_bayes_joint(
# formula = ~., # Same as `empyr1 ~ .`
# cat_init = cat_init,
# binomial_tau_lower = 0.5, # Default: 0.05
# chains = 1, # Default: 4
# iter = 100, # Default: 2000
# warmup = 50 # Default: 1000
# )
#
# cat_glm_bayes_joint_tau_lower
## ----eval = FALSE-------------------------------------------------------------
# cat_glm_bayes_joint_tau_lower_predicted_y <- predict(
# cat_glm_bayes_joint_tau_lower,
# newdata = test_data,
# type = "response"
# )
#
# cat(
# "Catalytic `cat_glm_bayes_joint_tau_lower` - Logarithmic Error:",
# get_mean_logarithmic_error(
# Y = test_data$empyr1,
# est_Y = cat_glm_bayes_joint_tau_lower_predicted_y
# )
# )
## ----eval = FALSE-------------------------------------------------------------
# roc_curve <- pROC::roc(test_data$empyr1, cat_glm_bayes_joint_tau_lower_predicted_y)
# plot(roc_curve, main = "ROC Curve (MLE)", col = "blue", lwd = 2)
## ----eval = FALSE-------------------------------------------------------------
# cat_glm_bayes_joint_theta <- cat_glm_bayes_joint(
# formula = ~., # Same as `empyr1 ~ .`
# cat_init = cat_init,
# binomial_joint_theta = TRUE, # Default: FALSE
# chains = 1, # Default: 4
# iter = 100, # Default: 2000
# warmup = 50 # Default: 1000
# )
#
# cat_glm_bayes_joint_theta
## ----eval = FALSE-------------------------------------------------------------
# cat_glm_bayes_joint_theta_predicted_y <- predict(
# cat_glm_bayes_joint_theta,
# newdata = test_data,
# type = "response"
# )
#
# cat(
# "Catalytic `cat_glm_bayes_joint_theta` - Logarithmic Error:",
# get_mean_logarithmic_error(
# Y = test_data$empyr1,
# est_Y = cat_glm_bayes_joint_theta_predicted_y
# )
# )
## ----eval = FALSE-------------------------------------------------------------
# roc_curve <- pROC::roc(test_data$empyr1, cat_glm_bayes_joint_theta_predicted_y)
# plot(roc_curve, main = "ROC Curve (cat_glm_bayes_joint_theta)", col = "blue", lwd = 2)
## ----eval = FALSE-------------------------------------------------------------
# cat_glm_bayes_joint_alpha <- cat_glm_bayes_joint(
# formula = ~., # Same as `empyr1 ~ .`
# cat_init = cat_init,
# binomial_joint_theta = TRUE, # Default: FALSE
# binomial_joint_alpha = TRUE, # Default: FALSE
# chains = 1, # Default: 4
# iter = 100, # Default: 2000
# warmup = 50 # Default: 1000
# )
#
# cat_glm_bayes_joint_alpha
## ----eval = FALSE-------------------------------------------------------------
# cat_glm_bayes_joint_alpha_predicted_y <- predict(
# cat_glm_bayes_joint_alpha,
# newdata = test_data,
# type = "response"
# )
#
# cat(
# "Catalytic `cat_glm_bayes_joint_alpha` - Logarithmic Error:",
# get_mean_logarithmic_error(
# Y = test_data$empyr1,
# est_Y = cat_glm_bayes_joint_alpha_predicted_y
# )
# )
## ----eval = FALSE-------------------------------------------------------------
# roc_curve <- pROC::roc(test_data$empyr1, cat_glm_bayes_joint_alpha_predicted_y)
# plot(roc_curve, main = "ROC Curve (cat_glm_bayes_joint_alpha)", col = "blue", lwd = 2)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.