catalytic_glm_binomial.R
In catalytic: Tools for Applying Catalytic Priors in Statistical Modeling

## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.align = "center",
  fig.width = 7,
  fig.height = 6,
  cache = TRUE
)

## ----eval=FALSE---------------------------------------------------------------
#  library(catalytic)
#  
#  set.seed(1)
#  
#  # Function for calculating the mean of logarithmic error between true response and estimated response
#  get_mean_logarithmic_error <- function(Y, est_Y) {
#    Y <- pmax(0.0001, pmin(0.9999, Y))
#    est_Y <- pmax(0.0001, pmin(0.9999, est_Y))
#    return(mean(Y * log(Y / est_Y) + (1 - Y) * log((1 - Y) / (1 - est_Y))))
#  }
#  
#  # Load swim dataset for binomial analysis
#  data("swim")
#  swim_data <- cbind(swim$x, swim$y)
#  
#  # Seperate observation data into train and test data
#  n <- 5 * ncol(swim$x + 1) # Size for training data
#  train_idx <- sample(1:nrow(swim_data), n)
#  train_data <- swim_data[train_idx, ]
#  test_data <- swim_data[-train_idx, ]
#  
#  dim(train_data)

## ----eval=FALSE---------------------------------------------------------------
#  # Fit a logistic regression model (GLM)
#  glm_model <- stats::glm(
#    formula = empyr1 ~ .,
#    family = binomial,
#    data = train_data
#  )
#  
#  predicted_y <- predict(
#    glm_model,
#    newdata = test_data,
#    type = "response"
#  )
#  
#  cat(
#    "MLE GLM Binomial Model - Logarithmic Error:",
#    get_mean_logarithmic_error(
#      Y = test_data$empyr1,
#      est_Y = predicted_y
#    )
#  )

## ----eval=FALSE---------------------------------------------------------------
#  roc_curve <- pROC::roc(test_data$empyr1, predicted_y)
#  plot(roc_curve, main = "ROC Curve (MLE)", col = "blue", lwd = 2)

## ----eval=FALSE---------------------------------------------------------------
#  cat_init <- cat_glm_initialization(
#    formula = empyr1 ~ 1,
#    family = binomial, # Default: gaussian
#    data = train_data,
#    syn_size = 50, # Default: 4 times the number of predictors
#    x_degree = NULL, # Default: NULL, which means degrees of all predictors are 1
#    resample_only = FALSE, # Default: FALSE
#    na_replace = mean # Default: stats::na.omit
#  )
#  
#  cat_init

## ----eval=FALSE---------------------------------------------------------------
#  names(cat_init)

## ----eval=FALSE---------------------------------------------------------------
#  # The number of observations (rows) in the original dataset (`obs_data`)
#  cat_init$obs_size
#  
#  # The information detailing the process of resampling synthetic data
#  cat_init$syn_x_resample_inform

## ----eval=FALSE---------------------------------------------------------------
#  cat_glm_model <- cat_glm(
#    formula = empyr1 ~ female + agege35, # Same as `~ female + agege35`
#    cat_init = cat_init,
#    tau = 10 # Default: number of predictors / 4
#  )
#  
#  cat_glm_model

## ----eval=FALSE---------------------------------------------------------------
#  cat_glm_predicted_y <- predict(
#    cat_glm_model,
#    newdata = test_data,
#    type = "response"
#  )
#  
#  cat(
#    "Catalytic `cat_glm` - Logarithmic Error:",
#    get_mean_logarithmic_error(
#      Y = test_data$empyr1,
#      est_Y = cat_glm_predicted_y
#    )
#  )

## ----eval=FALSE---------------------------------------------------------------
#  roc_curve <- pROC::roc(test_data$empyr1, cat_glm_predicted_y)
#  plot(roc_curve, main = "ROC Curve (cat_glm)", col = "blue", lwd = 2)

## ----eval=FALSE---------------------------------------------------------------
#  names(cat_glm_model)

## ----eval=FALSE---------------------------------------------------------------
#  # The formula used for modeling
#  cat_glm_model$formula
#  
#  # The fitted GLM model object obtained from `stats::glm` with `tau`
#  cat_glm_model$coefficients

## ----eval=FALSE---------------------------------------------------------------
#  cat_glm_tune_cv <- cat_glm_tune(
#    formula = empyr1 ~ ., # Same as `~ .`
#    cat_init = cat_init,
#    risk_estimate_method = "cross_validation", # Default auto-select based on the data size
#    discrepancy_method = "logistic_deviance", # Default auto-select based on family
#    tau_seq = seq(0.1, 5.1, 0.5), # Default is a numeric sequence around the number of predictors / 4. Do not recommand to including 0 for cross validation.
#    cross_validation_fold_num = 3 # Default: 5
#  )
#  
#  cat_glm_tune_cv

## ----eval=FALSE---------------------------------------------------------------
#  plot(cat_glm_tune_cv)

## ----eval=FALSE---------------------------------------------------------------
#  cat_glm_tune_boots <- cat_glm_tune(
#    formula = ~., # Same as `empyr1 ~ .`
#    cat_init = cat_init,
#    risk_estimate_method = "parametric_bootstrap", # Default auto-select based on the data size
#    discrepancy_method = "logistic_deviance", # Default auto-select based on family
#    tau_0 = 2, # Default: number of predictors / 4
#    parametric_bootstrap_iteration_times = 10, # Default: 100
#  )
#  
#  cat_glm_tune_boots

## ----eval=FALSE---------------------------------------------------------------
#  cat_glm_tune_stein <- cat_glm_tune(
#    formula = ~., # Same as `empyr1 ~ .`
#    cat_init = cat_init,
#    risk_estimate_method = "steinian_estimate", # Default auto-select based on the data size
#    discrepancy_method = "logistic_deviance", # default  auto select based on family
#  )
#  
#  cat_glm_tune_stein

## ----eval=FALSE---------------------------------------------------------------
#  cat_glm_tune_auto <- cat_glm_tune(
#    formula = ~.,
#    cat_init = cat_init
#  )
#  
#  cat_glm_tune_auto

## ----eval=FALSE---------------------------------------------------------------
#  cat_glm_tune_predicted_y <- predict(
#    cat_glm_tune_auto,
#    newdata = test_data,
#    type = "response"
#  )
#  
#  cat(
#    "Catalytic `cat_glm_tune` - Logarithmic Error:",
#    get_mean_logarithmic_error(
#      Y = test_data$empyr1,
#      est_Y = cat_glm_tune_predicted_y
#    )
#  )

## ----eval=FALSE---------------------------------------------------------------
#  roc_curve <- pROC::roc(test_data$empyr1, cat_glm_tune_predicted_y)
#  plot(roc_curve, main = "ROC Curve (cat_glm_tune)", col = "blue", lwd = 2)

## ----eval=FALSE---------------------------------------------------------------
#  names(cat_glm_tune_auto)

## ----eval=FALSE---------------------------------------------------------------
#  # The method used for risk estimation
#  cat_glm_tune_auto$risk_estimate_method
#  
#  # Selected optimal tau value from `tau_seq` that minimizes discrepancy error
#  cat_glm_tune_auto$tau

## ----eval = FALSE-------------------------------------------------------------
#  cat_glm_bayes_model <- cat_glm_bayes(
#    formula = empyr1 ~ ., # Same as `~ .`
#    cat_init = cat_init,
#    tau = 50, # Default: number of predictors / 4
#    chains = 1, # Default: 4
#    iter = 100, # Default: 2000
#    warmup = 50, # Default: 1000
#    algorithm = "NUTS" # Default: NUTS
#  )
#  
#  cat_glm_bayes_model

## ----eval = FALSE-------------------------------------------------------------
#  traceplot(cat_glm_bayes_model)

## ----eval = FALSE-------------------------------------------------------------
#  traceplot(cat_glm_bayes_model, inc_warmup = TRUE)

## ----eval = FALSE-------------------------------------------------------------
#  cat_glm_bayes_predicted_y <- predict(
#    cat_glm_bayes_model,
#    newdata = test_data,
#    type = "response"
#  )
#  
#  cat(
#    "MLE GLM Binomial Model - Logarithmic Error:",
#    get_mean_logarithmic_error(
#      Y = test_data$empyr1,
#      est_Y = cat_glm_bayes_predicted_y
#    )
#  )

## ----eval = FALSE-------------------------------------------------------------
#  roc_curve <- pROC::roc(test_data$empyr1, cat_glm_bayes_predicted_y)
#  plot(roc_curve, main = "ROC Curve (cat_glm_bayes)", col = "blue", lwd = 2)

## ----eval = FALSE-------------------------------------------------------------
#  names(cat_glm_bayes_model)

## ----eval = FALSE-------------------------------------------------------------
#  # The number of Markov chains used during MCMC sampling in `rstan`.
#  cat_glm_bayes_model$chain
#  
#  # The mean estimated coefficients from the Bayesian GLM model
#  cat_glm_bayes_model$coefficients

## ----eval = FALSE-------------------------------------------------------------
#  cat_glm_bayes_joint_model <- cat_glm_bayes_joint(
#    formula = empyr1 ~ ., # Same as `~ .`
#    cat_init = cat_init,
#    chains = 1, # Default: 4
#    iter = 100, # Default: 2000
#    warmup = 50, # Default: 1000
#    algorithm = "NUTS", # Default: NUTS
#    tau_alpha = 2, # Default: 2
#    tau_gamma = 1 # Default: 1
#  )
#  
#  cat_glm_bayes_joint_model

## ----eval = FALSE-------------------------------------------------------------
#  traceplot(cat_glm_bayes_joint_model)

## ----eval = FALSE-------------------------------------------------------------
#  traceplot(cat_glm_bayes_joint_model, inc_warmup = TRUE)

## ----eval = FALSE-------------------------------------------------------------
#  cat_glm_bayes_joint_predicted_y <- predict(
#    cat_glm_bayes_joint_model,
#    newdata = test_data,
#    type = "response"
#  )
#  
#  cat(
#    "Catlytic `cat_glm_bayes_joint` - Logarithmic Error:",
#    get_mean_logarithmic_error(
#      Y = test_data$empyr1,
#      est_Y = cat_glm_bayes_joint_predicted_y
#    )
#  )

## ----eval = FALSE-------------------------------------------------------------
#  roc_curve <- pROC::roc(test_data$empyr1, cat_glm_bayes_joint_predicted_y)
#  plot(roc_curve, main = "ROC Curve (cat_glm_bayes_joint)", col = "blue", lwd = 2)

## ----eval = FALSE-------------------------------------------------------------
#  names(cat_glm_bayes_joint_model)

## ----eval = FALSE-------------------------------------------------------------
#  # The estimated tau parameter from the MCMC sampling `rstan::sampling`,
#  cat_glm_bayes_joint_model$tau
#  
#  # The mean estimated coefficients from the Bayesian GLM model
#  cat_glm_bayes_joint_model$coefficients

## ----eval = FALSE-------------------------------------------------------------
#  cat_glm_bayes_joint_gibbs_model <- cat_glm_bayes_joint_gibbs(
#    formula = empyr1 ~ ., # Same as `~ .`
#    cat_init = cat_init,
#    iter = 100, # Default: 1000
#    warmup = 50, # Default: 500
#    coefs_iter = 2, # Default: 5
#    tau_0 = 1, # Default: number of predictors / 4
#    tau_alpha = 2, # Default: 2
#    tau_gamma = 1, # Default: 1
#    refresh = TRUE # Default: TRUE
#  )
#  
#  cat_glm_bayes_joint_gibbs_model

## ----eval = FALSE-------------------------------------------------------------
#  traceplot(cat_glm_bayes_joint_gibbs_model)

## ----eval = FALSE-------------------------------------------------------------
#  traceplot(cat_glm_bayes_joint_gibbs_model, pars = c("female", "hsdip", "numchild"))

## ----eval = FALSE-------------------------------------------------------------
#  cat_glm_bayes_joint_gibbs_predicted_y <- predict(
#    cat_glm_bayes_joint_gibbs_model,
#    newdata = test_data,
#    type = "response"
#  )
#  
#  cat(
#    "Catalytic `cat_glm_bayes_joint_gibbs` - Logarithmic Error:",
#    get_mean_logarithmic_error(
#      Y = test_data$empyr1,
#      est_Y = cat_glm_bayes_joint_gibbs_predicted_y
#    )
#  )

## ----eval = FALSE-------------------------------------------------------------
#  roc_curve <- pROC::roc(test_data$empyr1, cat_glm_bayes_joint_gibbs_predicted_y)
#  plot(roc_curve, main = "ROC Curve (cat_glm_bayes_joint_gibbs)", col = "blue", lwd = 2)

## ----eval = FALSE-------------------------------------------------------------
#  names(cat_glm_bayes_joint_gibbs_model)

## ----eval = FALSE-------------------------------------------------------------
#  # The Date and time when sampling ended.
#  cat_glm_bayes_joint_gibbs_model$sys_time
#  
#  # The summary statistics
#  cat_glm_bayes_joint_gibbs_model$inform_df

## ----eval = FALSE-------------------------------------------------------------
#  cat_glm_bayes_joint_tau_lower <- cat_glm_bayes_joint(
#    formula = ~., # Same as `empyr1 ~ .`
#    cat_init = cat_init,
#    binomial_tau_lower = 0.5, # Default: 0.05
#    chains = 1, # Default: 4
#    iter = 100, # Default: 2000
#    warmup = 50 # Default: 1000
#  )
#  
#  cat_glm_bayes_joint_tau_lower

## ----eval = FALSE-------------------------------------------------------------
#  cat_glm_bayes_joint_tau_lower_predicted_y <- predict(
#    cat_glm_bayes_joint_tau_lower,
#    newdata = test_data,
#    type = "response"
#  )
#  
#  cat(
#    "Catalytic `cat_glm_bayes_joint_tau_lower` - Logarithmic Error:",
#    get_mean_logarithmic_error(
#      Y = test_data$empyr1,
#      est_Y = cat_glm_bayes_joint_tau_lower_predicted_y
#    )
#  )

## ----eval = FALSE-------------------------------------------------------------
#  roc_curve <- pROC::roc(test_data$empyr1, cat_glm_bayes_joint_tau_lower_predicted_y)
#  plot(roc_curve, main = "ROC Curve (MLE)", col = "blue", lwd = 2)

## ----eval = FALSE-------------------------------------------------------------
#  cat_glm_bayes_joint_theta <- cat_glm_bayes_joint(
#    formula = ~., # Same as `empyr1 ~ .`
#    cat_init = cat_init,
#    binomial_joint_theta = TRUE, # Default: FALSE
#    chains = 1, # Default: 4
#    iter = 100, # Default: 2000
#    warmup = 50 # Default: 1000
#  )
#  
#  cat_glm_bayes_joint_theta

## ----eval = FALSE-------------------------------------------------------------
#  cat_glm_bayes_joint_theta_predicted_y <- predict(
#    cat_glm_bayes_joint_theta,
#    newdata = test_data,
#    type = "response"
#  )
#  
#  cat(
#    "Catalytic `cat_glm_bayes_joint_theta` - Logarithmic Error:",
#    get_mean_logarithmic_error(
#      Y = test_data$empyr1,
#      est_Y = cat_glm_bayes_joint_theta_predicted_y
#    )
#  )

## ----eval = FALSE-------------------------------------------------------------
#  roc_curve <- pROC::roc(test_data$empyr1, cat_glm_bayes_joint_theta_predicted_y)
#  plot(roc_curve, main = "ROC Curve (cat_glm_bayes_joint_theta)", col = "blue", lwd = 2)

## ----eval = FALSE-------------------------------------------------------------
#  cat_glm_bayes_joint_alpha <- cat_glm_bayes_joint(
#    formula = ~., # Same as `empyr1 ~ .`
#    cat_init = cat_init,
#    binomial_joint_theta = TRUE, # Default: FALSE
#    binomial_joint_alpha = TRUE, # Default: FALSE
#    chains = 1, # Default: 4
#    iter = 100, # Default: 2000
#    warmup = 50 # Default: 1000
#  )
#  
#  cat_glm_bayes_joint_alpha

## ----eval = FALSE-------------------------------------------------------------
#  cat_glm_bayes_joint_alpha_predicted_y <- predict(
#    cat_glm_bayes_joint_alpha,
#    newdata = test_data,
#    type = "response"
#  )
#  
#  cat(
#    "Catalytic `cat_glm_bayes_joint_alpha` - Logarithmic Error:",
#    get_mean_logarithmic_error(
#      Y = test_data$empyr1,
#      est_Y = cat_glm_bayes_joint_alpha_predicted_y
#    )
#  )

## ----eval = FALSE-------------------------------------------------------------
#  roc_curve <- pROC::roc(test_data$empyr1, cat_glm_bayes_joint_alpha_predicted_y)
#  plot(roc_curve, main = "ROC Curve (cat_glm_bayes_joint_alpha)", col = "blue", lwd = 2)
Any scripts or data that you put into this service are public.
catalytic documentation built on April 4, 2025, 5:51 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
catalytic
Tools for Applying Catalytic Priors in Statistical Modeling

inst/doc/catalytic_glm_binomial.R
In catalytic: Tools for Applying Catalytic Priors in Statistical Modeling

Try the catalytic package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

catalytic Tools for Applying Catalytic Priors in Statistical Modeling

inst/doc/catalytic_glm_binomial.R In catalytic: Tools for Applying Catalytic Priors in Statistical Modeling

Try the catalytic package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

catalytic
Tools for Applying Catalytic Priors in Statistical Modeling

inst/doc/catalytic_glm_binomial.R
In catalytic: Tools for Applying Catalytic Priors in Statistical Modeling