catalytic_cox.R
In catalytic: Tools for Applying Catalytic Priors in Statistical Modeling

## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.align = "center",
  fig.width = 7,
  fig.height = 6,
  cache = TRUE
)

## ----eval=FALSE---------------------------------------------------------------
#  library(survival)
#  library(catalytic)
#  
#  set.seed(1)
#  
#  # Compute the Partial Likelihood for the Cox Proportional Hazards Model
#  get_cox_partial_likelihood <- function(X,
#                                         time,
#                                         status,
#                                         coefs,
#                                         entry_points) {
#    # Identify the risk set indices for Cox proportional hazards model
#    get_cox_risk_set_idx <- function(time_of_interest,
#                                     entry_vector,
#                                     time_vector,
#                                     status_vector) {
#      time_vector <- as.vector(time_vector)
#      entry_vector <- as.vector(entry_vector)
#      status_vector <- as.vector(status_vector)
#  
#      # Find indices where subjects are at risk at the given time of interest
#      return(which((time_of_interest >= entry_vector) & (
#        (time_vector == time_of_interest & status_vector == 1) | (
#          time_vector + 1e-08 > time_of_interest))))
#    }
#  
#    X <- as.matrix(X)
#    time <- as.vector(time)
#    status <- as.vector(status)
#  
#    pl <- 0
#  
#    # Calculate partial likelihood for each censored observation
#    for (i in which(status == 1)) {
#      risk_set_idx <- get_cox_risk_set_idx(
#        time_of_interest = time[i],
#        entry_vector = entry_points,
#        time_vector = time,
#        status_vector = status
#      )
#  
#      # Compute the linear predictors for the risk set
#      exp_risk_lp <- exp(X[risk_set_idx, , drop = FALSE] %*% coefs)
#  
#      # Update partial likelihood
#      pl <- pl + sum(X[i, , drop = FALSE] * coefs) - log(sum(exp_risk_lp))
#    }
#  
#    return(pl)
#  }
#  
#  # Load pbc dataset for Cox proportional hazards model
#  data("pbc")
#  
#  pbc <- pbc[stats::complete.cases(pbc), 2:20] # Remove NA values and `id` column
#  
#  pbc$trt <- ifelse(pbc$trt == 1, 1, 0) # Convert trt to binary
#  pbc$sex <- ifelse(pbc$sex == "f", 1, 0) # Convert sex to binary
#  pbc$edema2 <- ifelse(pbc$edema == 0.5, 1, 0) # Convert edema2 to binary
#  pbc$edema <- ifelse(pbc$edema == 1, 1, 0) # Convert edema to binary
#  pbc$time <- pbc$time / 365 # Convert time to years
#  pbc$status <- (pbc$status == 2) * 1 # Convert status to binary
#  
#  # Identify columns to be standardized
#  not_standarlized_index <- c(1, 2, 3, 5, 6, 7, 8, 9, 20)
#  
#  # Standardize the columns
#  pbc[, -not_standarlized_index] <- scale(pbc[, -not_standarlized_index])
#  
#  # Seperate observation data into train and test data
#  train_n <- (ncol(pbc) - 2) * 5
#  train_idx <- sample(1:nrow(pbc), train_n)
#  train_data <- pbc[train_idx, ]
#  test_data <- pbc[-train_idx, ]
#  
#  test_x <- test_data[, -which(names(test_data) %in% c("time", "status"))]
#  test_time <- test_data$time
#  test_status <- test_data$status
#  test_entry_points <- rep(0, nrow(test_x))
#  
#  dim(train_data)

## ----eval=FALSE---------------------------------------------------------------
#  # Calculate MPLE (Marginal Partial Likelihood Estimate) prediction score with 0 coefficients
#  MPLE_0 <- get_cox_partial_likelihood(
#    X = test_x,
#    time = test_time,
#    status = test_status,
#    coefs = rep(0, (ncol(test_x))),
#    entry_points = test_entry_points
#  )
#  
#  # Fit a COX regression model
#  cox_model <- survival::coxph(
#    formula = survival::Surv(time, status) ~ .,
#    data = train_data
#  )
#  
#  # Calculate MPLE (Marginal Partial Likelihood Estimate) prediction score of estimated coefficients minus 0 coefficients
#  cat(
#    "MLE COX Model - Predition Score:",
#    get_cox_partial_likelihood(
#      X = test_x,
#      time = test_time,
#      status = test_status,
#      coefs = coef(cox_model),
#      entry_points = test_entry_points
#    ) - MPLE_0
#  )

## ----eval=FALSE---------------------------------------------------------------
#  cat_init <- cat_cox_initialization(
#    formula = survival::Surv(time, status) ~ 1,
#    data = train_data,
#    syn_size = 100, # Default: 4 times the number of predictors
#    hazard_constant = 1, # Default: calculated from observed data
#    entry_points = rep(0, nrow(train_data)), # Default: Null, which is vector of zeros
#    x_degree = NULL, # Default: NULL, which means degrees of all predictors are 1
#    resample_only = FALSE, # Default: FALSE
#    na_replace = mean # Default: stats::na.omit
#  )
#  
#  cat_init

## ----eval=FALSE---------------------------------------------------------------
#  names(cat_init)

## ----eval=FALSE---------------------------------------------------------------
#  # The number of observations (rows) in the original dataset (`obs_data`)
#  cat_init$obs_size
#  
#  # The information detailing the process of resampling synthetic data
#  cat_init$syn_x_resample_inform

## ----eval=FALSE---------------------------------------------------------------
#  cat_cox_model <- cat_cox(
#    formula = survival::Surv(time, status) ~ ., # Same as `~ .`
#    cat_init = cat_init,
#    tau = 10, # Default: number of predictors
#    method = "WME", # Default: CRE
#    init_coefficients = NULL, # Default: all zeros
#    tol = 0.01, # Default: 1e-5
#    max_iter = 5 # Default: 25
#  )
#  
#  cat_cox_model

## ----eval=FALSE---------------------------------------------------------------
#  cat(
#    "Catalytic `cat_cox` - Predition Score:",
#    get_cox_partial_likelihood(
#      X = test_x,
#      time = test_time,
#      status = test_status,
#      coefs = coef(cat_cox_model),
#      entry_points = test_entry_points
#    ) - MPLE_0
#  )

## ----eval=FALSE---------------------------------------------------------------
#  names(cat_cox_model)

## ----eval=FALSE---------------------------------------------------------------
#  # The formula used for modeling
#  cat_cox_model$formula
#  
#  # The fitted model object obtained from `survival::coxph` with `tau`
#  cat_cox_model$coefficients

## ----eval=FALSE---------------------------------------------------------------
#  cat_cox_tune_model <- cat_cox_tune(
#    formula = survival::Surv(time, status) ~ ., # Same as `~ .`
#    cat_init = cat_init,
#    method = "CRE", # Default: "CRE"
#    tau_seq = seq(1, 5, 0.5), # Default is a numeric sequence around the number of predictors
#    cross_validation_fold_num = 3, # Default: 5
#    tol = 1 # Additional arguments passed to the `cat_cox` function
#  )
#  
#  cat_cox_tune_model

## ----eval=FALSE---------------------------------------------------------------
#  plot(cat_cox_tune_model, text_pos = 2, legend_pos = "bottomright")

## ----eval=FALSE---------------------------------------------------------------
#  cat(
#    "Catalytic `cat_cox_tune` - Predition Score:",
#    get_cox_partial_likelihood(
#      X = test_x,
#      time = test_time,
#      status = test_status,
#      coefs = coef(cat_cox_tune_model),
#      entry_points = test_entry_points
#    ) - MPLE_0
#  )

## ----eval=FALSE---------------------------------------------------------------
#  names(cat_cox_tune_model)

## ----eval=FALSE---------------------------------------------------------------
#  # The estimated coefficients from the fitted model
#  cat_cox_tune_model$coefficients
#  
#  # Selected optimal tau value from `tau_seq` that maximize the likelihood
#  cat_cox_tune_model$tau

## ----eval = FALSE-------------------------------------------------------------
#  cat_cox_bayes_model <- cat_cox_bayes(
#    formula = survival::Surv(time, status) ~ ., # Same as `~ .
#    cat_init = cat_init,
#    tau = 1, # Default: NULL
#    chains = 1, # Default: 4
#    iter = 100, # Default: 2000
#    warmup = 50, # Default: 1000
#    hazard_beta = 2 # Default: 2
#  )
#  
#  cat_cox_bayes_model

## ----eval=FALSE---------------------------------------------------------------
#  traceplot(cat_cox_bayes_model)

## ----eval=FALSE---------------------------------------------------------------
#  traceplot(cat_cox_bayes_model, inc_warmup = TRUE)

## ----eval = FALSE-------------------------------------------------------------
#  cat(
#    "Catalytic `cat_cox_bayes` - Predition Score:",
#    get_cox_partial_likelihood(
#      X = test_x,
#      time = test_time,
#      status = test_status,
#      coefs = coef(cat_cox_bayes_model),
#      entry_points = test_entry_points
#    ) - MPLE_0
#  )

## ----eval = FALSE-------------------------------------------------------------
#  names(cat_cox_bayes_model)

## ----eval = FALSE-------------------------------------------------------------
#  # The number of Markov chains used during MCMC sampling in `rstan`.
#  cat_cox_bayes_model$chain
#  
#  # The mean estimated coefficients from the Bayesian model
#  cat_cox_bayes_model$coefficients

## ----eval = FALSE-------------------------------------------------------------
#  cat_cox_bayes_joint_model <- cat_cox_bayes_joint(
#    formula = survival::Surv(time, status) ~ ., # Same as `~ .
#    cat_init = cat_init,
#    chains = 1, # Default: 4
#    iter = 100, # Default: 2000
#    warmup = 50, # Default: 1000
#    tau_alpha = 2, # Default: 2
#    tau_gamma = 1, # Default: 1
#    hazard_beta = 2 # Default: 2
#  )
#  
#  cat_cox_bayes_joint_model

## ----eval = FALSE-------------------------------------------------------------
#  traceplot(cat_cox_bayes_joint_model)

## ----eval = FALSE-------------------------------------------------------------
#  traceplot(cat_cox_bayes_joint_model, inc_warmup = TRUE)

## ----eval = FALSE-------------------------------------------------------------
#  cat(
#    "Catalytic `cat_cox_bayes_joint` - Predition Score:",
#    get_cox_partial_likelihood(
#      X = test_x,
#      time = test_time,
#      status = test_status,
#      coefs = coef(cat_cox_bayes_joint_model),
#      entry_point = test_entry_points
#    ) - MPLE_0
#  )

## ----eval = FALSE-------------------------------------------------------------
#  names(cat_cox_bayes_joint_model)

## ----eval = FALSE-------------------------------------------------------------
#  # The estimated tau parameter from the MCMC sampling `rstan::sampling`,
#  cat_cox_bayes_joint_model$tau
#  
#  # The mean estimated coefficients from the Bayesian model
#  cat_cox_bayes_joint_model$coefficients

Any scripts or data that you put into this service are public.

catalytic documentation built on April 4, 2025, 5:51 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

catalytic
Tools for Applying Catalytic Priors in Statistical Modeling

inst/doc/catalytic_cox.R
In catalytic: Tools for Applying Catalytic Priors in Statistical Modeling

Try the catalytic package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

catalytic Tools for Applying Catalytic Priors in Statistical Modeling

inst/doc/catalytic_cox.R In catalytic: Tools for Applying Catalytic Priors in Statistical Modeling

Try the catalytic package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

catalytic
Tools for Applying Catalytic Priors in Statistical Modeling

inst/doc/catalytic_cox.R
In catalytic: Tools for Applying Catalytic Priors in Statistical Modeling