Nothing
#' Copula regression models with Cox margins for semi-competing risk data under right censoring cases
#'
#' @description Fits a copula model with Cox margins for semi-competing risk data under right censoring cases.
#'
#' @name rc_scmprisk_Cox
#' @aliases rc_scmprisk_Cox
#' @param data a data frame. Each row is a single individual with single observation. Must have \code{time1} (time for non-terminal event),
#' \code{time2} (time for terminal event), \code{status1} (indicator for non-terminal event, 0 if exactly observe, 1 if censored by terminal event, end of cohort or loss to follow-up),
#' \code{status2} (indicator for terminal event, 0 if exactly observe, 1 if censored by end of cohort or loss to follow-up),
#' and \code{covariates} by column.
#' @param var_list List of covariates to be included in the model.
#' @param copula_type The type of copula to use (e.g., "Clayton", "Gumbel").
#' @param l1 Lower bound for Bernstein polynomial for event 1 (default is 0).
#' @param l2 Lower bound for Bernstein polynomial for event 2 (default is 0).
#' @param m1 Order of Bernstein polynomial for event 1 (default is 3).
#' @param m2 Order of Bernstein polynomial for event 2 (default is 3).
#' @param u1 Upper bound for Bernstein polynomial for event 1 (default is max(obs_time) + 1).
#' @param u2 Upper bound for Bernstein polynomial for event 2 (default is max(obs_time) + 1).
#' @param method1a1 Optimization method for step 1a1 (default is "BFGS").
#' @param method1a2 Optimization method for step 1a2 (default is "BFGS").
#' @param method1b Optimization method for step 1b (default is "BFGS").
#' @param method2 Optimization method for step 2 (default is "BFGS").
#' @param stepsize Maximum number of iterations for optimization steps (default is 10000).
#' @param initial_par1a1 initial value for parameter when estimating in step 1a_1.
#' @param initial_par1a2 initial value for parameter when estimating in step 1a_2.
#' @param eta_ini_ini a vector of initial values for copula parameters, default is 2
#' @importFrom corpcor pseudoinverse
#' @importFrom survival coxph
#' @importFrom stats pchisq
#' @importFrom stats optim
#' @export
#'
#' @source
#' Tao Sun, Yunlong Li, Zhengyan Xiao, Ying Ding, Xiaojun Wang (2022).
#' Semiparametric copula method for semi-competing risks data
#' subject to interval censoring and left truncation:
#' Application to disability in elderly. Statistical Methods in Medical Research (Accepted).
#'
#' @source
#' Tao Sun, Weijie Liang, Gongzi Zhang, Danhui Yi, Ying Ding, Lihai Zhang (2024).
#' Penalised semi-parametric copula method for semi-competing risks data:
#' Application to hip fracture in elderly. Journal of the Royal Statistical Society Series C: Applied Statistics (Accepted)
#'
#' @details The input data must be a data frame. Each row is a single individual with single observation. Must have \code{time1} (time for non-terminal event),
#' \code{time2} (time for terminal event), \code{status1} (indicator for non-terminal event, 0 if exactly observe, 1 if censored by terminal event, end of cohort or loss to follow-up),
#' \code{status2} (indicator for terminal event, 0 if exactly observe, 1 if censored by end of cohort or loss to follow-up),
#' and \code{covariates} by column. \cr
#'
#'
#' The supported copula model in this version is \code{"Clayton"} and \code{"Gumbel"}.
#' The parametric generator functions of copula functions are list below:
#'
#' The Clayton copula has a generator \deqn{\phi_{\eta}(t) = (1+t)^{-1/\eta},}
#' with \eqn{\eta > 0} and Kendall's \eqn{\tau = \eta/(2+\eta)}. \cr
#'
#' The Gumbel copula has a generator \deqn{\phi_{\eta}(t) = \exp(-t^{1/\eta}),}
#' with \eqn{\eta \ge 1} and Kendall's \eqn{\tau = 1-1/\eta}. \cr
#'
#'
#' The marginal Cox semiparametric models are built based on Bernstein polynomials, which is formulated below:
#'
#' \deqn{S(t|Z) = \exp[-\Lambda(t) e^{Z^{\top}\beta}],} where \eqn{t} is time, \eqn{Z} is covariate,
#' \eqn{\beta} is the coefficient vector, and \eqn{\Lambda(t)} is the cumulative baseline hazard function.
#' We approximate \eqn{\Lambda(t)} in a sieve space constructed by Bernstein polynomials with degree \eqn{m}. By default, \eqn{m=3}.
#' In the end, all model parameters are estimated by the sieve estimators (Sun and Ding, In Press).
#'
#' The Cox proportional hazards model assumes that the hazard ratio is constant over time, with the form
#' \eqn{h(t|Z) = h_0(t) e^{Z^{\top}\beta}}, where \eqn{h_0(t)} is the baseline hazard function and \eqn{Z} is the covariate vector.
#' This formulation is flexible and allows \eqn{\Lambda(t)} to be approximated using Bernstein polynomials, which can be tuned by the degree parameter \code{m}.
#' The degree of Bernstein polynomial \code{m} can be selected based on the AIC value to provide an optimal balance between model complexity and fit. \cr
#'
#' Optimization methods can be all methods (except \code{"Brent"}) from \code{optim}, such as
#' \code{"Nelder-Mead"}, \code{"BFGS"}, \code{"CG"}, \code{"L-BFGS-B"}, \code{"SANN"}.
#'
#' @return a \code{CopulaCenR} object summarizing the model.
#' Can be used as an input to general \code{S3} methods including
#' \code{summary}, \code{print}, \code{coef},
#' \code{logLik}, \code{AIC}.
#' @examples
#' # fit a rc_Clayton_Cox model
#' data("data_scmprisk_rc")
#' Clayton_rc <- rc_scmprisk_Cox(data_scmprisk_rc,var_list = c("x1","x2","x3"), copula = "Clayton",
#' l1=0, m1 = 3,
#' l2=0, m2 = 3, eta_ini_ini = 2,method1a1="Nelder-Mead",method1a2="Nelder-Mead"
#' )
#' Clayton_rc
rc_scmprisk_Cox <- function(data, var_list, copula_type = "Clayton", l1 = 0, l2 = 0, m1 = 3, m2 = 3, u1 = NULL, u2 = NULL, eta_ini_ini = 2,
method1a1 = "BFGS", method1a2 = "BFGS", method1b = "BFGS", method2 = "BFGS", stepsize = 10000, initial_par1a1 = NULL, initial_par1a2 = NULL) {
# Data pre-processing
data <- data[, c("time1", "event1", "time2", "event2", var_list)]
# Create datasets for both events
tmp1 <- data[, c("time1", "event1", var_list)]
tmp2 <- data[, c("time2", "event2", var_list)]
tmp1$id <- 1:nrow(tmp1)
tmp1$ind <- 1
tmp2$id <- 1:nrow(tmp2)
tmp2$ind <- 2
colnames(tmp1) <- colnames(tmp2) <- c("obs_time", "status", var_list, "id", "ind")
data <- rbind(tmp1, tmp2)
data <- data[order(data$id, data$ind), ]
data <- data[, c("id", "ind", "obs_time", "status", var_list)]
# Convert data to list format
data_2 <- data_preprocess_rc(data, var_list)
indata1 <- data_2$indata1
indata2 <- data_2$indata2
# Generate dummy variables
tmp1 <- get_covariates_rc(indata1, var_list)
tmp2 <- get_covariates_rc(indata2, var_list)
x1 <- as.matrix(tmp1$x, nrow = data_2$n)
x2 <- as.matrix(tmp2$x, nrow = data_2$n)
n <- data_2$n
# Extract new variable list
var_list <- tmp1$var_list
p1 <- dim(x1)[2]
p2 <- dim(x2)[2]
# Bernstein polynomial setting
if (is.null(u1)) u1 <- max(indata1$obs_time) + 1
if (is.null(u2)) u2 <- max(indata2$obs_time) + 1
# Generate Bernstein polynomial parameters
data_processed1 <- data_process_scmprisk_rc_sp(indata1, var_list, l = l1, u = u1, m = m1)
data_processed2 <- data_process_scmprisk_rc_sp(indata2, var_list, l = l2, u = u2, m = m2)
b1 <- data_processed1$b
b2 <- data_processed2$b
b1_d <- data_processed1$b_d
b2_d <- data_processed2$b_d
# Initial values
phi1_ini_ini <- rep(0, m1 + 1)
phi2_ini_ini <- rep(0, m2 + 1)
copula <- copula_type
# Initial beta values from marginal Cox models
fit_event1 <- coxph(as.formula(paste0("Surv(obs_time, status)~", paste0(var_list, collapse = "+"))), data = indata1)
fit_event2 <- coxph(as.formula(paste0("Surv(obs_time, status)~", paste0(var_list, collapse = "+"))), data = indata2)
beta1_ini_ini <- fit_event1$coefficients
beta2_ini_ini <- fit_event2$coefficients
#eta_ini_ini <- 2
initial_ini <- c(phi1_ini_ini, phi2_ini_ini, beta1_ini_ini, beta2_ini_ini, eta_ini_ini)
# MLE Process
phi1_0 <- initial_ini[1:(m1 + 1)]
phi2_0 <- initial_ini[(m1 + 2):(m1 + 2 + m2)]
beta1_0 <- initial_ini[(m1 + 3 + m2):(m1 + 2 + m2 + ncol(x1))]
beta2_0 <- initial_ini[(m1 + 3 + m2 + ncol(x1)):(m1 + 2 + m2 + ncol(x1) + ncol(x2))]
eta_0 <- initial_ini[length(initial_ini)]
# Step 1a
if (is.null(initial_par1a1)) {
initial_par1a1 <- c(phi1_0, beta1_0)
}
fit1_a_1 <- optim(par = initial_par1a1, bern_base1_log_lik_rc,
x1 = x1, indata1 = indata1, b1 = b1, b1_d = b1_d, m1 = m1,
method = method1a1, control = list(maxit = stepsize), hessian = FALSE)
phi1_a_1 <- fit1_a_1$par[1:(m1 + 1)]
beta1_a_1 <- fit1_a_1$par[(m1 + 2):(m1 + 1 + length(beta1_0))]
if (is.null(initial_par1a2)) {
initial_par1a2 <- c(phi2_0, beta2_0)
}
fit1_a_2 <- optim(par = initial_par1a2, bern_base2_log_lik_rc,
x2 = x2, indata2 = indata2, b2 = b2, b2_d = b2_d, m2 = m2,
method = method1a2, control = list(maxit = stepsize), hessian = FALSE)
phi1_a_2 <- fit1_a_2$par[1:(m2 + 1)]
beta1_a_2 <- fit1_a_2$par[(m2 + 2):(m2 + 1 + length(beta2_0))]
# Step 1b
fit1_b <- optim(par = c(log(eta_0), phi1_a_1, beta1_a_1), bern_copula_pseudo_log_lik_rc,
fitted = c(phi1_a_2, beta1_a_2),
x1 = x1, x2 = x2, indata1 = indata1, indata2 = indata2,
b1 = b1, b2 = b2, b1_d = b1_d, b2_d = b2_d, m1 = m1, m2 = m2, copula = copula,
method = method1b, control = list(maxit = stepsize), hessian = FALSE)
eta1_b <- exp(fit1_b$par[1])
phi1_b_1 <- fit1_b$par[2:(m1 + 2)]
beta1_b_1 <- fit1_b$par[(m1 + 3):(m1 + 2 + length(beta1_a_1))]
# Step 2
fit2 <- optim(par = c(log(eta1_b), phi1_b_1, beta1_b_1, phi1_a_2, beta1_a_2), bern_copula_log_lik_rc,
x1 = x1, x2 = x2, indata1 = indata1, indata2 = indata2,
b1 = b1, b2 = b2, b1_d = b1_d, b2_d = b2_d, m1 = m1, m2 = m2, beta1_b_1 = beta1_b_1, copula = copula,
method = method2, control = list(maxit = stepsize), hessian = TRUE)
# Extract results
inv_info <- pseudoinverse(fit2$hessian)
dih <- diag(inv_info)
dih[dih < 0] <- 0
se <- sqrt(dih)
eta <- exp(fit2$par[1])
code <- fit2$convergence
# Calculate tau based on copula type
if (copula_type == "Clayton") {
tau <- eta / (eta + 2)
} else if (copula_type == "Gumbel") {
tau <- 1 - 1 / eta
} else {
stop("Unsupported copula type. Please use 'Clayton' or 'Gumbel'.")
}
beta <- fit2$par[2:length(fit2$par)]
theta <- c(eta, beta)
llk <- -1 * fit2$value
AIC <- 2 * length(theta) - 2 * llk
stat <- (theta - 0)^2 / se^2
pvalue <- pchisq(stat, 1, lower.tail = FALSE)
summary_results <- cbind(theta, se, stat, pvalue)
summary_results <- summary_results[c(1,((m1+1+2):(m1+1+2+length(var_list)-1)),((1+m1+1+length(var_list)+m2+2):(1+m1+1+length(var_list)+m2+2+length(var_list)-1))),]
# Create CopulaCenR object
result <- list(
code = code,
eta = eta,
tau = tau,
beta = beta,
logLik = llk,
AIC = AIC,
summary = summary_results,
call = match.call()
)
# Assign the CopulaCenR class
class(result) <- "CopulaCenR"
# Return the object
return(result)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.