Stepwise Selection for Fine-Gray Competing Risks Models"

knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 7,
  fig.height = 5
)

Introduction

The crrstep package provides stepwise variable selection for the Fine-Gray proportional subdistribution hazards model for competing risks data. This vignette demonstrates how to use the package with both simple and complex formula specifications.

Installation

install.packages('crrstep')
library(crrstep)
library(cmprsk)

Simulated Data

set.seed(123)
n <- 400
age <- rnorm(n, mean = 60, sd = 10)
sex <- factor(sample(c('Male', 'Female'), n, replace = TRUE))
treatment <- factor(sample(c('Control', 'Treatment'), n, replace = TRUE))
biomarker <- rnorm(n, mean = 100, sd = 20)
linpred <- 0.03 * (age - 60) + 0.5 * (sex == 'Male') - 0.4 * (treatment == 'Treatment') + 0.01 * biomarker
base_hazard <- 0.1
true_time <- rexp(n, rate = base_hazard * exp(linpred))
p_cause1 <- plogis(linpred)
cause <- ifelse(runif(n) < p_cause1, 1, 2)
censor_time <- rexp(n, rate = 0.05)
ftime <- pmin(true_time, censor_time)
fstatus <- ifelse(ftime == censor_time, 0, cause)
sim_data <- data.frame(ftime, fstatus, age, sex, treatment, biomarker)
table(sim_data$fstatus)

Backward Selection with AIC

result_back <- crrstep(
  formula = ftime ~ age + sex + treatment + biomarker,
  scope.min = ~1,
  etype = fstatus,
  data = sim_data,
  failcode = 1,
  direction = 'backward',
  criterion = 'AIC',
  trace = TRUE
)
print(result_back)

Forward Selection with AIC

result_fwd <- crrstep(
  formula = ftime ~ age + sex + treatment + biomarker,
  scope.min = ~1,
  etype = fstatus,
  data = sim_data,
  failcode = 1,
  direction = 'forward',
  criterion = 'AIC',
  trace = TRUE
)
print(result_fwd)

Different Selection Criteria

result_bic <- crrstep(
  formula = ftime ~ age + sex + treatment + biomarker,
  scope.min = ~1,
  etype = fstatus,
  data = sim_data,
  failcode = 1,
  direction = 'backward',
  criterion = 'BIC',
  trace = FALSE
)
cat('AIC selected variables:', nrow(result_back$coefficients), '\\n')
cat('BIC selected variables:', nrow(result_bic$coefficients), '\\n')

Forcing Variables with scope.min

result_min <- crrstep(
  formula = ftime ~ age + sex + treatment + biomarker,
  scope.min = ~ age + sex,
  etype = fstatus,
  data = sim_data,
  failcode = 1,
  direction = 'backward',
  criterion = 'AIC',
  trace = TRUE
)
print(result_min)

Factor Variables

set.seed(456)
sim_data$stage <- factor(sample(c('I', 'II', 'III', 'IV'), n, replace = TRUE))
sim_data$region <- factor(sample(c('North', 'South', 'East', 'West'), n, replace = TRUE))
result_factors <- crrstep(
  formula = ftime ~ age + sex + stage + region,
  scope.min = ~1,
  etype = fstatus,
  data = sim_data,
  failcode = 1,
  direction = 'backward',
  criterion = 'BIC',
  trace = TRUE
)
summary(result_factors)
coef(result_factors)
logLik(result_factors)
BIC(result_factors)

Interaction Terms

result_interact <- crrstep(
  formula = ftime ~ age + sex + treatment + age:sex + sex:treatment,
  scope.min = ~ age + sex,
  etype = fstatus,
  data = sim_data,
  failcode = 1,
  direction = 'backward',
  criterion = 'AIC',
  trace = TRUE
)
print(result_interact)

Polynomial Terms

sim_data$biomarker_c <- scale(sim_data$biomarker, scale = FALSE)[,1]
result_poly <- crrstep(
  formula = ftime ~ age + sex + biomarker_c + I(biomarker_c^2),
  scope.min = ~1,
  etype = fstatus,
  data = sim_data,
  failcode = 1,
  direction = 'backward',
  criterion = 'AIC',
  trace = TRUE
)
print(result_poly)

Polynomial-Factor Interactions

result_complex <- crrstep(
  formula = ftime ~ age + treatment + biomarker_c + I(biomarker_c^2) + biomarker_c:treatment + I(biomarker_c^2):treatment,
  scope.min = ~ age,
  etype = fstatus,
  data = sim_data,
  failcode = 1,
  direction = 'backward',
  criterion = 'BIC',
  trace = TRUE
)
print(result_complex)

Log Transformations

sim_data$biomarker_pos <- sim_data$biomarker - min(sim_data$biomarker) + 1
result_log <- crrstep(
  formula = ftime ~ age + sex + log(biomarker_pos) + log(biomarker_pos):sex,
  scope.min = ~1,
  etype = fstatus,
  data = sim_data,
  failcode = 1,
  direction = 'backward',
  criterion = 'AIC',
  trace = TRUE
)
print(result_log)

Three-Way Interactions

result_threeway <- crrstep(
  formula = ftime ~ age + sex + treatment + stage + sex:treatment + sex:stage + treatment:stage + sex:treatment:stage,
  scope.min = ~ age,
  etype = fstatus,
  data = sim_data,
  failcode = 1,
  direction = 'backward',
  criterion = 'BIC',
  trace = TRUE
)
print(result_threeway)

Full CRR Object

fit_full <- crrstep(
  formula = ftime ~ age + sex + treatment + biomarker,
  scope.min = ~1,
  etype = fstatus,
  data = sim_data,
  failcode = 1,
  direction = 'backward',
  criterion = 'AIC',
  trace = FALSE,
  crr.object = TRUE
)
cat('Coefficients:\\n')
print(fit_full$coef)
cat('\\nLog-likelihood:\\n')
print(fit_full$loglik)

Missing Data

sim_data_miss <- sim_data
sim_data_miss$age[sample(1:n, 20)] <- NA
sim_data_miss$biomarker[sample(1:n, 15)] <- NA
result_miss <- crrstep(
  formula = ftime ~ age + sex + treatment + biomarker,
  scope.min = ~1,
  etype = fstatus,
  data = sim_data_miss,
  failcode = 1,
  direction = 'backward',
  criterion = 'AIC',
  trace = TRUE
)
print(result_miss)

Using Subset

result_subset <- crrstep(
  formula = ftime ~ age + treatment + biomarker,
  scope.min = ~1,
  etype = fstatus,
  subset = sim_data$sex == 'Female',
  data = sim_data,
  failcode = 1,
  direction = 'backward',
  criterion = 'AIC',
  trace = FALSE
)
print(result_subset)

Best Practices

  1. Center continuous variables when using polynomial terms
  2. Include main effects in scope.min when testing interactions
  3. Use BIC or BICcr for more parsimonious models
  4. Check for multicollinearity before modeling

Troubleshooting

Convergence issues: Center/scale variables, remove correlated variables Singular matrix errors: Remove redundant variables, combine sparse factor levels

References

Kuk, D. and Varadhan, R. (2013). Model selection in competing risks regression. Statistics in Medicine.

Fine, J.P. and Gray, R.J. (1999). A proportional hazards model for the subdistribution of a competing risk. Journal of the American Statistical Association, 94(446), 496-509.

Session Info

sessionInfo()


Try the crrstep package in your browser

Any scripts or data that you put into this service are public.

crrstep documentation built on Jan. 15, 2026, 1:06 a.m.