# TODO:
# 1. sort out Weibull parameterization in other code file
# 2. consider adding simulation of discrete time survival analysis
# 3. Missing data!
# show that the beta values make it harder to recover
# Try the following:
# Beta[] <- c(-3, -1), c(-4, -1), and c(-5, -1)
# Look at the N and the number of events in summary(mod1)
# Estimation is wrong when the number of events is almost equal to N
# Resources:
# I think John Fox is great: https://socialsciences.mcmaster.ca/jfox/Books/Companion/appendices/Appendix-Cox-Regression.pdf
# Very understandable, ties it into the R output
# Full textbook: http://sistemas.fciencias.unam.mx/~ediaz/Cursos/Estadistica3/Libros/0a9X.pdf
# This is a decent reference
# As far as R resources, I think this is a great series:
# 1. http://www.sthda.com/english/wiki/survival-analysis-basics
# 2. http://www.sthda.com/english/wiki/cox-proportional-hazards-model
# 3. http://www.sthda.com/english/wiki/cox-model-assumptions
# This makes it super easy to just mash a few buttons and get output
#---
rm(list = ls())
gc()
N = 1e4 #this should be divisible by however many groups you use!
number.groups <- 2
number.timepoints <- 1
set.seed(10182021)
dat <- data.frame('USUBJID' = rep(paste0('Subject_', formatC(1:N, width = 4, flag = '0')), length.out= N*number.timepoints),
'Group' = rep(paste0('Group_', 1:number.groups), length.out = N*number.timepoints),
stringsAsFactors=F)
# Create Beta parameters for these design matrix:
X <- model.matrix( ~ Group , data = dat)
# Create Beta
Beta <- matrix(0, nrow = ncol(X), dimnames=list(colnames(X), 'param'))
Beta[] <- c(-4, -1)
# Matrix multiply:
XB <- X %*% Beta
# Hazard Ratio
ht <- exp(XB)
range(ht)
# Data mgmt
dat$ht <- as.vector(ht)
dat$time <- NA
dat$status <- NA
# Hazard Function, with density function as follows:
pdf_surv <- function(tt, ht) ht*exp(-1*ht*tt)
# tt is the time
# ht is the subject-specific hazard ratio
# https://socialsciences.mcmaster.ca/jfox/Books/Companion/appendices/Appendix-Cox-Regression.pdf
# Probability of event over 6 months, 180 days:
tt <- 1:180
i <- 1
# Loop over subjects
for (i in 1:nrow(dat)) {
pp <- pdf_surv(tt = tt, ht = dat$ht[i]) # prob for subject i
pp <- cumsum(pp) # it's a cumulative process
out <- runif(n = 1) < pp # random draw
# Censored:
if (sum(out) == 0) {
dat$status[i] <- 1 # censored
dat$time[i] <- tail(tt, 1) # time of censoring
}
# Dead:
if (sum(out) > 0) {
dat$status[i] <- 2 # dead
dat$time[i] <- tt[which(cumsum(out) == 1)] #survival time in months
}
}# end loop over i subjects
#dat1 <- dat[dat$Group == 'Group_1', ]
#xtabs(~ status + Group + time, data = dat)
#-------------------
# Analyze - confirm can recover gen param
library(survival)
fit <- survfit(Surv(time, status) ~ Group, data = dat)
print(fit)
library(ggplot2)
library(survminer)
ggsurvplot(fit,
pval = TRUE, conf.int = TRUE,
risk.table = TRUE, # Add risk table
risk.table.col = "strata", # Change risk table color by groups
linetype = "strata", # Change line type by groups
surv.median.line = "hv", # Specify median survival
ggtheme = theme_bw()) # Change ggplot2 theme
#palette = c("#E7B800", "#2E9FDF"))
#mod0 <- coxph(Surv(time, status) ~ 1, data = dat)
mod1 <- coxph(Surv(time, status) ~ Group, data = dat)
#anova(mod0, mod1, test = 'LRT')
summary(mod1)
tmp <- data.frame(fit$time, fit$n.risk, fit$n.event, fit$n.censor, fit$surv)
tmp$p <- tmp$fit.n.event/tmp$fit.n.risk
tmp1 <- tmp[tt,]
tmp2 <- tmp[(tt+tail(tt,1)),]
mean(tmp2$p/tmp1$p)
median(tmp2$p/tmp1$p)
exp(-1)
#---- Proportional hazards ratio holds? Test it
test.ph <- cox.zph(mod1)
test.ph
ggcoxzph(test.ph)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.