Nothing
###############################################################################
## Exercise 28 (Modelling cause-specific mortality)
## Flexible parametric models (fpm) implemented in Stata (stpm2) and R (rstpm2)
## One difference: Restricted cubic splines
## using truncated power basis (stpm2, Stata)
## using B-spline basis (rstpm2, R)
## R codes suggested by Xing-Rong Liu and Mark Clements (2016-03-06)
## For more detail interpretation, please see solutions_biostat3_2016.pdf file
###############################################################################
## @knitr loadDependencies
library(biostat3)
library(rstpm2) # for the flexible parametric model
library(ggplot2)
library(tinyplot)
## @knitr loadPreprocess
## Extract subset and create 0/1 outcome variables
melanoma0 <- biostat3::melanoma |> subset(stage=="Localised") |>
transform(event = ifelse(status=="Dead: cancer" & surv_mm<120, 1, 0),
time = pmin(120, surv_mm)/12,
agegrp1 = (agegrp=="0-44")+0, # used by time-dependent effect
agegrp2 = (agegrp=="45-59")+0, # used by time-dependent effect
agegrp3 = (agegrp=="60-74")+0, # used by time-dependent effect
agegrp4 = (agegrp=="75+")+0) # used by time-dependent effect
agegrps <- levels(melanoma0$agegrp)
## @knitr a_flex
## (a) Flexible parametric model with df=4
fpma <- stpm2(Surv(time,event) ~ year8594, data=melanoma0, df=4)
summary(fpma)
eform(fpma)["year8594Diagnosed 85-94",]
## @knitr a_cox
cox <- coxph(Surv(time, event) ~ year8594,
data=melanoma0) # year8594 is a categorical variable
summary(cox)
## @knitr b_surv
## (b) Prediction and plots of survival and hazard by calendar period
years <- levels(melanoma0$year8594)
s <- predict(fpma,newdata=data.frame(year8594=years),grid=TRUE,full=TRUE,se.fit=TRUE,
type="surv")
head(s)
ggplot(s, aes(x=time,y=Estimate,fill=year8594,ymin=lower,ymax=upper)) +
xlab("Time since diagnosis (years)") +
ylab("Survival") +
geom_ribbon(alpha=0.6) +
geom_line()
## @knitr b_haz
predict(fpma,newdata=data.frame(year8594=years),grid=TRUE,full=TRUE,se.fit=TRUE,
type="hazard") |>
with(plt(Estimate~time|year8594, ymin=lower,ymax=upper,
xlab="Time since diagnosis (years)",
ylab="Survival",
type="ribbon"))
## @knitr c_haz_log
## (c) hazards on log scale, adding log="y"
predict(fpma,newdata=data.frame(year8594=years),grid=TRUE,full=TRUE,se.fit=TRUE,
type="hazard") |>
with(plt(Estimate~time|year8594, ymin=lower,ymax=upper,
xlab="Time since diagnosis (years)",
log="y",
ylab="Survival",
type="ribbon"))
## @knitr d_AIC_BIC
## utility function to row bind from a list
lapply(1:6, function(i) {
fitaic <- stpm2(Surv(time, event) ~ year8594, data=melanoma0, df=i)
data.frame(
i,
AIC=AIC(fitaic),
BIC=BIC(fitaic),
beta=as.numeric(coef(fitaic)[2]),
se=coef(summary(fitaic))[2,2])
}) |> do.call(what=rbind)
## @knitr e_base_surv
## Baseline survival
fitaic0 <- stpm2(Surv(time, event) ~ year8594, data=melanoma0, df=6)
plot(fitaic0,newdata=data.frame(year8594=years[1]), lty=6, ci=FALSE,
xlab="Time since diagnosis (years)")
for (i in 1:5 ) {
fitaic <- stpm2(Surv(time, event) ~ year8594, data=melanoma0, df=i)
lines(fitaic,newdata=data.frame(year8594=years[1]), lty=i)
}
legend("topright", legend=paste0("df=",1:6), lty=1:6)
## @knitr e_base_haz
## Baseline hazard
fitaic1 <- stpm2(Surv(time, event) ~ year8594, data=melanoma0, df=6)
plot(fitaic1,newdata=data.frame(year8594=years[1]), lty=6, type="haz",
ci=FALSE, xlab="Time since diagnosis (years)", ylab="Hazard")
for (i in 1:5 ) {
fitaic <- stpm2(Surv(time, event) ~ year8594, data=melanoma0, df=i)
lines(fitaic,type="haz",newdata=data.frame(year8594=years[1]), lty=i)
}
legend("topright", legend=paste0("df=",1:6), lty=1:6)
## @knitr f_sex_age
fpmf <- stpm2(Surv(time, event) ~ sex + year8594 + agegrp,
data=melanoma0, df=4)
summary(fpmf)
eform(fpmf)[2:6,]
## To test the overall effect of age with LR test
fpmf2 <- stpm2(Surv(time,event) ~ sex + year8594, data=melanoma0, df=4)
anova(fpmf, fpmf2)
## @knitr g_sex_age
summary(fit <- coxph(Surv(time, event) ~ sex + year8594 + agegrp,
data=melanoma0))
anova(fit)
## @knitr h_time_varying
## (h) Change to time-varying effect of agegrp2:4
## NB: including main effect of agegrp
fpmh <- stpm2(Surv(time,event) ~ sex + year8594 + agegrp2 + agegrp3 + agegrp4,
data=melanoma0, tvc=list(agegrp2 = 2, agegrp3 = 2, agegrp4 = 2),
df=4)
summary(fpmh)
## LR test comparing fpmh (non-PH for agegrp2:4) with fpmf(PH for agegrp2:4)
anova(fpmh, fpmf)
## @knitr h_time_varying_penalised
## I investigated the non-proportional effect of age with penalized models
## here, sp is the optimal smoothing parameters estimated from models without sp argument
pfit0 <- pstpm2(Surv(time,event) ~ sex + year8594 + agegrp,
data=melanoma0, sp=0.1359685)
## The time-dependent effects including linear forms of age groups
pfit1 <- pstpm2(Surv(time,event) ~ sex + year8594 + agegrp2 + agegrp3 + agegrp4,
tvc=list(agegrp2=7,agegrp3=7,agegrp4=7),
data=melanoma0, sp=c( 0.1429949, 1.6133966, 1.3183117, 1.9958815))
anova(pfit1, pfit0)## the results also suggest there is strong evidence
## @knitr i_plot_base_haz
## Plot of baseline hazard with fpmh
newdata1 <- data.frame(sex="Male",year8594="Diagnosed 75-84",
agegrp2=0, agegrp3=0, agegrp4=0)
plot(fpmh, newdata=newdata1, xlab="Time since diagnosis (years)",
type="haz")
## @knitr j_age_HR
plot(fpmh, newdata=newdata1, xlab="Time since diagnosis (years)",
type="hr",var="agegrp2", ci=FALSE, ylim=c(0,6))
lines(fpmh, newdata=newdata1, type="hr", var="agegrp3", lty=2)
lines(fpmh, newdata=newdata1, type="hr", var="agegrp4", lty=3)
legend("topright", legend=paste0(agegrps[-1]," vs 0-44"), lty=1:3)
## @knitr j_oldest
plot(fpmh, newdata=newdata1,
type="hr", log="y",
exposed=function(data) transform(data,agegrp4=agegrp4+1), # same as var="agegrp4"
xlab="Time since diagnosis (years)")
## @knitr j_age_HR_ggplot
lapply(2:4, function(i)
predict(fpmh, newdata=newdata1, type="hr",var=paste0("agegrp",i),
grid=TRUE, se.fit=TRUE, full=TRUE) |>
transform(ageGroup=paste0(agegrps[i]," vs 0-44"))) |>
do.call(what=rbind) |>
ggplot(aes(x=time,y=Estimate,fill=ageGroup,ymin=lower,ymax=upper)) +
geom_ribbon(alpha=0.3) +
geom_line() +
ylim(0,8) +
xlab("Time since diagnosis (years)") +
ylab("Hazard ratio") +
labs(fill="Age group")
## @knitr k_haz_diff
plot(fpmh,newdata=newdata1,
type="hdiff", var="agegrp4",
xlab="Time since diagnosis (years)")
## @knitr l_surv_diff
plot(fpmh,newdata=newdata1,
type="sdiff", var="agegrp4",
xlab="Time since diagnosis (years)")
## @knitr m_time_dep_eff
lapply(1:3, function(i) {
fitdf <- stpm2(Surv(time,event) ~ sex + year8594 + agegrp2 + agegrp3 + agegrp4,
data=melanoma0, tvc=list(agegrp2 = i, agegrp3 = i, agegrp4 = i),
df=4)
data.frame(i,
aic=AIC(fitdf),
bic=BIC(fitdf))
}) |> do.call(what=rbind)
## Plots with different df
fitdf1 <- stpm2(Surv(time,event) ~ sex + year8594 + agegrp2 + agegrp3 + agegrp4,
data=melanoma0,
tvc = list(agegrp2 = 1, agegrp3 = 1, agegrp4 = 1), df=4)
plot(fitdf1, newdata = newdata1,
type="hr", ci=FALSE, log="y", var="agegrp4",
xlab="Time since diagnosis (years)")
for (i in 2:3 ) {
fitdf <- stpm2(Surv(time,event) ~ sex + year8594 + agegrp2 + agegrp3 + agegrp4,
data=melanoma0, tvc=list(agegrp2 = i, agegrp3 = i, agegrp4 = i),
df=4)
lines(fitdf, newdata=newdata1,
type="hr", lty=i,
var="agegrp4")
}
legend("topright", legend=c("1 df", "2 df", "3 df"), lty=1:3)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.