inst/doc/q28.R

###############################################################################
## Exercise 28 (Modelling cause-specific mortality)
## Flexible parametric models (fpm) implemented in Stata (stpm2) and R (rstpm2)
## One difference: Restricted cubic splines
##                 using truncated power basis (stpm2, Stata)
##                 using B-spline basis (rstpm2, R)
## R codes suggested by Xing-Rong Liu and Mark Clements (2016-03-06)
## For more detail interpretation, please see solutions_biostat3_2016.pdf file
###############################################################################

## @knitr loadDependencies
library(biostat3)  
library(rstpm2)  # for the flexible parametric model
library(ggplot2)
library(tinyplot)

## @knitr loadPreprocess

## Extract subset and create 0/1 outcome variables
melanoma0 <- biostat3::melanoma |> subset(stage=="Localised") |>
             transform(event = ifelse(status=="Dead: cancer" & surv_mm<120, 1, 0),
                       time = pmin(120, surv_mm)/12,
                       agegrp1 = (agegrp=="0-44")+0,  # used by time-dependent effect
                       agegrp2 = (agegrp=="45-59")+0, # used by time-dependent effect
                       agegrp3 = (agegrp=="60-74")+0, # used by time-dependent effect
                       agegrp4 = (agegrp=="75+")+0)   # used by time-dependent effect
agegrps <- levels(melanoma0$agegrp)
    
## @knitr a_flex
## (a) Flexible parametric model with df=4
fpma <- stpm2(Surv(time,event) ~ year8594, data=melanoma0, df=4)
summary(fpma)
eform(fpma)["year8594Diagnosed 85-94",]

## @knitr a_cox
cox <- coxph(Surv(time, event) ~ year8594,
             data=melanoma0) # year8594 is a categorical variable
summary(cox)

## @knitr b_surv
## (b) Prediction and plots of survival and hazard by calendar period
years <- levels(melanoma0$year8594)

s <- predict(fpma,newdata=data.frame(year8594=years),grid=TRUE,full=TRUE,se.fit=TRUE,
             type="surv")
head(s)
ggplot(s, aes(x=time,y=Estimate,fill=year8594,ymin=lower,ymax=upper)) +
    xlab("Time since diagnosis (years)") +
    ylab("Survival") +
    geom_ribbon(alpha=0.6) +
    geom_line()

## @knitr b_haz
predict(fpma,newdata=data.frame(year8594=years),grid=TRUE,full=TRUE,se.fit=TRUE,
        type="hazard") |>
    with(plt(Estimate~time|year8594, ymin=lower,ymax=upper,
             xlab="Time since diagnosis (years)",
             ylab="Survival",
             type="ribbon"))

## @knitr c_haz_log
## (c) hazards on log scale, adding log="y"
predict(fpma,newdata=data.frame(year8594=years),grid=TRUE,full=TRUE,se.fit=TRUE,
        type="hazard") |>
    with(plt(Estimate~time|year8594, ymin=lower,ymax=upper,
             xlab="Time since diagnosis (years)",
             log="y",
             ylab="Survival",
             type="ribbon"))

## @knitr d_AIC_BIC
## utility function to row bind from a list
lapply(1:6, function(i) {
    fitaic <- stpm2(Surv(time, event) ~ year8594, data=melanoma0, df=i)
    data.frame(
        i,
        AIC=AIC(fitaic),
        BIC=BIC(fitaic),
        beta=as.numeric(coef(fitaic)[2]),
        se=coef(summary(fitaic))[2,2])
}) |> do.call(what=rbind)

## @knitr e_base_surv
## Baseline survival
fitaic0 <- stpm2(Surv(time, event) ~ year8594, data=melanoma0, df=6)
plot(fitaic0,newdata=data.frame(year8594=years[1]), lty=6, ci=FALSE,
     xlab="Time since diagnosis (years)")
for (i in 1:5 ) {
  fitaic <- stpm2(Surv(time, event) ~ year8594, data=melanoma0, df=i)
  lines(fitaic,newdata=data.frame(year8594=years[1]), lty=i)
}
legend("topright", legend=paste0("df=",1:6), lty=1:6)

## @knitr e_base_haz
## Baseline hazard
fitaic1 <- stpm2(Surv(time, event) ~ year8594, data=melanoma0, df=6)
plot(fitaic1,newdata=data.frame(year8594=years[1]), lty=6, type="haz",
     ci=FALSE, xlab="Time since diagnosis (years)", ylab="Hazard")
for (i in 1:5 ) {
  fitaic <- stpm2(Surv(time, event) ~ year8594, data=melanoma0, df=i)
  lines(fitaic,type="haz",newdata=data.frame(year8594=years[1]), lty=i)
}
legend("topright", legend=paste0("df=",1:6), lty=1:6)

## @knitr f_sex_age
fpmf <- stpm2(Surv(time, event) ~ sex + year8594 + agegrp,
              data=melanoma0, df=4)
summary(fpmf)
eform(fpmf)[2:6,]

## To test the overall effect of age with LR test
fpmf2 <- stpm2(Surv(time,event) ~ sex + year8594, data=melanoma0, df=4)
anova(fpmf, fpmf2)


## @knitr g_sex_age
summary(fit <- coxph(Surv(time, event) ~ sex + year8594 + agegrp,
                     data=melanoma0))
anova(fit)

## @knitr h_time_varying
## (h) Change to time-varying effect of agegrp2:4
## NB: including main effect of agegrp
fpmh <- stpm2(Surv(time,event) ~ sex + year8594 + agegrp2 + agegrp3 + agegrp4,
              data=melanoma0, tvc=list(agegrp2 = 2, agegrp3 = 2, agegrp4 = 2),
              df=4)
summary(fpmh)

## LR test comparing fpmh (non-PH for agegrp2:4) with fpmf(PH for agegrp2:4)
anova(fpmh, fpmf)

## @knitr h_time_varying_penalised
## I investigated the non-proportional effect of age with penalized models
## here, sp is the optimal smoothing parameters estimated from models without sp argument
pfit0 <- pstpm2(Surv(time,event) ~ sex + year8594 + agegrp,
                data=melanoma0, sp=0.1359685)

## The time-dependent effects including linear forms of age groups
pfit1 <- pstpm2(Surv(time,event) ~ sex + year8594 + agegrp2 + agegrp3 + agegrp4,
                tvc=list(agegrp2=7,agegrp3=7,agegrp4=7),
                data=melanoma0, sp=c( 0.1429949, 1.6133966, 1.3183117, 1.9958815))
anova(pfit1, pfit0)## the results also suggest there is strong evidence

## @knitr i_plot_base_haz
## Plot of baseline hazard with fpmh
newdata1 <- data.frame(sex="Male",year8594="Diagnosed 75-84",
                       agegrp2=0, agegrp3=0, agegrp4=0)
plot(fpmh, newdata=newdata1, xlab="Time since diagnosis (years)", 
     type="haz")

## @knitr j_age_HR
plot(fpmh, newdata=newdata1, xlab="Time since diagnosis (years)",
     type="hr",var="agegrp2", ci=FALSE, ylim=c(0,6))
lines(fpmh, newdata=newdata1, type="hr", var="agegrp3", lty=2)
lines(fpmh, newdata=newdata1, type="hr", var="agegrp4", lty=3)
legend("topright", legend=paste0(agegrps[-1]," vs 0-44"), lty=1:3)

## @knitr j_oldest
plot(fpmh, newdata=newdata1,
     type="hr", log="y",
     exposed=function(data) transform(data,agegrp4=agegrp4+1), # same as var="agegrp4"
     xlab="Time since diagnosis (years)")

## @knitr j_age_HR_ggplot
lapply(2:4, function(i)
    predict(fpmh, newdata=newdata1, type="hr",var=paste0("agegrp",i),
            grid=TRUE, se.fit=TRUE, full=TRUE) |>
    transform(ageGroup=paste0(agegrps[i]," vs 0-44"))) |>
    do.call(what=rbind) |>
    ggplot(aes(x=time,y=Estimate,fill=ageGroup,ymin=lower,ymax=upper)) +
    geom_ribbon(alpha=0.3) +
    geom_line() +
    ylim(0,8) +
    xlab("Time since diagnosis (years)") +
    ylab("Hazard ratio") +
    labs(fill="Age group")

## @knitr k_haz_diff
plot(fpmh,newdata=newdata1,
     type="hdiff", var="agegrp4",
     xlab="Time since diagnosis (years)")

## @knitr l_surv_diff
plot(fpmh,newdata=newdata1,
     type="sdiff", var="agegrp4",
     xlab="Time since diagnosis (years)")

## @knitr m_time_dep_eff
lapply(1:3, function(i) {
    fitdf <- stpm2(Surv(time,event) ~ sex + year8594 + agegrp2 + agegrp3 + agegrp4,
                   data=melanoma0, tvc=list(agegrp2 = i, agegrp3 = i, agegrp4 = i),
                   df=4)
    data.frame(i,
               aic=AIC(fitdf),
               bic=BIC(fitdf))
}) |> do.call(what=rbind)

## Plots with different df
fitdf1 <- stpm2(Surv(time,event) ~ sex + year8594 + agegrp2 + agegrp3 + agegrp4,
                data=melanoma0,
                tvc = list(agegrp2 = 1, agegrp3 = 1, agegrp4 = 1), df=4)

plot(fitdf1, newdata = newdata1,
     type="hr", ci=FALSE, log="y", var="agegrp4",
     xlab="Time since diagnosis (years)")
for (i in 2:3 ) {
    fitdf <- stpm2(Surv(time,event) ~ sex + year8594 + agegrp2 + agegrp3 + agegrp4,
                   data=melanoma0, tvc=list(agegrp2 = i, agegrp3 = i, agegrp4 = i),
                   df=4)
    lines(fitdf, newdata=newdata1,
          type="hr", lty=i,
          var="agegrp4")
}
legend("topright", legend=c("1 df", "2 df", "3 df"), lty=1:3)

Try the biostat3 package in your browser

Any scripts or data that you put into this service are public.

biostat3 documentation built on Oct. 29, 2024, 5:07 p.m.