Codes for Survival Analysis^[See childRmd/_18survival.Rmd
file for other codes, and childRmd/_19shinySurvival.Rmd
for shiny
application]
https://www.datasurg.net/2019/09/12/survival-analysis-with-strata-clusters-frailties-and-competing-risks-in-in-finalfit/
https://link.springer.com/article/10.1007/s00701-019-04096-9
Calculate survival time
mydata$int <- lubridate::interval( lubridate::ymd(mydata$SurgeryDate), lubridate::ymd(mydata$LastFollowUpDate) ) mydata$OverallTime <- lubridate::time_length(mydata$int, "month") mydata$OverallTime <- round(mydata$OverallTime, digits = 1)
mydata$OverallTime <- mydata$genel_sagkalim
recode death status outcome as numbers for survival analysis
## Recoding mydata$Death into mydata$Outcome mydata$Outcome <- forcats::fct_recode(as.character(mydata$Death), "1" = "TRUE", "0" = "FALSE") mydata$Outcome <- as.numeric(as.character(mydata$Outcome))
it is always a good practice to double-check after recoding^[JAMA retraction after miscoding – new Finalfit function to check recoding]
table(mydata$Death, mydata$Outcome)
library(survival) # data(lung) # km <- with(lung, Surv(time, status)) km <- with(mydata, Surv(OverallTime, Outcome)) head(km,80) plot(km)
Kaplan-Meier Plot Log-Rank Test
# Drawing Survival Curves Using ggplot2 # https://rpkgs.datanovia.com/survminer/reference/ggsurvplot.html dependentKM <- "Surv(OverallTime, Outcome)" explanatoryKM <- "LVI" mydata %>% finalfit::surv_plot(.data = ., dependent = dependentKM, explanatory = explanatoryKM, xlab='Time (months)', pval=TRUE, legend = 'none', break.time.by = 12, xlim = c(0,60) # legend.labs = c('a','b') )
# Drawing Survival Curves Using ggplot2 # https://rpkgs.datanovia.com/survminer/reference/ggsurvplot.html mydata %>% finalfit::surv_plot(.data = ., dependent = "Surv(OverallTime, Outcome)", explanatory = "LVI", xlab='Time (months)', pval=TRUE, legend = 'none', break.time.by = 12, xlim = c(0,60) # legend.labs = c('a','b') )
library(finalfit) library(survival) explanatoryUni <- "LVI" dependentUni <- "Surv(OverallTime, Outcome)" mydata %>% finalfit::finalfit(dependentUni, explanatoryUni) -> tUni knitr::kable(tUni[, 1:4], row.names=FALSE, align=c('l', 'l', 'r', 'r', 'r', 'r'))
tUni_df <- tibble::as_tibble(tUni, .name_repair = "minimal") %>% janitor::clean_names() tUni_df_descr <- paste0("When ", tUni_df$dependent_surv_overall_time_outcome[1], " is ", tUni_df$x[2], ", there is ", tUni_df$hr_univariable[2], " times risk than ", "when ", tUni_df$dependent_surv_overall_time_outcome[1], " is ", tUni_df$x[1], "." )
\noindent\colorbox{yellow}{ \parbox{\dimexpr\linewidth-2\fboxsep}{
$ r tUni_df_descr
$
} }
km_fit <- survfit(Surv(OverallTime, Outcome) ~ LVI, data = mydata) km_fit plot(km_fit) # summary(km_fit)
km_fit_median_df <- summary(km_fit) km_fit_median_df <- as.data.frame(km_fit_median_df$table) %>% janitor::clean_names() %>% tibble::rownames_to_column()
km_fit_median_df <- summary(km_fit) km_fit_median_df <- as.data.frame(km_fit_median_df$table) %>% tibble::rownames_to_column() names(km_fit_median_df) <- paste0("m", 1:dim(km_fit_median_df)[2]) km_fit_median_definition <- km_fit_median_df %>% dplyr::mutate( description = glue::glue( "When {m1}, median survival is {m8} [{m9} - {m10}, 95% CI] months." ) ) %>% dplyr::select(description) %>% dplyr::pull()
sTable <- summary(km_fit)$table st <- data.frame() for (i in seq_len(nrow(km_fit))) { if (nrow(km_fit) == 1) g <- sTable else g <- sTable[i,] nevents <- sum(g['events']) n <- g['n.max'] ncensor <- n - nevents median <- g['median'] mean <- g['*rmean'] prop <- nevents / n print(rowNo=i, list( censored=ncensor, events=nevents, n=n, prop=nevents/n, median=median, mean=mean)) } st$setStatus('complete') results1 <- st
km_fit broom::tidy(km_fit)
km_fit_median_df %>% dplyr::mutate( description = glue::glue( "When {rowname}, median survival is {median} [{x0_95lcl} - {x0_95ucl}, 95% CI] months." ) ) %>% dplyr::select(description) %>% dplyr::pull() -> km_fit_median_definition
\noindent\colorbox{yellow}{ \parbox{\dimexpr\linewidth-2\fboxsep}{
r km_fit_median_definition
} }
summary(km_fit, times = c(12,36,60))
km_fit_summary <- summary(km_fit, times = c(12,36,60)) km_fit_df <- as.data.frame(km_fit_summary[c("strata", "time", "n.risk", "n.event", "surv", "std.err", "lower", "upper")])
km_fit_df %>% dplyr::mutate( description = glue::glue( "When {strata}, {time} month survival is {scales::percent(surv)} [{scales::percent(lower)}-{scales::percent(upper)}, 95% CI]." ) ) %>% dplyr::select(description) %>% dplyr::pull() -> km_fit_definition
\noindent\colorbox{yellow}{ \parbox{\dimexpr\linewidth-2\fboxsep}{
r km_fit_definition
} }
library(survival) surv_fit <- survival::survfit(Surv(time, status) ~ ph.ecog, data=lung) insight::is_model_supported(surv_fit) insight::find_formula(surv_fit)
report::report_participants(mydata)
dependentKM <- "Surv(OverallTime, Outcome2)" explanatoryKM <- c("explanatory1", "explanatory2" )
source(here::here("R", "gc_survival.R"))
mydependent <- "Surv(time, status)" explanatory <- "Organ" mysurvival <- function(mydata, mydependent, explanatory) { {{mydata}} %>% finalfit::surv_plot(dependent = {{mydependent}}, explanatory = {{explanatory}}, xlab='Time (months)', pval=TRUE, legend = 'none', break.time.by = 12, xlim = c(0,60) ) } # library(tidyverse) mysurvival(mydata = whippleables, mydependent = mydependent, explanatory = explanatory) explanatory <- c("Organ", "LVI") deneme <- purrr::map(explanatory, mysurvival, mydata = whippleables, mydependent = mydependent)
dependentKM <- "Surv(OverallTime, Outcome)" explanatoryKM <- "TStage" mydata %>% finalfit::surv_plot(.data = ., dependent = dependentKM, explanatory = explanatoryKM, xlab='Time (months)', pval=TRUE, legend = 'none', break.time.by = 12, xlim = c(0,60) # legend.labs = c('a','b') )
survminer::pairwise_survdiff( formula = Surv(OverallTime, Outcome) ~ TStage, data = mydata, p.adjust.method = "BH" )
km_fit print(km_fit, scale=1, digits = max(options()$digits - 4,3), print.rmean=getOption("survfit.print.rmean"), rmean = getOption('survfit.rmean'), print.median=getOption("survfit.print.median"), median = getOption('survfit.median') )
library(finalfit) library(survival) explanatoryMultivariate <- explanatoryKM dependentMultivariate <- dependentKM mydata %>% finalfit(dependentMultivariate, explanatoryMultivariate, metrics=TRUE) -> tMultivariate knitr::kable(tMultivariate, row.names=FALSE, align=c("l", "l", "r", "r", "r", "r"))
explanatory = c("age.factor", "sex.factor", "obstruct.factor", "perfor.factor") dependent = 'mort_5yr' colon_s %>% hr_plot(dependent, explanatory)
# https://tidymodels.github.io/parsnip/reference/surv_reg.html library(parsnip) surv_reg() #> Parametric Survival Regression Model Specification (regression) #> # Parameters can be represented by a placeholder: surv_reg(dist = varying()) #> Parametric Survival Regression Model Specification (regression) #> #> Main Arguments: #> dist = varying() #> model <- surv_reg(dist = "weibull") model #> Parametric Survival Regression Model Specification (regression) #> #> Main Arguments: #> dist = weibull #> update(model, dist = "lnorm")#> Parametric Survival Regression Model Specification (regression) #> #> Main Arguments: #> dist = lnorm #> # From randomForest rf_1 <- randomForest(x, y, mtry = 12, ntree = 2000, importance = TRUE) # From ranger rf_2 <- ranger( y ~ ., data = dat, mtry = 12, num.trees = 2000, importance = 'impurity' ) # From sparklyr rf_3 <- ml_random_forest( dat, intercept = FALSE, response = "y", features = names(dat)[names(dat) != "y"], col.sample.rate = 12, num.trees = 2000 ) rand_forest(mtry = 12, trees = 2000) %>% set_engine("ranger", importance = 'impurity') %>% fit(y ~ ., data = dat) rand_forest(mtry = 12, trees = 2000) %>% set_engine("spark") %>% fit(y ~ ., data = dat)
mb_followup$OverallTime <- mb_followup$months mb_followup$Outcome <- mb_followup$`rec(1,0)` mb_followup$Operation <- mb_followup$`op type (1,2,3)` ## Recoding mb_followup$Operation mb_followup$Operation <- as.character(mb_followup$Operation) mb_followup$Operation <- forcats::fct_recode(mb_followup$Operation, "Type3" = "3", "Type2" = "2", "Type1" = "1") ## Reordering mb_followup$Operation mb_followup$Operation <- factor(mb_followup$Operation, levels=c("Type3", "Type2", "Type1")) library(magrittr) mb_followup %$% table(Operation, `op type (1,2,3)`)
library(survival) library(survminer) library(finalfit) mb_followup %>% finalfit::surv_plot('Surv(OverallTime, Outcome)', 'Operation', xlab='Time (months)', pval=TRUE, legend = 'none', # pval.coord break.time.by = 12, xlim = c(0,60), ylim = c(0.8, 1) # legend.labs = c('a','b') )
Univariate Cox-Regression
explanatoryUni <- 'Operation' dependentUni <- 'Surv(OverallTime, Outcome)' mb_followup %>% finalfit(dependentUni, explanatoryUni) -> tUni knitr::kable(tUni[, 1:4], row.names=FALSE, align=c('l', 'l', 'r', 'r', 'r', 'r'))
Univariate Cox-Regression Summary
tUni_df <- tibble::as_tibble(tUni, .name_repair = 'minimal') %>% janitor::clean_names(dat = ., case = 'snake') n_level <- dim(tUni_df)[1] tUni_df_descr <- function(n) { paste0( 'When ', tUni_df$dependent_surv_overall_time_outcome[1], ' is ', tUni_df$x[n + 1], ', there is ', tUni_df$hr_univariable[n + 1], ' times risk than ', 'when ', tUni_df$dependent_surv_overall_time_outcome[1], ' is ', tUni_df$x[1], '.' ) } results5 <- purrr::map(.x = c(2:n_level-1), .f = tUni_df_descr) print(unlist(results5))
\pagebreak
Median Survival
km_fit <- survfit(Surv(OverallTime, Outcome) ~ Operation, data = mb_followup) # km_fit # summary(km_fit) km_fit_median_df <- summary(km_fit) km_fit_median_df <- as.data.frame(km_fit_median_df$table) %>% janitor::clean_names(dat = ., case = 'snake') %>% tibble::rownames_to_column(.data = ., var = 'Derece') km_fit_median_df # km_fit_median_df %>% # knitr::kable(format = "latex") %>% # kableExtra::kable_styling(latex_options="scale_down") km_fit_median_df %>% dplyr::mutate( description = glue::glue( 'When, Derece, {Derece}, median survival is {median} [{x0_95lcl} - {x0_95ucl}, 95% CI] months.' ) ) %>% dplyr::mutate( description = gsub(pattern = 'thefactor=', replacement = ' is ', x = description) ) %>% dplyr::select(description) %>% dplyr::pull() -> km_fit_median_definition # km_fit_median_definition
1-3-5-yr survival
summary(km_fit, times = c(12,36,60)) km_fit_summary <- summary(km_fit, times = c(12,36,60)) km_fit_df <- as.data.frame(km_fit_summary[c('strata', 'time', 'n.risk', 'n.event', 'surv', 'std.err', 'lower', 'upper')]) km_fit_df %>% knitr::kable(format = "latex") %>% kableExtra::kable_styling(latex_options="scale_down") km_fit_df %>% dplyr::mutate( description = glue::glue( 'When {strata}, {time} month survival is {scales::percent(surv)} [{scales::percent(lower)}-{scales::percent(upper)}, 95% CI].' ) ) %>% dplyr::select(description) %>% dplyr::pull() -> km_fit_definition km_fit_definition
\pagebreak
Pairwise Comparisons
survminer::pairwise_survdiff( formula = Surv(OverallTime, Outcome) ~ Operation, data = mb_followup, p.adjust.method = 'BH' )
\pagebreak
library(gt) library(gtsummary) library(survival) fit1 <- survfit(Surv(ttdeath, death) ~ trt, trial) tbl_strata_ex1 <- tbl_survival( fit1, times = c(12, 24), label = "{time} Months" ) fit2 <- survfit(Surv(ttdeath, death) ~ 1, trial) tbl_nostrata_ex2 <- tbl_survival( fit2, probs = c(0.1, 0.2, 0.5), header_estimate = "**Months**" )
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.