Codes for Survival Analysis^[See childRmd/_18survival.Rmd file for other codes, and childRmd/_19shinySurvival.Rmd for shiny application]

https://www.datasurg.net/2019/09/12/survival-analysis-with-strata-clusters-frailties-and-competing-risks-in-in-finalfit/

https://link.springer.com/article/10.1007/s00701-019-04096-9

Calculate survival time

mydata$int <- lubridate::interval(
  lubridate::ymd(mydata$SurgeryDate),
  lubridate::ymd(mydata$LastFollowUpDate)
  )
mydata$OverallTime <- lubridate::time_length(mydata$int, "month")
mydata$OverallTime <- round(mydata$OverallTime, digits = 1)
mydata$OverallTime <- mydata$genel_sagkalim

recode death status outcome as numbers for survival analysis

## Recoding mydata$Death into mydata$Outcome
mydata$Outcome <- forcats::fct_recode(as.character(mydata$Death),
               "1" = "TRUE",
               "0" = "FALSE")
mydata$Outcome <- as.numeric(as.character(mydata$Outcome))

it is always a good practice to double-check after recoding^[JAMA retraction after miscoding – new Finalfit function to check recoding]

table(mydata$Death, mydata$Outcome)

Kaplan-Meier

library(survival)
# data(lung)
# km <- with(lung, Surv(time, status))
km <- with(mydata, Surv(OverallTime, Outcome))
head(km,80)
plot(km)

Kaplan-Meier Plot Log-Rank Test

# Drawing Survival Curves Using ggplot2
# https://rpkgs.datanovia.com/survminer/reference/ggsurvplot.html
dependentKM <- "Surv(OverallTime, Outcome)"
explanatoryKM <- "LVI"

mydata %>%
  finalfit::surv_plot(.data = .,
                      dependent = dependentKM,
                      explanatory = explanatoryKM,
                      xlab='Time (months)',
                      pval=TRUE,
                      legend = 'none',
                      break.time.by = 12,
                      xlim = c(0,60)
                      # legend.labs = c('a','b')
                      )
# Drawing Survival Curves Using ggplot2
# https://rpkgs.datanovia.com/survminer/reference/ggsurvplot.html

mydata %>%
  finalfit::surv_plot(.data = .,
                      dependent = "Surv(OverallTime, Outcome)",
                      explanatory = "LVI",
                      xlab='Time (months)',
                      pval=TRUE,
                      legend = 'none',
                      break.time.by = 12,
                      xlim = c(0,60)
                      # legend.labs = c('a','b')
                      )

Univariate Cox-Regression

library(finalfit)
library(survival)
explanatoryUni <- "LVI"
dependentUni <- "Surv(OverallTime, Outcome)"

mydata %>%
finalfit::finalfit(dependentUni, explanatoryUni) -> tUni

knitr::kable(tUni[, 1:4], row.names=FALSE, align=c('l', 'l', 'r', 'r', 'r', 'r'))
tUni_df <- tibble::as_tibble(tUni, .name_repair = "minimal") %>% 
  janitor::clean_names() 

tUni_df_descr <- paste0("When ",
                        tUni_df$dependent_surv_overall_time_outcome[1],
                        " is ",
                        tUni_df$x[2],
                        ", there is ",
                        tUni_df$hr_univariable[2],
                        " times risk than ",
                        "when ",
                        tUni_df$dependent_surv_overall_time_outcome[1],
                        " is ",
                        tUni_df$x[1],
                        "."
                        )
`r tUni_df_descr`

\noindent\colorbox{yellow}{ \parbox{\dimexpr\linewidth-2\fboxsep}{

$ r tUni_df_descr $

} }

Kaplan-Meier Median Survival

km_fit <- survfit(Surv(OverallTime, Outcome) ~ LVI, data = mydata)
km_fit
plot(km_fit)
# summary(km_fit)
km_fit_median_df <- summary(km_fit)
km_fit_median_df <- as.data.frame(km_fit_median_df$table) %>% 
  janitor::clean_names() %>% 
  tibble::rownames_to_column()
km_fit_median_df <- summary(km_fit)
            km_fit_median_df <- as.data.frame(km_fit_median_df$table) %>%
                tibble::rownames_to_column()

            names(km_fit_median_df) <- paste0("m", 1:dim(km_fit_median_df)[2])

            km_fit_median_definition <- 

            km_fit_median_df %>%
                dplyr::mutate(
                    description =
                        glue::glue(
                            "When {m1}, median survival is {m8} [{m9} - {m10}, 95% CI] months."
                        )
                ) %>%
                dplyr::select(description) %>%
                dplyr::pull() 
sTable <- summary(km_fit)$table
            st <- data.frame()

            for (i in seq_len(nrow(km_fit))) {
                if (nrow(km_fit) == 1)
                    g <- sTable
                else
                    g <- sTable[i,]
                nevents <- sum(g['events'])
                n <- g['n.max']
                ncensor <- n - nevents
                median <- g['median']
                mean <- g['*rmean']
                prop <- nevents / n

                print(rowNo=i, list(
                    censored=ncensor,
                    events=nevents,
                    n=n,
                    prop=nevents/n,
                    median=median,
                    mean=mean))
            }

            st$setStatus('complete')


            results1 <- st
km_fit
broom::tidy(km_fit)
km_fit_median_df %>% 
  dplyr::mutate(
    description = 
      glue::glue(
      "When {rowname}, median survival is {median} [{x0_95lcl} - {x0_95ucl}, 95% CI] months."
    )
  ) %>% 
  dplyr::select(description) %>% 
  dplyr::pull() -> km_fit_median_definition
`r km_fit_median_definition`

\noindent\colorbox{yellow}{ \parbox{\dimexpr\linewidth-2\fboxsep}{

r km_fit_median_definition

} }

1-3-5-yr survival

summary(km_fit, times = c(12,36,60))
km_fit_summary <- summary(km_fit, times = c(12,36,60))

km_fit_df <- as.data.frame(km_fit_summary[c("strata", "time", "n.risk", "n.event", "surv", "std.err", "lower", "upper")])
km_fit_df %>% 
  dplyr::mutate(
    description = 
      glue::glue(
      "When {strata}, {time} month survival is {scales::percent(surv)} [{scales::percent(lower)}-{scales::percent(upper)}, 95% CI]."
    )
  ) %>% 
  dplyr::select(description) %>% 
  dplyr::pull() -> km_fit_definition
`r km_fit_definition`

\noindent\colorbox{yellow}{ \parbox{\dimexpr\linewidth-2\fboxsep}{

r km_fit_definition

} }

library(survival)
surv_fit <- survival::survfit(Surv(time, status) ~ ph.ecog, data=lung)
insight::is_model_supported(surv_fit)
insight::find_formula(surv_fit)
report::report_participants(mydata)
dependentKM <-  "Surv(OverallTime, Outcome2)"

explanatoryKM <- c("explanatory1",
               "explanatory2"
               )
source(here::here("R", "gc_survival.R"))

mydependent <-  "Surv(time, status)"
explanatory <- "Organ"

mysurvival <- function(mydata, mydependent, explanatory) {
    {{mydata}} %>%
        finalfit::surv_plot(dependent = {{mydependent}},
                            explanatory = {{explanatory}},
                            xlab='Time (months)',
                            pval=TRUE,
                            legend = 'none',
                            break.time.by = 12,
                            xlim = c(0,60)
        )
}


# library(tidyverse)
mysurvival(mydata = whippleables, mydependent = mydependent, explanatory = explanatory)

explanatory <- c("Organ", "LVI")

deneme <- purrr::map(explanatory, mysurvival, mydata = whippleables, mydependent = mydependent)

Pairwise comparison

dependentKM <- "Surv(OverallTime, Outcome)"
explanatoryKM <- "TStage"

mydata %>%
  finalfit::surv_plot(.data = .,
                      dependent = dependentKM,
                      explanatory = explanatoryKM,
                      xlab='Time (months)',
                      pval=TRUE,
                      legend = 'none',
                      break.time.by = 12,
                      xlim = c(0,60)
                      # legend.labs = c('a','b')
                      )
survminer::pairwise_survdiff(
  formula = Surv(OverallTime, Outcome) ~ TStage, 
                             data = mydata,
                             p.adjust.method = "BH"
  )
km_fit
print(km_fit, 
      scale=1,
      digits = max(options()$digits - 4,3),
      print.rmean=getOption("survfit.print.rmean"),
      rmean = getOption('survfit.rmean'),
      print.median=getOption("survfit.print.median"),
      median = getOption('survfit.median')

      )

Multivariate Analysis Survival

library(finalfit)
library(survival)
explanatoryMultivariate <- explanatoryKM
dependentMultivariate <- dependentKM

mydata %>%
  finalfit(dependentMultivariate, explanatoryMultivariate, metrics=TRUE) -> tMultivariate

knitr::kable(tMultivariate, row.names=FALSE, align=c("l", "l", "r", "r", "r", "r"))

explanatory = c("age.factor", "sex.factor", "obstruct.factor", "perfor.factor") dependent = 'mort_5yr' colon_s %>% hr_plot(dependent, explanatory)

# https://tidymodels.github.io/parsnip/reference/surv_reg.html
library(parsnip)
surv_reg()
#> Parametric Survival Regression Model Specification (regression)
#> # Parameters can be represented by a placeholder:
surv_reg(dist = varying())

#> Parametric Survival Regression Model Specification (regression)
#> 
#> Main Arguments:
#>   dist = varying()
#> 
model <- surv_reg(dist = "weibull")
model
#> Parametric Survival Regression Model Specification (regression)
#> 
#> Main Arguments:
#>   dist = weibull
#> update(model, dist = "lnorm")#> Parametric Survival Regression Model Specification (regression)
#> 
#> Main Arguments:
#>   dist = lnorm
#> 


# From randomForest
rf_1 <- randomForest(x, y, mtry = 12, ntree = 2000, importance = TRUE)

# From ranger
rf_2 <- ranger(
  y ~ ., 
  data = dat, 
  mtry = 12, 
  num.trees = 2000, 
  importance = 'impurity'
)

# From sparklyr
rf_3 <- ml_random_forest(
  dat, 
  intercept = FALSE, 
  response = "y", 
  features = names(dat)[names(dat) != "y"], 
  col.sample.rate = 12,
  num.trees = 2000
)




rand_forest(mtry = 12, trees = 2000) %>%
  set_engine("ranger", importance = 'impurity') %>%
  fit(y ~ ., data = dat)


rand_forest(mtry = 12, trees = 2000) %>%
  set_engine("spark") %>%
  fit(y ~ ., data = dat)
mb_followup$OverallTime <- mb_followup$months
mb_followup$Outcome <- mb_followup$`rec(1,0)`
mb_followup$Operation <- mb_followup$`op type (1,2,3)`

## Recoding mb_followup$Operation
mb_followup$Operation <- as.character(mb_followup$Operation)
mb_followup$Operation <- forcats::fct_recode(mb_followup$Operation,
               "Type3" = "3",
               "Type2" = "2",
               "Type1" = "1")


## Reordering mb_followup$Operation
mb_followup$Operation <- factor(mb_followup$Operation, levels=c("Type3", "Type2", "Type1"))

library(magrittr)
mb_followup %$% table(Operation, `op type (1,2,3)`) 
library(survival)
library(survminer)
library(finalfit)
mb_followup %>%
  finalfit::surv_plot('Surv(OverallTime, Outcome)', 'Operation', 
  xlab='Time (months)', pval=TRUE, legend = 'none',
  # pval.coord
    break.time.by = 12, xlim = c(0,60), ylim = c(0.8, 1)

# legend.labs = c('a','b')

)

Univariate Cox-Regression

explanatoryUni <- 'Operation'
dependentUni <- 'Surv(OverallTime, Outcome)'
mb_followup %>%
finalfit(dependentUni, explanatoryUni) -> tUni

knitr::kable(tUni[, 1:4], row.names=FALSE, align=c('l', 'l', 'r', 'r', 'r', 'r'))

Univariate Cox-Regression Summary

tUni_df <- tibble::as_tibble(tUni, .name_repair = 'minimal') %>%
janitor::clean_names(dat = ., case = 'snake')


n_level <- dim(tUni_df)[1]

tUni_df_descr <- function(n) {
    paste0(
        'When ',
        tUni_df$dependent_surv_overall_time_outcome[1],
        ' is ',
        tUni_df$x[n + 1],
        ', there is ',
        tUni_df$hr_univariable[n + 1],
        ' times risk than ',
        'when ',
        tUni_df$dependent_surv_overall_time_outcome[1],
        ' is ',
        tUni_df$x[1],
        '.'
    )

}



results5 <- purrr::map(.x = c(2:n_level-1), .f = tUni_df_descr)

print(unlist(results5))

\pagebreak

Median Survival

km_fit <- survfit(Surv(OverallTime, Outcome) ~ Operation, data = mb_followup)

# km_fit

# summary(km_fit)

km_fit_median_df <- summary(km_fit)
km_fit_median_df <- as.data.frame(km_fit_median_df$table) %>%
    janitor::clean_names(dat = ., case = 'snake') %>%
    tibble::rownames_to_column(.data = ., var = 'Derece')

km_fit_median_df

# km_fit_median_df %>% 
#   knitr::kable(format = "latex") %>% 
#   kableExtra::kable_styling(latex_options="scale_down")

km_fit_median_df %>%
    dplyr::mutate(
        description =
        glue::glue(
        'When, Derece, {Derece}, median survival is {median} [{x0_95lcl} - {x0_95ucl}, 95% CI] months.'
)
    ) %>%
        dplyr::mutate(
description = gsub(pattern = 'thefactor=', replacement = ' is ', x = description)
        ) %>%
    dplyr::select(description) %>%
    dplyr::pull() -> km_fit_median_definition

# km_fit_median_definition

1-3-5-yr survival

summary(km_fit, times = c(12,36,60))

km_fit_summary <- summary(km_fit, times = c(12,36,60))

km_fit_df <- as.data.frame(km_fit_summary[c('strata', 'time', 'n.risk', 'n.event', 'surv', 'std.err', 'lower', 'upper')])

km_fit_df %>% 
  knitr::kable(format = "latex") %>% 
  kableExtra::kable_styling(latex_options="scale_down")





km_fit_df %>%
    dplyr::mutate(
        description =
glue::glue(
    'When {strata}, {time} month survival is {scales::percent(surv)} [{scales::percent(lower)}-{scales::percent(upper)}, 95% CI].'
)
    ) %>%
    dplyr::select(description) %>%
    dplyr::pull() -> km_fit_definition

km_fit_definition

\pagebreak

Pairwise Comparisons

    survminer::pairwise_survdiff(
    formula = Surv(OverallTime, Outcome) ~ Operation,
    data = mb_followup,
    p.adjust.method = 'BH'
)

\pagebreak


library(gt)
library(gtsummary)

library(survival)
fit1 <- survfit(Surv(ttdeath, death) ~ trt, trial)
tbl_strata_ex1 <-
  tbl_survival(
    fit1,
    times = c(12, 24),
    label = "{time} Months"
  )

fit2 <- survfit(Surv(ttdeath, death) ~ 1, trial)
tbl_nostrata_ex2 <-
  tbl_survival(
    fit2,
    probs = c(0.1, 0.2, 0.5),
    header_estimate = "**Months**"
  )


sbalci/histopathology-template documentation built on June 29, 2023, 5:52 a.m.