knitr::opts_chunk$set(echo = TRUE)

# This is necessary to make the data available in the session
devtools::load_all()

library(tidyverse)
library(survival)
library(rstan)

options(mc.cores = parallel::detectCores())
rstan_options(auto_write = TRUE)
brca10

Making the Data

survt <- brca10 %>%
  select(patient, daystodeath, daystolastfollowup) %>%
  mutate(survt = if_else(is.na(daystodeath), daystolastfollowup, daystodeath)) %>%
  drop_na(survt) %>%
  filter(survt > 0) %>%
  mutate(is_censored = is.na(daystodeath)) %>%
  select(-daystodeath, -daystolastfollowup) %>%
  inner_join(brca10, by = "patient") %>%
  mutate_at(.vars = vars(COL1A1:EEF2), scale)

survt

Intercept Only

Classical Fit

KM_fit <- survfit(Surv(survt$survt, 1 - survt$is_censored) ~ 1)
plot(KM_fit, conf.int = TRUE)

Bayesian Fit

n   <- nrow(survt)
N_c <- sum(survt$is_censored)
N_o <- n - sum(survt$is_censored)
P   <- 1
y_c <- with(survt, survt[is_censored])
y_o <- with(survt, survt[!is_censored])

dat <- list(N_c = N_c, y_c = y_c, X_c = matrix(1, N_c, 1),
            N_o = N_o, y_o = y_o, X_o = matrix(1, N_o, 1),
            P = P)
weibull_fit <- stan("00-weibull-model.stan",
                    model_name = "weibull_intercept_only",
                    data = dat,
                    chains = 4,
                    iter = 10000,
                    warmup = 9000,
                    cores = 4,
                    verbose = FALSE)

post <- rstan::extract(weibull_fit)
plot(KM_fit, col = "blue", conf.int = TRUE,
     xlab = "Days", ylab = "Survival Probability")
for (i in 1:4000) {
  brca_ecdf <- ecdf(post$post_pred[i,])
  curve(1 - brca_ecdf(x), from = 0, to = 10000, add = TRUE, col = "lightblue")
}
lines(KM_fit, col = "blue", add = TRUE, conf.int = TRUE)
legend("topright",
       legend = c("KM Curve and Intervals", "Posterior Survival Draws"),
       col = c("blue", "lightblue"),
       lty = c(1, 0),
       pch = c(NA, 15),
       bty = 'n')

1 Predictor

Classical Fit

survfit(Surv(survt$survt, 1 - survt$is_censored) ~ 1 + survt$COL1A1)

Bayesian

n   <- nrow(survt)
N_c <- sum(survt$is_censored)
N_o <- n - sum(survt$is_censored)
P   <- 2
y_c <- with(survt, survt[is_censored])
y_o <- with(survt, survt[!is_censored])

A <- survt %>% 
  select(COL1A1:EEF2) %>%
  as.matrix
X <- model.matrix(~ A)

X_c = X[survt$is_censored, 1:P, drop=FALSE]
X_o = X[!survt$is_censored, 1:P, drop=FALSE]

dat <- list(N_c = N_c, y_c = y_c, X_c = X_c,
            N_o = N_o, y_o = y_o, X_o = X_o,
            P = P)
weibull_fit <- stan("00-weibull-model-full.stan",
                    model_name = "weibull-with-genes",
                    data = dat,
                    chains = 4, 
                    iter = 10000,
                    warmup = 9000,
                    cores = 4,
                    verbose = FALSE)

post <- rstan::extract(weibull_fit)
str(post)
hist(post$alpha, breaks = 50, main = "alpha")
abline(v = median(post$alpha), col = "red")
hist(post$beta[,1], breaks = 50, main = "(intercept)")
abline(v = median(post$beta[,1]), col = "red")
hist(post$beta[,2], breaks = 50, main = "COL1A1")
abline(v = median(post$beta[,2]), col = "red")
plot(weibull_fit, pars = c("alpha", "beta"), ci_level = 0.87, outer_level = 0.97)
S <- function(t, x, alpha, beta1, beta2) {
  lambda <- exp((beta1 + beta2 * x) * alpha)
  pweibull(t, alpha, lambda, lower.tail = FALSE)
}

alpha <- mean(post$alpha)
beta1 <- mean(post$beta[, 1])
beta2 <- mean(post$beta[, 2])

days <- seq(0, 10000, 500)
gene_expr <- seq(min(survt$COL1A1), max(survt$COL1A1), length.out = length(days))

d <- expand_grid(days, gene_expr) %>%
  mutate(prob = S(days, gene_expr, alpha, beta1, beta2))


ggplot(d, aes(x = days, y = gene_expr, fill = prob)) +
  geom_tile() +
  scale_fill_gradient(low = "black", high = "white", limits = c(0, 1)) +
  labs(x = "Days since last Visit", y = "Standardized COL1A1 Expression Count")


adknudson/cp3brca documentation built on June 9, 2020, 11:46 p.m.