Files_Replicate_Analysis/Chapter 3 examples/Chapple Comparison/Chapple Simulation.R

#============================================================================
# Preliminaries - load required packages
# Notes: If the following packages aren't already installed, then
#        they can be  installed from CRAN by typing, for example:
#        install.packages("package-name"). Note that this requires an
#        internet connection.
# For PiecewiseChangepoint package, either the package binary should be
# installed, or run devtools::install_github("Anon19820/PiecewiseChangepoint")
# (if devtools is installed)
#============================================================================

#Pathway to output the results -- Need to Change as Appropriate


pathway <- "~/Simulation Study 2022/Chapple Comparison/"



library("survival")
library("BayesReversePLLH")
library("PiecewiseChangepoint")
library("sfsmisc")

GetSurvPEH = function(x, s, lam, J) {
  y = x
  for (m in 1:length(x)) {
    for (k in 1:(J + 1)) {
      if ((x[m] > s[k]) && (x[m] <= s[k + 1])) {
        if (k > 1) {
          y[m] = exp(-exp(lam[k]) * (y[m] - s[k]) -
                       sum(exp(lam[1:(k - 1)]) * (s[2:k] - s[1:(k -
                                                                  1)])))
        }
        else {
          y[m] = exp(-exp(lam[k]) * (y[m] - s[k]))
        }
      }
    }
  }
  return(y)
}



GetSurvPEH_collapse <- function (x, mod) {

  # sample.index <- sample(nrow(mod$changepoint), size = 1000)
  # mod$k.stacked <-  mod$k.stacked[sample.index,]
  # mod$changepoint <-  mod$changepoint[sample.index,]
  # mod$lambda <-  mod$lambda[sample.index,]


  SurvHold = matrix(nrow = nrow(mod$changepoint), ncol = length(x))
  j.val <- apply(mod$k.stacked,1, function(x){length(na.omit(x))})
  n_rows <- nrow(mod$changepoint)
  log_lambda <- log(mod$lambda)
  for (b in 1:n_rows) {
    s = na.omit(c(0,mod$changepoint[b, ],100))
    lam =  na.omit(log_lambda[b, ])
    J = j.val[b]
    SurvHold[b, ] = GetSurvPEH(x, s, lam, J)
  }
  return(SurvHold)
}




GetCensorTimePIECE=function(PCT, ##Desired Percentage of
                            n, ##Number of patients
                            Rate_ACC, ##Accrual Per unit time
                            Rate_piece,
                            chng,
                            B, #Bootstraps,
                            factor_pool = 10 #Assume accruals happen together
){

  if(PCT==1){

    return(10000)

  }else{

    Storage =rep(NA,B)

    for(b in 1:B){
      ##First Let's Simulate accural times


      ACC=cumsum(rexp(n/factor_pool,Rate_ACC))
      ACC <-sample(ACC, n, replace = T)

      ##Now get the trial Times
      TIMES = PiecewiseChangepoint:::rpwexp(n, lam=Rate_piece, s = chng)

      ##Trial Times at death
      Y=TIMES+ACC
      Storage[b]=quantile(Y,prob=PCT)

    }
    ##Ok Now we have the Storage
    ##Lets find the Cut Trial time cut point to get desired censoring.

    return(mean(Storage))
  }


}

#source(paste0("~/Simulation Study 2022/Chapple Comparison/Chapple Simulation Backup Functions.R"))

max_time =  2
rate1 <- c(0.25)
rate2 <- c(0.5)
rate3 <- c(0.75)

rate4 <- c(0.5,0.75)
rate5 <- c(0.25,0.75)
rate6 <- c(0.75,0.5)
rate7 <- c(0.75,0.25)

rate8 <- c(0.25,0.5,0.75)
rate9 <- c(0.75,0.5,0.25)
rate10 <- c(0.75,0.2,0.75)
rate11 <- c(0.2,0.75,0.2)


censor_vec <- c(0,0.33)
censor_vec <- c(0)
sample_vec <- c(100,300,500)
#sample_vec <- c(300,500,1000)
rate_list <- list()

for(i in 1:11){
  current_rate <-get(paste0("rate",i))
  rate_list[[i]] <- current_rate
}



sim.study_comp <- function(n_obs,n_events_req,rate,t_change, max_time =2,
                           n.sims, sims = 10750,burn_in = 750, lambda.prior = 1,
                           n.chains = 1, time_horizon =25){


  time_seq <- seq(0, time_horizon, length.out = 1500)
  if(any(is.na(t_change))){
    num.breaks = 0
  }else{
    num.breaks <- length(t_change)
  }

  # 1 Generate holding variables ----

  selc_mod_Collasping <- selc_mod_Chapple <- n.events <-  rep(NA, n.sims)
  RMSE_Collapsing_time_horizon <- RMSE_Collapsing_restrict <- RMSE_Chapple_time_horizon <- RMSE_Chapple_restrict <- rep(NA, n.sims)
  ISSE_Collapsing_time_horizon <- ISSE_Collapsing_restrict <- ISSE_Chapple_time_horizon <- ISSE_Chapple_restrict <- rep(NA, n.sims)

  # 2 True Values ----

  ## 2.1 True Survival ----

  if(any(is.na(t_change))){
    True_Surv <- pexp(time_seq, rate, lower.tail = F)
  }else{
    True_Surv <- GetSurvPEH(x =time_seq,s = c(0,t_change,100), lam =  log(rate), J  = length(t_change))

  }
  True_Surv[1] <- 1


  ## 2.2 RSt (Restricted Mean Survival) ----

  True_RSt_restrict <- sfsmisc::integrate.xy(fx = True_Surv[time_seq<= max_time],
                                             x = time_seq[time_seq<= max_time])

  True_RSt_time_horizon <- sfsmisc::integrate.xy(fx = True_Surv,
                                                 x = time_seq)


  # 3 Run Simulations ----


  for(i in 1:n.sims){
    print(paste0("Sim ",i," of ",n.sims))


    df <- PiecewiseChangepoint::gen_piece_df(n_obs = n_obs,n_events_req = n_events_req,
                                             num.breaks = length(t_change),rate = rate ,
                                             t_change = t_change, max_time = max_time)

    n.events[i] <- sum(df$status)


    Collapsing_Model <-    collapsing.model(df,
                                            n.iter = sims,
                                            burn_in = burn_in,
                                            n.chains = n.chains,
                                            timescale = "years",
                                            lambda.prior = lambda.prior)



    Chapple_Model <- BayesPiecewiseHazard(Y = df$time, I1 =df$status, Poi = lambda.prior,  B = sims*n.chains)
    for( j in 1:nrow(Chapple_Model[[1]])){ # Requires a large number to calculate survival for time_horizon
      Chapple_Model[[1]][j,which.max(Chapple_Model[[1]][j, ])] <- 100
    }

    Surv.all.Chapple  <- GetALLSurvPEH(time_seq, Chapple_Model)
    Surv.all.Chapple[,1] <- 1

    Surv.all.collapsing  <- PiecewiseChangepoint:::get_Surv(object = Collapsing_Model,time = time_seq)
    #Chapple's function is slower but gives the same results
    # Surv.all.collapsing  <- GetSurvPEH_collapse(time_seq, mod = mod)
    # Surv.all.collapsing[,1] <- 1



    ## 3.1 Calculate ISSE ----

    #Can use the rectangular integration from Chapple
    #Both approaches are the same for ISSE, however,
    #sfsmisc::integrate.xy is much more accurate for estimating restricted mean survival:

    #time_seq <- seq(0, max(Times), length.out = 1000)
    #dtheta <- mean(diff(time_seq))
    #sum(((True_Surv-colMeans(Surv.all.Chapple))^2)*dtheta)

    ISSE_Chapple_time_horizon[i] <- sfsmisc::integrate.xy(x = time_seq, fx = (True_Surv-colMeans(Surv.all.Chapple))^2)

    ISSE_Chapple_restrict[i] <- sfsmisc::integrate.xy(x = time_seq[time_seq<= max_time],
                                                      fx = ((True_Surv-colMeans(Surv.all.Chapple))^2)[time_seq<= max_time])


    ISSE_Collapsing_time_horizon[i] <- sfsmisc::integrate.xy(x = time_seq, fx = (True_Surv-rowMeans(Surv.all.collapsing))^2)
    ISSE_Collapsing_restrict[i] <- sfsmisc::integrate.xy(x = time_seq[time_seq<= max_time],
                                                         fx = ((True_Surv-rowMeans(Surv.all.collapsing))^2)[time_seq<= max_time])

    ## 3.2 Calculate RSME ----

    #Doesn't matter if the integration is done seperately or at the mean of survival you get same E[R_St]
    # where R_St is restricted mean survival
    #sep_int <- apply(Surv.all.collapsing,2, function(x){sfsmisc::integrate.xy(x = time_seq,fx = x)})
    #mean(sep_int)
    RMSE_Collapsing_time_horizon[i] <- (sfsmisc::integrate.xy(x = time_seq,
                                                              fx = rowMeans(Surv.all.collapsing))-True_RSt_time_horizon)^2

    RMSE_Collapsing_restrict[i] <- (sfsmisc::integrate.xy(x = time_seq[time_seq<= max_time],
                                                          fx = rowMeans(Surv.all.collapsing)[time_seq<= max_time]) - True_RSt_restrict)^2


    RMSE_Chapple_time_horizon[i] <- (sfsmisc::integrate.xy(x = time_seq,
                                                           fx = colMeans(Surv.all.Chapple))-True_RSt_time_horizon)^2

    RMSE_Chapple_restrict[i] <- (sfsmisc::integrate.xy(x = time_seq[time_seq<= max_time],
                                                       fx = colMeans(Surv.all.Chapple)[time_seq<= max_time])
                                 - True_RSt_restrict)^2


    # 4 Most Probable model----

    selc_mod_Collasping[i] <- as.numeric(names(which.max(Collapsing_Model$prob.changepoint)))
    selc_mod_Chapple[i] <- as.numeric(names(table(Chapple_Model[[3]])[which.max(table(Chapple_Model[[3]]))]))

    # 5 Survival Plot ----

    # Not used but provided for reference

    #https://stackoverflow.com/questions/43173044/how-to-compute-the-mean-survival-time

    # km <- survfit(Surv(time,status)~1, data = df)
    # sum(diff(c(0,km$time))*c(1,km$surv[1:(length(km$surv)-1)]))
    # survival:::survmean(true_km, rmean=max(Times_event))[[1]]["*rmean"]
    #
    #
    # plot(km)
    # lines(y = Survs, x = time_seq)
    # lines(y = colMeans(Surv.all.Chappel), x = time_seq, col = "red")
    # lines(y = colMeans(Surv.all.collapsing), x = time_seq, col = "blue")


  }

  #Highest posterior model
  prob.correct_Collapsing <- mean(selc_mod_Collasping == num.breaks)
  prob.correct_Chapple <- mean(selc_mod_Chapple == num.breaks)

  list_result <- list()

  list_result[["Collapsing"]] <- list(selc_mod = selc_mod_Collasping,
                                      prob.correct = prob.correct_Collapsing,
                                      RMSE_time_horizon = RMSE_Collapsing_time_horizon,
                                      RMSE_restrict = RMSE_Collapsing_restrict,
                                      ISSE_time_horizon = ISSE_Collapsing_time_horizon,
                                      ISSE_restrict = ISSE_Collapsing_time_horizon)


  list_result[["RJMCMC"]] <- list(selc_mod = selc_mod_Chapple,
                                   prob.correct = prob.correct_Chapple,
                                   RMSE_time_horizon = RMSE_Chapple_time_horizon,
                                   RMSE_restrict = RMSE_Chapple_restrict,
                                   ISSE_time_horizon = ISSE_Chapple_time_horizon,
                                   ISSE_restrict = ISSE_Chapple_time_horizon)

  list_result[["n_events"]] <- n.events

  return(list_result)

}


#Occurs when lambda_df row is equal to 1
for(q in 1:length(censor_vec) ){
  for(j in 1:length(sample_vec)){
    for(i in 1:length(rate_list)){

      if(length(rate_list[[i]]) ==1){
        t_change_curr <- NA
      }else{
        t_change_curr  <- c(0.5,1)[1:(length(rate_list[[i]])-1)]
      }

      current_name <- paste0("Compare_RJMCMC_",paste0(rate_list[[i]],collapse = "_"),"_size_",sample_vec[j],"_censor_",censor_vec[q])
      print(current_name)

      assign(current_name, sim.study_comp(n_obs = sample_vec[j],
                                          n_events_req = round(sample_vec[j]*(1-censor_vec[q])),
                                          rate = rate_list[[i]],
                                          t_change = t_change_curr,
                                          max_time = max_time,
                                          n.sims = 500,
                                          sims = 20750,
                                          burn_in = 750,
                                          lambda.prior = 1))

      saveRDS(get(current_name), file = paste0(pathway,current_name,".rds"))

    }
  }
}

for(i in 1:length(rate_list)){
    dir.create(paste0(pathway, paste0("Compare_RJMCMC_",paste0(rate_list[[i]],collapse = "_")), "_plots"))
}


library("ggplot2")
names_sq_error <- c("RMSE_time_horizon","RMSE_restrict","ISSE_time_horizon","ISSE_restrict")
df_correct <-  NULL
for(i in 1:length(rate_list)){

  folder_output <- paste0(pathway, paste0("Compare_RJMCMC_",paste0(rate_list[[i]],collapse = "_")), "_plots")
  df_mod <- df_output <- NULL

  for(q in 1:length(censor_vec) ){
    for(j in 1:length(sample_vec)){


      if(length(rate_list[[i]]) ==1){
        t_change_curr <- NA
      }else{
        t_change_curr  <- c(0.5,1)[1:(length(rate_list[[i]])-1)]
      }

      current_name <- paste0("Compare_RJMCMC_",paste0(rate_list[[i]],collapse = "_"),"_size_",sample_vec[j],"_censor_",censor_vec[q])

      scen_eval <- paste0("Censoring ",censor_vec[q], "; Size ", sample_vec[j]  )
      scen_eval_list <- readRDS(file = paste0(pathway,current_name,".rds"))

      df_correct_temp <-  data.frame(Scenario = paste0(scen_eval,paste0("; Hazard c(", paste0(rate_list[[i]], collapse = ","),")")))
      df_correct_temp$Collaping <- scen_eval_list[[1]]$prob.correct
      df_correct_temp$RJMCMC <- scen_eval_list[[2]]$prob.correct

      df_correct <- rbind(df_correct, df_correct_temp)
       for(x in 1:2){#Number of models
        df_temp <- data.frame(Scen = scen_eval, Model = names(scen_eval_list)[x])
        for(p in 1:4){#Number of measures
          df_temp <- cbind(df_temp,scen_eval_list[[x]][[names_sq_error[p]]])
        }
        names(df_temp) <- c("Scenario", "Model", names_sq_error)
        df_mod <- rbind(df_mod, df_temp)
      }
      df_output  <- rbind(df_output, df_mod)
    }
  }

  for( r in 1:4){
    index <- grep(names_sq_error[r], colnames(df_output) )
    plot_save <- ggplot(df_output, aes(x=Scenario, y=df_output[,index], fill=Model)) +
      geom_boxplot()+
      ylim(c(0,.1))+
      ylab(gsub("_", " ",names_sq_error[r]))+
      theme(axis.text.x = element_text(angle = 45, hjust=1))+
      xlab(paste0("Scenario = \u03BB \u2208  c(", paste0(rate_list[[i]], collapse = ","),")"))
    ggsave(paste0(folder_output,"/", names_sq_error[r],".png" ), height = 6, width = 8)

  }


}

df_correct_final <-dplyr::distinct(df_correct)

df_collapsing <- df_correct_final[, c("Scenario", "Collaping")]
df_collapsing$Model <- "Collapsing"

df_RJMCMC <- df_correct_final[, c("Scenario", "RJMCMC")]
df_RJMCMC$Model <- "RJMCMC"

names(df_RJMCMC) <- names(df_collapsing) <- c("Scenario", "Prob. correct", "Model")

df_prob_correct <- rbind(df_collapsing, df_RJMCMC)

df_prob_correct$Scenario <- stringr::str_remove_all(df_prob_correct$Scenario, "Censoring 0; ")


scen_eval_name <- c()
for(i in 1:length(rate_list)){
    for(j in 1:length(sample_vec)){
      scen_eval <- paste0("Size ", sample_vec[j]  )
      scen_eval_name <-  c(scen_eval_name, paste0(scen_eval,paste0("; Hazard c(", paste0(rate_list[[i]], collapse = ","),")")))
  }
}

df_prob_correct$Scenario <- factor(df_prob_correct$Scenario , levels=scen_eval_name)


df_prob_correct<- df_prob_correct %>%
  arrange(match(Scenario , scen_eval_name))

ggplot(df_prob_correct[1:18,], aes(x=Scenario, y=`Prob. correct`, colour=Model)) +
  geom_point()+
  ylim(c(0,1))+
  theme(axis.text.x = element_text(angle = 45, hjust=1))+
  ggtitle("No Change-point Scenarios")
ggsave("~/Simulation Study 2022/Chapple Comparison/No Change-point Scenarios.png", height = 6, width = 10)


ggplot(df_prob_correct[19:42,], aes(x=Scenario, y=`Prob. correct`, colour=Model)) +
  geom_point()+
  ylim(c(0,1))+
  theme(axis.text.x = element_text(angle = 45, hjust=1))+
  ggtitle("One Change-point Scenarios")
ggsave("~/Simulation Study 2022/Chapple Comparison/One Change-point Scenarios.png", height = 6, width = 10)

ggplot(df_prob_correct[43:66,], aes(x=Scenario, y=`Prob. correct`, colour=Model)) +
  geom_point()+
  ylim(c(0,1))+
  theme(axis.text.x = element_text(angle = 45, hjust=1))+
  ggtitle("Two Change-point Scenarios")
ggsave("~/Simulation Study 2022/Chapple Comparison/Two Change-point Scenarios.png", height = 6, width = 10)


# BACKUP----

## 1 Validate Survival Functions ----
# Ensuring both functions give the same survival


### 1.1 Multiple Change-points ----
ncp <- 2
cp_sim <-runif(ncp)

lambda_df <- matrix(runif(ncp+1), nrow = 1)
changepoint_df <- matrix(cp_sim[order(cp_sim)], nrow = 1)

object <- list(lambda = lambda_df,
               changepoint = changepoint_df)

abs(as.numeric(get_Surv(object, time = seq(0,10, by = 0.5))) -
GetSurvPEH(x =seq(0,10, by= 0.5),s = c(0,changepoint_df,100), lam =  log(lambda_df), J  = length(cp_sim))) < 0.0001

### 1.2 No Change-points ----

lambda_df <- matrix(runif(ncp+1), nrow = 1)[,1, drop = F]
changepoint_df <- matrix(NA, nrow = 1)

object <- list(lambda = lambda_df,
               changepoint = changepoint_df)

as.numeric(get_Surv(object, time = seq(0,10, by = 0.5)))
pexp(seq(0,10, by = 0.5), rate = lambda_df, lower.tail = F)

## 2 Calculate Time Horizon ----
# Calculate appropriate get time-horizon for the simulations
# Ensuring that less than 1% of population is surviving

time_horizon <- 25
for(i in 1:length(rate_list)){
  if(length(rate_list[[i]]) ==1){
    t_change_curr <- NA
    surv_time <- pexp(time_horizon, rate =rate_list[[i]], lower.tail = F)
  }else{
    t_change_curr  <- c(0.5,1)[1:(length(rate_list[[i]])-1)]
    surv_time <-  GetSurvPEH(x =time_horizon,s = c(0,t_change_curr,100), lam =  log(rate_list[[i]]), J  = length(t_change_curr))
  }
  print(surv_time<0.01)

}


## 3 Simulating Events ----

#Approach used by Chapple can give a situation where many of the censorsed observations
# have time 0 otherwise they would have negative survival
# By selecting the correct pooling (so that observations accure togehter )
# and a high accrual rate this can be avoided, however, I prefer my original approach,
# as you fix the "length" of the study. The disadvantage with my approach is that you
# have censoring throughout the study and at end of follow up and therefore the percentage
# censored refers to the proportion who are censored throughout the observed study time.

PCT <- .50
n <- 200

Rate_piece = c(0.75, 0.2,0.75)
Rate_ACC <-mean(Rate_piece)*20/PCT
chng = c(0.5,1)
B = 1000
factor_pool = n/(20*PCT)
censor_time <- GetCensorTimePIECE(PCT = PCT,
                                  n = n,
                                  Rate_ACC =Rate_ACC,
                                  Rate_piece = Rate_piece,
                                  chng = chng,
                                  B = B,
                                  factor_pool)

nboots <- 5000
events <- rep(NA, nboots)
for(i in 1:nboots){

  ACC <- cumsum(rexp(n/factor_pool, Rate_ACC))
  ACC <- sample(ACC, n, replace = T)

  TIMES<- PiecewiseChangepoint:::rpwexp(n, lam  =Rate_piece, s = chng)
  df_times <- data.frame(TIMES, ACC)

  df_final <- df_times %>% mutate(TIME_EVENT = ifelse(TIMES+ACC > censor_time,
                                                      censor_time - ACC, TIMES),
                                  status = ifelse(TIMES+ACC > censor_time,
                                                  0,1))

  if(any(df_final$TIME_EVENT <0)){
    stop("Negative Survival times, need a larger accural rate")
  }
  events[i] <- sum(df_final$status)
}

mean(events)/n
hist(events)
plot(survfit(Surv(TIME_EVENT, status)~1, data = df_final))


## 4 Integration of the Survival function ----
# I'm not sure how Chapple calculates the Area under the curve, however,
# if we use the rectangular box method it is less accurate than the function
# Shown by testing versus the analytic value of the Restricted Mean Survival


rate <- c(0.75,0.2,0.75)
t_change <- c(0.5,1)
#sfsmisc::integrate.xy

#integral cal exp(-lambdax)

time_seq <- seq(0, time_horizon, length.out = 1500)
dtheta <- mean(diff(time_seq))

Survs <- GetSurvPEH(x =time_seq,s = c(0,t_change,100), lam =  log(rate), J  = length(t_change))
Survs[1] <- 1
#TRUE
#exp(-lambdax)
t_change_diff <- diff(c(0, t_change, time_horizon))

cum_haz <- 0
cum_haz_seq <- c(0,t_change_diff*rate)
St_thres <- exp(-cumsum(cum_haz_seq))
#don't need the last St_thres
int_surv<- rep(NA, length(rate))
for(i in 1:length(rate)){
  int_surv[i] <- (exp(-0*rate[i])/rate[i] - exp(-t_change_diff[i]*rate[i])/rate[i])*St_thres[i]
}

sum(int_surv)
sfsmisc::integrate.xy(x =time_seq, fx = Survs)
sum(Survs*dtheta)
Philip-Cooney/PiecewiseChangepoint documentation built on Sept. 10, 2023, 9:49 p.m.