R/bootstrap_and_leave_one_out.R

Defines functions loo_ebmstate boot_ebmstate boot_coxrfx cumhazCIs_for_target_transition CIs_for_target_state extract_function boot_probtrans

Documented in boot_coxrfx boot_ebmstate boot_probtrans CIs_for_target_state cumhazCIs_for_target_transition extract_function loo_ebmstate

#' Bootstrap confidence intervals for transition probabilities
#' 
#' Generates 95\% highest density bootstrap interval estimates for transition probabilities computed using \code{probtrans_ebmstate} (clock-reset version).
#' 
#' @param coxrfx_fits_boot The list of CoxRFX objects obtained by running \code{boot_coxrfx}.
#' @param patient_data (Single) patient data in `long format`, possibly with `expanded` covariates
#' (as obtained by running \code{mstate::expand.covs}).
#' @param tmat Transition matrix for the multi-state model, as obtained by running \code{mstate::transMat}
#' @param initial_state The initial state for which transition probability estimates should be computed
#' @param max_time The maximum time for which estimates should be computed
#' @return Interval estimates for transition probabilities. 
#' @author Rui Costa
#' @seealso \code{\link{probtrans_ebmstate}}; \code{\link{boot_coxrfx}}; 
#' \code{\link[mstate]{transMat}}; \code{\link[mstate]{expand.covs}}


boot_probtrans<-function(coxrfx_fits_boot,patient_data,tmat,initial_state,max_time){
  stopifnot(
    "argument 'coxrfx_fits_boot' must be a list" = is.list(coxrfx_fits_boot),
    "argument 'patient_data' must be a data frame" = is.data.frame(patient_data),
    "argument 'tmat' must be a matrix" = is.matrix(tmat),
    "argument 'initial_state' must be an object of type 'character'" = is.character(initial_state),
    "argument 'max_time' must be numeric" = is.numeric(max_time)
  )
  msfit_objects_boot<-vector("list",length(coxrfx_fits_boot))
  probtrans_objects_boot<-vector("list",length(coxrfx_fits_boot))
  for(i in 1:length(coxrfx_fits_boot)){
    print(i)
    covariate_df<-as.data.frame(coxrfx_fits_boot[[i]]$Z)
    covariate_df$strata<-coxrfx_fits_boot[[i]]$strata
    mstate_data_expanded.boot<-list()
    mstate_data_expanded.boot$time<-coxrfx_fits_boot[[i]]$surv[,1]
    mstate_data_expanded.boot$status<-coxrfx_fits_boot[[i]]$surv[,2]
    patient_data2<-patient_data[names(patient_data)%in%names(covariate_df)]
    patient_data2$strata<-patient_data$strata
    
    #environment(coxrfx_fits_boot[[1]]$formula)$covariate_df<-covariate_df
    
    msfit_objects_boot[[i]]<-msfit_generic(coxrfx_fits_boot[[i]],patient_data2,trans=tmat)
    probtrans_objects_boot[[i]]<-probtrans_ebmstate(initial_state,msfit_objects_boot[[i]],"clockreset")[[1]]
    probtrans_objects_boot[[i]]<-probtrans_objects_boot[[i]][sapply(seq(from=0,to=max_time,length.out = 400),function(x) which.min(abs(probtrans_objects_boot[[i]]$time-x))),]
    
  }

  probtrans_CIs<-lapply(colnames(tmat),CIs_for_target_state,probtrans_objects_boot=probtrans_objects_boot)
  names(probtrans_CIs)<-colnames(tmat)
  return(list(probtrans_CIs=probtrans_CIs,probtrans_objects_boot=probtrans_objects_boot, msfit_objects_boot=msfit_objects_boot))
}

#' Ancillary function to \code{boot_ebmstate}.
#' 
#' Extracts the bootstrap estimates of transition probabilities for
#' target state `tstate` from a list
#' with bootstrap estimates of transition probabilities into multiple states.
#' This function is not meant to be called by the user.
#' 
#' @param list_object A list in which each individual element is a single
#' bootstrap estimate of the probability of transition
#' into different states.
#' @param tstate The state whose bootstrap estimates of transition probabilities we wish to extract
#' from \code{list_object}. 
#' @return Bootstrap estimates of transition probabilities into target state `tstate`. 
#' @details This function is an ancillary function of \code{CIs_for_target_state}, which
#' in turn is an ancillary function of \code{boot_ebmstate}.
#' @author Rui Costa
#' @seealso \code{\link{CIs_for_target_state}}; \code{\link{boot_ebmstate}} 

extract_function<-function(list_object,tstate){
  as.vector(list_object[tstate])
}

#' Ancillary function of \code{boot_ebmstate}.
#' 
#' Computes 95\% highest density bootstrap confidence 
#' intervals for the transition probabilities into \code{target_state}, 
#' given a list object with boostrap estimates of transition probabilities into multiple states. This 
#' function is not meant to be called by the user.
#' 
#' @param target_state The target state for whose transition probabilties the confidence intervals
#' are computed.
#' @param probtrans_objects_boot A list containing bootstrap estimates of transition probabilities.
#' @return 95\% highest density bootstrap confidence intervals for the transition
#' probabilities into \code{target_state}. 
#' @details Uses function \code{extract_function}.
#' @author Rui Costa
#' @seealso \code{\link{boot_ebmstate}}; \code{\link{extract_function}}.

CIs_for_target_state<-function(target_state,probtrans_objects_boot){
  target_state_boot_samples<-as.data.frame(sapply(probtrans_objects_boot, extract_function,tstate=target_state))
  apply(target_state_boot_samples,1,hdi,credMass=0.95)
}

#' Ancillary function of \code{boot_ebmstate}.
#' 
#' Computes 95\% highest density, non-parametric bootstrap confidence 
#' intervals for the cumulative hazard rate functions, 
#' given a list of \code{msfit} objects with boostrap estimates of cumulative hazard rate functions
#' for multiple transitions. This 
#' function is not meant to be called by the user.
#' 
#' @param transition The transition for which transition confidence intervals
#' are computed.
#' @param  msfit_objects_boot List of \code{msfit} objects with boostrap estimates 
#' of cumulative hazard rate functions
#' for multiple transitions.
#' @return 95\% highest density, non-parametric bootstrap confidence intervals for the cumulative
#' hazard rate functions.
#' @author Rui Costa
#' @seealso \code{\link{boot_ebmstate}}.


cumhazCIs_for_target_transition<-function(transition,msfit_objects_boot){
  unique_time_points<-sort(unique(unlist(sapply(msfit_objects_boot,function(x) unique(x[[1]][,"time"])))))
  cumhaz_fun<-function(msfit_object_boot,unique_time_point,transition){
    msfit_for_target_trans<-msfit_object_boot[[1]][msfit_object_boot[[1]][,"trans"]==transition,]
    msfit_for_target_trans[which.max(msfit_for_target_trans[,"time"]>=unique_time_point),"Haz"]
  }
  obj<-sapply(msfit_objects_boot,function(x) sapply(unique_time_points,cumhaz_fun,msfit_object_boot=x,transition=transition))
  output<-apply(obj,1,hdi,credMass=0.95)
  colnames(output)<-unique_time_points
  output
}


#' Bootstrap confidence intervals for regression coefficients
#' 
#' This function computes 95\% highest density bootstrap confidence intervals (non-parametric) for the regression coefficients estimated by CoxRFX.
#' 
#' @param mstate_data_expanded Data in `long format`, possibly with `expanded` covariates (as obtained by running mstate::expand.covs).
#' @param which_group A character vector with the same meaning as the `groups` argument of the function \code{CoxRFX} but named (with the covariate names).
#' @param min_nr_samples The confidence interval of any coefficient is based on a number of bootstrap samples at least as high as this argument. See details.
#' @param output Determines the sort of output. See value.
#' @param ... Further arguments to the CoxRFX function.
#' @return For each regression coefficient, the confidence intervals and the number of bootstrap samples on which they are based, if the `output` argument is equal to `CIs`; if `output` is equal to `CIs_and_coxrfx_fits`, also the \code{CoxRFX} objects for each bootstrap sample.  
#' @details In a given bootstrap sample there might not be enough information to generate 
#' estimates for all coefficients. If a covariate has little or no variation in a given bootstrap sample, 
#' no estimate of its coefficient will be computed. The present function will
#' keep taking bootstrap samples until every coefficient has been estimated
#' at least \code{min_nr_samples} times.
#' @author Rui Costa

boot_coxrfx<-function(mstate_data_expanded,which_group,min_nr_samples=100,output="CIs",...){
  coxrfx_fits_boot<-vector("list")
  rownames(mstate_data_expanded)<-1:nrow(mstate_data_expanded)
  boot_matrix<-matrix(nrow=0,ncol = sum(!names(mstate_data_expanded)%in%c("id","from","to","trans","Tstart","Tstop","time","status","strata","type")),dimnames = list(NULL,names(mstate_data_expanded)[!names(mstate_data_expanded)%in%c("id","from","to","trans","Tstart","Tstop","time","status","strata","type")]))
  j<-1
  repeat{
    boot_samples <- vector('list',length(unique(mstate_data_expanded$strata)))
    for (i in 1:length(boot_samples)){
        trans_rownames <- rownames(mstate_data_expanded[mstate_data_expanded$trans==i,])
        boot_samples[[i]] <- eval(call('sample', trans_rownames, replace = TRUE))
    }
    boot_samples <- unlist(boot_samples)
    # boot_samples_trans_1<-sample(rownames(mstate_data_expanded[mstate_data_expanded$trans==1,]),replace = TRUE)
    # boot_samples_trans_2<-sample(rownames(mstate_data_expanded[mstate_data_expanded$trans==2,]),replace = TRUE)
    # boot_samples_trans_3<-sample(rownames(mstate_data_expanded[mstate_data_expanded$trans==3,]),replace = TRUE)
    # boot_samples<-c(boot_samples_trans_1,boot_samples_trans_2,boot_samples_trans_3) 
    
    mstate_data_expanded.boot<-mstate_data_expanded[boot_samples,]
    
    ##exclude covariates without variance and binary covariates with less than 0.05 cases
    # vars_to_exclude<-vector("list",3)
    # for(i in 1:3){
    #   string_split<-strsplit(names(mstate_data_expanded.boot),"[.]")
    #   var_indices<-sapply(string_split,function(x) x[length(x)]==as.character(i))
    #   dummy_dataset<-mstate_data_expanded.boot[mstate_data_expanded.boot$trans==i,var_indices]
    #   which_have_variance<-apply(dummy_dataset, 2, function(x) var(x)>0)
    #   vars_to_exclude[[i]]<-names(dummy_dataset)[!which_have_variance]
    #   dummy_dataset<-dummy_dataset[which_have_variance]
    #   non_categorical_vars<-paste0(c("age_log","hb","anc_log","plt_log","bm_blasts_logit","ring_sideroblasts_logit","ipss","date"),paste0(".",as.character(i)))
    #   percentage_of_ones<-apply(dummy_dataset[!names(dummy_dataset)%in%non_categorical_vars], 2, function(x) sum(x)/length(x))
    #   which_less_than_five_percent<-which(percentage_of_ones<0.05)
    #   vars_to_exclude[[i]]<-c(vars_to_exclude[[i]],names(percentage_of_ones)[which_less_than_five_percent])
    # }
    # mstate_data_expanded.boot<-mstate_data_expanded.boot[!names(mstate_data_expanded.boot)%in%unlist(vars_to_exclude)]
    # 
    
    covariate_df<-mstate_data_expanded.boot[!names(mstate_data_expanded.boot)%in%c("id","from","to","trans","Tstart","Tstop","time","status","type")]
    groups2<-which_group[names(covariate_df)[names(covariate_df)!="strata"]]
    
    coxrfx_fits_boot[[j]]<-CoxRFX(covariate_df,Surv(mstate_data_expanded.boot$time,mstate_data_expanded.boot$status),groups =groups2,... )
    
    if(coxrfx_fits_boot[[j]]$iter[1]!=as.list(coxrfx_fits_boot[[j]]$call)$max.iter & sum(is.na(coxrfx_fits_boot[[j]]$coefficients))==0){
      boot_matrix<-rbind(boot_matrix,rep(NA,ncol(boot_matrix)))
      boot_matrix[j,names(coxrfx_fits_boot[[j]]$coefficients)]<-coxrfx_fits_boot[[j]]$coefficients
      print(min(apply(boot_matrix, 2, function(x) sum(!is.na(x)))))
      j<-j+1
    } 
    if(min(apply(boot_matrix, 2, function(x) sum(!is.na(x))))>=min_nr_samples) break
    
  }
  
  CIs<-apply(boot_matrix,2,hdi,credMass=0.95)
  CIs<-rbind(CIs,apply(boot_matrix, 2, function(x) sum(!is.na(x))))
  dimnames(CIs)[[1]][3]<-"n_samples"
  if(output=="CIs_and_coxrfx_fits"){
    return(list(CIs=CIs,coxrfx_fits_boot=coxrfx_fits_boot))
  }else if(output=="CIs"){
    return(CIs)
  }
}

#' Bootstrap samples and bootstrap interval estimates
#' 
#' This function computes bootstrap samples of regression coefficients,
#' cumulative hazard functions, and transition probability functions.
#' 
#' @param mstate_data A data frame with outcome and covariate data in long format.
#' @param which_group A character vector with the same meaning as the `groups` argument of the function \code{CoxRFX} but named (with the covariate names).
#' @param min_nr_samples The confidence interval of any coefficient is based on a number of bootstrap samples at least as high as this argument. See details.
#' @param patient_data The covariate data for which the estimates of cumulative hazards and transition probabilities are computed. 
#' Must contain: one row of data for each transition, all the covariate columns in the fitted model, and also the 'strata' column. 
#' @param initial_state The initial state for which transition probability estimates should be computed
#' @param tmat Transition matrix for the multi-state model, as obtained by running \code{mstate::transMat}
#' @param backup_file Path to file. Objects generated while the present function is running are stored in this file. 
#' This avoids losing all estimates if and when the algorithm breaks down. See argument \code{input_file}. 
#' @param input_file Path to \code{backup_file} (see argument \code{backup_file}). If this argument is given, all other arguments should be \code{NULL}.
#' @param time_model The model of time-dependency: either 'clockforward' or 'clockreset'.
#' @param coxrfx_args Named list with arguments to the \code{CoxRFX} function other than \code{Z},\code{surv} and \code{groups}.
#' @param msfit_args Named list with arguments to the \code{msfit_generic.coxrfx} function other than \code{object},\code{newdata} and \code{trans}.
#' @param probtrans_args Named list with arguments to the \code{probtrans_ebmstate} function other than \code{initia_state},\code{cumhaz} and \code{model}.
#' @return A list with: 95\% bootstrap intervals for each regression coefficient and for transition probabilities; 
#' bootstrap samples of regression coefficients, cumulative hazards and transition probabilities.
#' @details In a given bootstrap sample there might not be enough information to generate 
#' estimates for all coefficients. If a covariate has little or no variation in a given bootstrap sample, 
#' no estimate of its coefficient will be computed. The present function will
#' keep taking bootstrap samples until every coefficient has been estimated
#' at least \code{min_nr_samples} times. \code{covariate_df} should only contain the covariates
#' of the model one wishes to estimate.
#' @author Rui Costa
#' @export


boot_ebmstate <- function(mstate_data = NULL,
                          which_group = NULL,
                          min_nr_samples = NULL, patient_data = NULL,
                          initial_state = NULL, tmat = NULL, time_model = NULL,
                          backup_file = NULL, input_file = NULL,
                          coxrfx_args = NULL, msfit_args = NULL,
                          probtrans_args = NULL) {
  stopifnot(
    "argument 'mstate_data' must be a data frame" = is.data.frame(mstate_data),
    "argument 'which_group' must be of type 'character'" = is.character(which_group),
    "argument 'min_nr_samples' must be numeric" = is.numeric(min_nr_samples),
    "argument 'min_nr_samples' must be a positive integer" 
    = min_nr_samples > 0 & min_nr_samples/ceiling(min_nr_samples) == 1, 
    "argument 'patient_data' must be a data frame " = is.data.frame(patient_data),
    "argument 'initial_state' must be of type 'character'" = is.character(initial_state),
    "argument 'tmat' must be a matrix" = is.matrix(tmat),
    "argument 'time_model' must be either 'clockreset' or 'clockforward'" 
    = time_model %in% c("clockreset","clockforward"),
    "argument 'backup_file' must be of type 'character'" 
      = (is.null(backup_file) | is.character(backup_file)),
    "argument 'input_file' must be of type 'character'" 
      = (is.null(input_file) | is.character(input_file)),
    "argument 'coxrfx_args' must be a list" = (is.null(coxrfx_args) | is.list(coxrfx_args)),
    "argument 'msfit_args' must be a list" = (is.null(msfit_args) | is.list(msfit_args)),
    "argument 'probtrans_args' must be a list" = (is.null(probtrans_args) | is.list(probtrans_args))
  )
  list2env(coxrfx_args, envir = environment())
  if (!is.null(input_file)) {
    load(input_file)
  } else {
    coxrfx_fits_boot <- vector("list")
    msfit_objects_boot <- vector("list")
    probtrans_objects_boot <- vector("list")
    rownames(mstate_data) <- 1:nrow(mstate_data)
    names_to_exclude <- c("id", "from", "to", "trans", "Tstart", "Tstop",
                          "strata", "time", "status", "type")
    boot_matrix <- matrix(
      nrow = 0,
      ncol = sum(!names(mstate_data)
                 %in% names_to_exclude),
      dimnames = list(NULL, names(mstate_data)
                      [!names(mstate_data) %in% names_to_exclude])
    )
    j <- 1
  }
  tol <- unlist(mget("tol", ifnotfound = list(function(tol) 0.001)))
  max.iter <- unlist(mget("max.iter", ifnotfound = list(function(max.iter) 50)))
  sigma0 <- unlist(mget("sigma0", ifnotfound = list(function(sigma0) 0.1)))
  sigma.hat <- unlist(mget("sigma.hat", ifnotfound = list(function(sigma.hat) "df"))) # nolint: line_length_linter.
  verbose <- unlist(mget("verbose", ifnotfound = list(function(verbose) FALSE)))
  repeat {
    boot_samples_trans_1 <- sample(rownames(mstate_data[mstate_data$trans == 1, ]), # nolint: line_length_linter.
                                   replace = TRUE)
    boot_samples_trans_2 <- sample(rownames(mstate_data[mstate_data$trans == 2, ]), # nolint: line_length_linter.
                                   replace = TRUE)
    boot_samples_trans_3 <- sample(rownames(mstate_data[mstate_data$trans == 3, ]), # nolint: line_length_linter.
                                   replace = TRUE)
    boot_samples <- c(boot_samples_trans_1,boot_samples_trans_2,boot_samples_trans_3)  # nolint: line_length_linter.
    
    mstate_data.boot <- mstate_data[boot_samples, ]
    covariate_df <- mstate_data.boot[
      !names(mstate_data.boot) %in% c("id", "from", "to", "Tstart",
                                               "Tstop", "time", "status",
                                               "type")
    ]
    groups2 <- which_group[
      names(covariate_df)[!names(covariate_df) %in% c("strata", "trans")]
    ]
    if (time_model == "clockreset") {
      surv_object <- Surv(mstate_data.boot$time,
                          mstate_data.boot$status)
    } else if (time_model == "clockforward") {
      surv_object <- Surv(mstate_data.boot$Tstart,
                          mstate_data.boot$Tstop,
                          mstate_data.boot$status) 
    }
    which.mu <- unlist(
      mget("which.mu", ifnotfound = list(function(which.mu) unique(groups2)))
    )
    coxrfx_fits_boot[[j]] <- CoxRFX(covariate_df, surv_object, groups2,
                                    which.mu = which.mu, tol = tol, 
                                    max.iter = max.iter, sigma0 = sigma0,
                                    sigma.hat = sigma.hat, verbose = verbose,
                                    coxrfx_args)
    #coxrfx_fits_boot[[j]]<-do.call("CoxRFX",c(list(Z=covariate_df,surv=surv_object,groups=groups2),coxrfx_args))
    
    if (sum(is.na(coxrfx_fits_boot[[j]]$coefficients)) == 0) {
      boot_matrix <- rbind(boot_matrix, rep(NA, ncol(boot_matrix)))
      boot_matrix[
        j,
        names(coxrfx_fits_boot[[j]]$coefficients)
      ] <- coxrfx_fits_boot[[j]]$coefficients
      msfit_objects_boot[[j]] <- do.call(
        "msfit_generic",
        c(list(
          object = coxrfx_fits_boot[[j]],
          newdata = patient_data,
          trans = tmat
        ), msfit_args)
      )
      probtrans_objects_boot[[j]] <- do.call(
        "probtrans_ebmstate",
        c(list(
          initial_state = initial_state,
          cumhaz = msfit_objects_boot[[j]],
          model = time_model
        ), probtrans_args)
      )[[1]]
      print(min(apply(boot_matrix, 2, function(x) sum(!is.na(x)))))
      if(j %% 5 == 0 & !is.null(backup_file)) {
        save(coxrfx_fits_boot, probtrans_objects_boot,
             msfit_objects_boot, boot_matrix, j, file = backup_file)
      }
      j <- j + 1
    }
    if (
      min(apply(boot_matrix, 2, function(x) sum(!is.na(x)))) >= min_nr_samples
    ) break
  }
  CIs <- apply(boot_matrix, 2, hdi, credMass = 0.95)
  CIs <- rbind(CIs, apply(boot_matrix, 2, function(x) sum(!is.na(x))))
  dimnames(CIs)[[1]][3] <- "n_samples"
  
  probtrans_CIs <- lapply(colnames(tmat), CIs_for_target_state,
                          probtrans_objects_boot = probtrans_objects_boot)
  names(probtrans_CIs) <- colnames(tmat)
  
  cumhaz_CIs <- lapply(sort(unique(mstate_data$trans)),
                       cumhazCIs_for_target_transition,
                       msfit_objects_boot = msfit_objects_boot)

  return(list(coefficients_CIs = CIs,
              coxrfx_fits_boot = coxrfx_fits_boot,
              probtrans_CIs = probtrans_CIs,
              probtrans_objects_boot = probtrans_objects_boot, 
              msfit_objects_boot = msfit_objects_boot,
              patient_data = patient_data,
              cumhaz_CIs = cumhaz_CIs))
}

#' Leave-one-out estimation
#' 
#' This function computes leave-one-out estimation of regression coefficients,
#' cumulative hazard functions, and transition probability functions.
#' 
#' @param mstate_data Data in `long format`.
#' @param mstate_data_expanded Data in `long format`, possibly with `expanded` covariates (as obtained by running mstate::expand.covs).
#' @param which_group A character vector with the same meaning as the `groups` argument of the function \code{CoxRFX} but named (with the covariate names).
#' @param patient_IDs The IDs of the patients whose cumulative hazards and transition probabilities one wishes to estimate.
#' @param initial_state The initial state for which transition probability estimates should be computed
#' @param tmat Transition matrix for the multi-state model, as obtained by running \code{mstate::transMat}
#' @param backup_file Path to file. Objects generated while the present function is running are stored in this file. 
#' This avoids losing all estimates if and when the algorithm breaks down. See argument \code{input_file}. 
#' @param input_file Path to \code{backup_file} (see argument \code{backup_file}). If this argument is given, all other arguments should be \code{NULL}.
#' @param time_model The model of time-dependency: either 'clockforward' or 'clockreset'.
#' @param coxrfx_args Named list with arguments to the \code{CoxRFX} function other than \code{Z},\code{surv} and \code{groups}.
#' @param msfit_args Named list with arguments to the \code{msfit_generic.coxrfx} function other than \code{object},\code{newdata} and \code{trans}.
#' @param probtrans_args Named list with arguments to the \code{probtrans_ebmstate} function other than \code{initia_state},\code{cumhaz} and \code{model}.
#' @return A list with: 95\% bootstrap intervals for each regression coefficient and for transition probabilities; 
#' bootstrap samples of regression coefficients, cumulative hazards and transition probabilities.
#' @details In a given bootstrap sample there might not be enough information to generate 
#' estimates for all coefficients. If a covariate has little or no variation in a given bootstrap sample, 
#' no estimate of its coefficient will be computed. The present function will
#' keep taking bootstrap samples until every coefficient has been estimated
#' at least \code{min_nr_samples} times.
#' @author Rui Costa
#' @export


loo_ebmstate<-function(mstate_data,mstate_data_expanded,which_group,
                     patient_IDs,initial_state,tmat,time_model,
                     backup_file=NULL,input_file=NULL,coxrfx_args=list(),
                     msfit_args=NULL,probtrans_args=NULL){
stopifnot(
  "argument 'mstate_data' must be a data frame" = is.data.frame(mstate_data),
  "argument 'mstate_data_expanded' must be a data frame" = is.data.frame(mstate_data_expanded),
  "argument 'which_group' must be of type 'character'" = is.character(which_group),
  "argument 'patient_IDs' must be numeric" = is.numeric(patient_IDs),
  "argument 'initial_state' must be of type 'character'" = is.character(initial_state),
  "argument 'tmat' must be a matrix" = is.matrix(tmat),
  "argument 'time_model' must be either 'clockreset' or 'clockforward'" 
    = time_model %in% c("clockreset","clockforward"),
  "argument 'backup_file' must be of type 'character'" 
    = (is.null(backup_file) | is.character(backup_file)),
  "argument 'input_file' must be of type 'character'" 
    = (is.null(input_file) | is.character(input_file)),
  "argument 'coxrfx_args' must be a list" = (is.null(coxrfx_args) | is.list(coxrfx_args)),
  "argument 'msfit_args' must be a list" = (is.null(msfit_args) | is.list(msfit_args)),
  "argument 'probtrans_args' must be a list" = (is.null(probtrans_args) | is.list(probtrans_args))
  )
  list2env(coxrfx_args,envir = environment())
  if(!is.null(input_file)){
    load(input_file)
    indices<-j:length(patient_IDs)
  }else{
    coxrfx_fits_loo<-vector("list")
    msfit_objects_loo<-vector("list")
    probtrans_objects_loo<-vector("list")
    indices<-1:length(patient_IDs)
  }
  tol<-unlist(mget("tol",ifnotfound = list(function(tol) 0.001)))
  max.iter<- unlist(mget("max.iter",ifnotfound = list(function(max.iter) 50)))
  sigma0<- unlist(mget("sigma0",ifnotfound = list(function(sigma0) 0.1)))
  sigma.hat<- unlist(mget("sigma.hat",ifnotfound = list(function(sigma.hat) "df")))
  verbose<- unlist(mget("verbose",ifnotfound = list(function(verbose) FALSE)))
  
  for(j in indices){ 
    mstate_data_expanded_loo<-mstate_data_expanded[mstate_data_expanded$id!=patient_IDs[j],]
    covariate_df<-mstate_data_expanded_loo[!names(mstate_data_expanded_loo)%in%c("id","from","to","Tstart","Tstop","time","status","type")]
    groups2<-which_group[names(covariate_df)[!names(covariate_df)%in%c("strata","trans")]]
    if(time_model=="clockreset"){
      surv_object<-Surv(mstate_data_expanded_loo$time,mstate_data_expanded_loo$status) 
    }else if(time_model=="clockforward"){
      surv_object<-Surv(mstate_data_expanded_loo$Tstart,mstate_data_expanded_loo$Tstop,mstate_data_expanded_loo$status) 
    }
    which.mu<-unlist(mget("which.mu",ifnotfound = list(function(which.mu) unique(groups2))))
    coxrfx_fits_loo[[j]]<-CoxRFX(covariate_df,surv_object,groups2,which.mu =which.mu,
                                 tol = tol,
                                 max.iter = max.iter,
                                 sigma0 = sigma0,
                                 sigma.hat = sigma.hat,
                                 verbose = verbose,coxrfx_args)
    if(sum(is.na(coxrfx_fits_loo[[j]]$coefficients))==0){
      patient_data<-mstate_data[mstate_data$id==patient_IDs[j],,drop=FALSE][1,][rep(1,length(unique(mstate_data$trans))),]
      patient_data$trans<-1:length(unique(mstate_data$trans))
      patient_data<-expand.covs(patient_data,
                                covs = names(patient_data)[!names(patient_data)%in%c("id","from","to","trans","strata","Tstart","Tstop","time","status","type")])
      patient_data<-patient_data[names(mstate_data_expanded)]
      patient_data$strata<-unique(mstate_data[c("trans","strata")])[,2]
      msfit_objects_loo[[j]]<-do.call("msfit_generic",c(list(object=coxrfx_fits_loo[[j]],newdata=patient_data,trans=tmat),msfit_args))
      probtrans_objects_loo[[j]]<-do.call("probtrans_ebmstate",c(list(initial_state=initial_state,cumhaz=msfit_objects_loo[[j]],model=time_model),probtrans_args))
      if(j %%5==0 & !is.null(backup_file)){
        save(patient_IDs,coxrfx_fits_loo,msfit_objects_loo,probtrans_objects_loo,j,file=backup_file)
      }
      print(j)
    } 
  }
  
  return(list(coxrfx_fits_loo=coxrfx_fits_loo,
              probtrans_objects_loo=probtrans_objects_loo, 
              msfit_objects_loo=msfit_objects_loo,
              patient_IDs=patient_IDs))
}

Try the ebmstate package in your browser

Any scripts or data that you put into this service are public.

ebmstate documentation built on July 4, 2024, 9:07 a.m.