Nothing
#' Format Simulated Dataset for Hazard Ratio Calculation
#'
#' This internal function modifies the pooled-over-time dataset generated by the \code{\link{simulate}} function
#' to a format suitable for hazard ratio calculation.
#' @param i Integer specifying the index \code{intcomp}.
#' @param intcomp List of two numbers indicating a pair of interventions to be compared by a hazard ratio.
#' @param time_name Character string specifying the name of the time variable in \code{pools}.
#' @param pools Data table containing the simulated values for the covariates, outcome
#' probabilities, competing event probabilities, outcomes, and competing
#' events.
#' @return Data table consisting of failure time information for each individual or the last time
#' point information (for individuals who do not experience an event).
#' @keywords internal
#' @import data.table
hr_helper <- function(i, intcomp, time_name, pools){
pool <- pools[[intcomp[[i]]]][, .SD, .SDcols = c("id", time_name, "Y", "D")]
event_ids <- unique(pool[pool$Y == 1 | pool$D == 1]$id)
# Take failure time for each ID
new_pool <- pool[pool$Y == 1 | pool$D == 1][, .SD[1], id]
# Bind to last time point of each ID for individuals who do not experience an event
# Resulting datatable consists of information at censor time for each individual
new_pool <- rbind(new_pool, pool[!id %in% event_ids][, .SD[.N], id])
setorder(new_pool, id)
# Encode no event, main event, and competing event as 0, 1, and 2, respectively
set(new_pool, j = 'event', value = pmax(new_pool$Y, new_pool$D, na.rm = TRUE))
set(new_pool, j = 'Ycomp', value = new_pool$event)
new_pool[new_pool$D == 1 & new_pool$event == 1, "Ycomp" := 2]
set(new_pool, j = 'regime', value = i - 1)
return (new_pool)
}
#' General Error Catching
#'
#' This internal function catches various potential errors in the user input in \code{\link{gformula_survival}}, \code{\link{gformula_continuous_eof}}, and \code{\link{gformula_binary_eof}}.
#'
#' @param id Character string specifying the name of the ID variable in \code{obs_data}.
#' @param nsimul Number of subjects for whom to simulate data.
#' @param intvars List, whose elements are vectors of character strings. The kth vector in \code{intvars} specifies the name(s) of the variable(s) to be intervened
#' on in each round of the simulation under the kth intervention in \code{interventions}.
#' @param interventions List, whose elements are lists of vectors. Each list in \code{interventions} specifies a unique intervention on the relevant variable(s) in \code{intvars}. Each vector contains a function
#' implementing a particular intervention on a single variable, optionally
#' followed by one or more "intervention values" (i.e.,
#' integers used to specify the treatment regime).
#' @param int_times List, whose elements are lists of vectors. The kth list in \code{int_times} corresponds to the kth intervention in \code{interventions}. Each vector specifies the time points in which the relevant intervention is applied on the corresponding variable in \code{intvars}.
#' When an intervention is not applied, the simulated natural course value is used. By default, this argument is set so that all interventions are applied in all time points.
#' @param int_descript Vector of character strings, each describing an intervention.
#' @param covnames Vector of character strings specifying the names of the time-varying covariates in \code{obs_data}.
#' @param covtypes Vector of character strings specifying the "type" of each time-varying covariate included in \code{covnames}. The possible "types" are: \code{"binary"}, \code{"normal"}, \code{"categorical"}, \code{"bounded normal"}, \code{"zero-inflated normal"}, \code{"truncated normal"}, \code{"absorbing"}, \code{"categorical time"}, and \code{"custom"}.
#' @param basecovs Vector of character strings specifying the names of baseline covariates in \code{obs_data}.
#' @param histvars List of vectors. The kth vector specifies the names of the variables for which the kth history function
#' in \code{histories} is to be applied.
#' @param histories Vector of history functions to apply to the variables specified in \code{histvars}.
#' @param compevent_model Model statement for the competing event variable.
#' @param hazardratio Logical scalar indicating whether the hazard ratio should be calculated
#' between two interventions.
#' @param intcomp List of two numbers indicating a pair of interventions to be compared by a hazard ratio.
#' The default is \code{NA}, resulting in no hazard ratio calculation.
#' @param time_points Number of time points to simulate.
#' @param time_name Character string specifying the name of the time variable in \code{obs_data}.
#' @param outcome_type Character string specifying the "type" of the outcome. The possible "types" are: \code{"survival"}, \code{"continuous_eof"}, and \code{"binary_eof"}.
#' @param obs_data Data table containing the observed data.
#' @param parallel Logical scalar indicating whether to parallelize simulations of
#' different interventions to multiple cores.
#' @param ncores Integer specifying the number of cores to use in parallel
#' simulation.
#' @param nsamples Integer specifying the number of bootstrap samples to generate.
#' @param sim_data_b Logical scalar indicating whether to return the simulated data set. If bootstrap samples are used (i.e., \code{nsamples} is set to a value greater than 0), this argument must be set to \code{FALSE}.
#' @param outcome_name Character string specifying the name of the outcome variable in \code{obs_data}.
#' @param compevent_name Character string specifying the name of the competing event variable in \code{obs_data}.
#' @param comprisk Logical scalar indicating the presence of a competing event.
#' @param censor Logical scalar indicating the presence of a censoring variable in \code{obs_data}.
#' @param censor_name Character string specifying the name of the censoring variable in \code{obs_data}.
#' @param covmodels Vector of model statements for the time-varying covariates.
#' @param histvals List of length 3. First element contains a vector of integers specifying the number of lags back for the lagged function. Second element contains
#' a vector of integers indicating the number of lags back for the lagavg function. The last element is an indicator whether a cumavg term
#' appears in any of the model statements.
#' @param ipw_cutoff_quantile Percentile by which to truncate inverse probability weights.
#' @param ipw_cutoff_value Cutoff value by which to truncate inverse probability weights.
#'
#' @return No value is returned.
#' @keywords internal
#' @import data.table
error_catch <- function(id, nsimul, intvars, interventions, int_times, int_descript,
covnames, covtypes, basecovs,
histvars, histories, compevent_model,
hazardratio, intcomp, time_points, outcome_type,
time_name, obs_data, parallel, ncores, nsamples,
sim_data_b, outcome_name, compevent_name, comprisk,
censor, censor_name, covmodels, histvals, ipw_cutoff_quantile,
ipw_cutoff_value){
if (!is.data.table(obs_data)){
if (is.data.frame(obs_data)){
obs_data <- setDT(obs_data)
warning("obs_data must be of class 'data.table'. obs_data was coerced to a data table",
immediate. = TRUE)
} else{
stop("obs_data must be of class 'data.table'")
}
}
obs_colnames <- colnames(obs_data)
if (missing(id)){
stop("Missing parameter id")
} else if (!(id %in% obs_colnames)){
stop(paste('id', id, 'not found in obs_data'))
}
if (missing(outcome_name)){
stop("Missing parameter outcome_name")
} else if (!(outcome_name %in% obs_colnames)){
stop(paste('outcome_name', outcome_name, 'not found in obs_data'))
}
if (comprisk){
if (is.null(compevent_name)){
stop("Missing parameter compevent_name")
} else if (!(compevent_name %in% obs_colnames)){
stop(paste('compevent_name', compevent_name, 'not found in obs_data'))
}
}
if (censor){
if (is.null(censor_name)){
stop("Missing parameter censor_name")
} else if (!(censor_name %in% obs_colnames)){
stop(paste('censor_name', censor_name, 'not found in obs_data'))
}
}
if (!missing(covnames)){
for (covname in covnames){
if (!(covname %in% obs_colnames)){
stop(paste('Covariate', covname, 'in covnames not found in obs_data'))
}
}
}
if (!is.na(basecovs[[1]])){
for (basecov in basecovs){
if (!(basecov %in% obs_colnames)){
stop(paste('Covariate', basecov, 'in basecovs not found in obs_data'))
}
}
}
if (!is.null(histvars)){
for (histvar in unique(unlist(histvars))){
if (!(histvar %in% covnames)){
stop(paste('Covariate', histvar, 'in histvars not found in covnames'))
}
}
}
if (!is.na(nsimul) && nsimul < length(unique(obs_data[[id]]))){
warning("Number of simulated subjects desired is fewer than number of observed
subjects", immediate. = TRUE)
}
if (missing(time_name)){
stop("Missing parameter time_name")
} else if (!(time_name %in% colnames(obs_data))){
stop(paste('time_name', time_name, 'not found in obs_data'))
} else if (time_name == 't'){
stop('time_name cannot be set to "t"')
}
if(!is.numeric(obs_data[[time_name]])){
stop("Time variable in obs_data is not a numeric variable")
}
min_time <- min(obs_data[[time_name]])
correct_time_indicator <- tapply(obs_data[[time_name]], obs_data[[id]],
FUN = function(x){
all(x == min(min_time, 0):(length(x)+min(min_time, 0)-1))
})
if (!all(correct_time_indicator)){
stop("Time variable in obs_data not correctly specified. For each individual time records should begin with 0 (or, optionally -i if using i lags) and increase in increments of 1 in consecutive rows, where no time records are skipped.")
}
n_temp <- nrow(obs_data)
dif <- obs_data[2:n_temp][[time_name]] - obs_data[1:(n_temp - 1)][[time_name]]
rows_changing_id <- which(dif != 1)
if(!(all(obs_data[rows_changing_id + 1][[time_name]] == min_time))){
stop('obs_data is not sorted with respect to the ID variable and time variable. For each individual time records should begin with 0 (or, optionally -i if using i lags) and increase in increments of 1 in consecutive rows, where no time records are skipped. ')
}
obs_time_points_pos <- max(obs_data[[time_name]])+1
if (!is.null(time_points)){
if (time_points > obs_time_points_pos){
stop("Number of simulated time points desired is set to a value beyond the observed
follow-up in the data")
}
}
if (length(covnames) != length(covtypes)){
stop("Missing covariate information (covnames and covtypes are of unequal length)")
}
for (k in seq_along(covnames)){
if (covtypes[k] == 'zero-inflated normal'){
if (sum(obs_data[[covnames[k]]] < 0) != 0){
stop("zero-inflated normal covariates cannot contain negative values")
}
} else if (covtypes[k] == 'binary'){
if (is.factor(obs_data[[covnames[k]]])){
stop("binary covariates must be numeric 0-1")
}
}
}
all_covtypes <- c('binary', 'normal', 'categorical', 'bounded normal',
'zero-inflated normal', 'truncated normal',
'absorbing', 'categorical time', 'custom')
for (covtype in covtypes){
if (!(covtype %in% all_covtypes)){
stop(paste('covtype', covtype, 'is not one of the valid options'))
}
}
if (parallel & is.na(ncores)){
stop("Number of cores must be specified when parallel is set to TRUE")
}
if (!is.na(ncores)){
if (parallel && ncores > parallel::detectCores()){
stop("Number of cores requested exceeds maximum available number")
} else if (!parallel){
stop("Multiple core use requested for non-parallelized function")
}
}
# Check that all intervention parameters are equal in length
if ((is.null(intvars) && !is.null(interventions)) ||
(!is.null(intvars) && is.null(interventions))){
stop("Missing intervention information (intvars or interventions is NA)")
}
if (is.null(int_descript)){
if (length(intvars) != length(interventions)){
stop("Intervention parameters (intvars, interventions) are unequal lengths")
}
} else {
if (!all(sapply(list(length(intvars), length(interventions)),
FUN = identical, length(int_descript)))){
stop("Intervention parameters (intvars, interventions, int_descript) are unequal lengths")
}
}
if (!is.null(int_times)){
if (length(int_times) != length(interventions)){
stop("Intervention parameters (int_times, interventions) are unequal lengths")
}
}
# Check that, for static interventions, a vector of length time_points is given for the treatment values
if (is.null(time_points)){
time_points <- obs_time_points_pos
}
if (!is.null(interventions)){
for (i in seq_along(interventions)){
for (j in seq_along(interventions[[i]])){
if (identical(interventions[[i]][[j]][[1]], static) &&
length(interventions[[i]][[j]]) - 1 != time_points){
stop("For static interventions, a vector of length time_points must be given to specify the treatment values")
}
}
}
}
if (!is.null(int_times) && !all(unlist(int_times) %in% 0:(time_points - 1))){
stop("int_times includes time point(s) outside of 0, ..., t-1")
}
if ('categorical time' %in% covtypes & !is.factor(obs_data[[paste(time_name, "_f", sep = "")]])){
stop('Categorical time variable is not a factor')
}
if (is.null(interventions) && (hazardratio || !is.na(intcomp[[1]]))){
stop("Cannot calculate hazard ratio if no interventions are specified")
}
# Ensure that if user desires to generate function(s) of history for certain
# covariates, those covariates are named and the functions of history are specified
if (is.na(histories[1]) && !is.null(histvars)){
stop("Missing functions of history for covariates (histories)")
}
if (!is.na(histories[1]) && is.null(histvars)){
stop("Missing covariates for which to create histories (histvars)")
}
if (!is.na(histories[1]) & !is.null(histvars)){
if (!is.list(histvars)){
stop("histvars must be a list")
}
if (length(histories) != length(histvars)){
stop("History parameters (histories, histvars) are unequal lengths")
}
}
if (!is.na(histories[1])){
for (i in seq_along(histories)){
if (isTRUE(all.equal(histories[[i]], lagavg)) |
isTRUE(all.equal(histories[[i]], cumavg))){
for (histvar in histvars[[i]]){
if (covtypes[which(histvar == covnames)] %in%
c('categorical', 'categorical time')){
stop('Cannot apply (lagged) cumulative average function to categorical covariates')
}
}
}
if (isTRUE(all.equal(histories[[i]], lagged))){
if (length(histvals$lag_indicator) == 0) {
warning('lagged was supplied in histories, but no lagi_ terms appear in the model statements, where i refers to the number of lags back. If you intended to include a lag, you may have used the incorrect syntax in the model statements.',
immediate. = TRUE)
}
} else if (isTRUE(all.equal(histories[[i]], lagavg))){
if (length(histvals$lagavg_indicator) == 0) {
warning('lagavg was supplied in histories, but no lag_cumavgi terms appear in the model statements, where i refers to the number of lags back. If you intended to include a lagged average term, you may have used the incorrect syntax in the model statements.',
immediate. = TRUE)
}
} else if ((isTRUE(all.equal(histories[[i]], cumavg)))){
if (is.null(histvals$cumavg)){
warning('cumavg was supplied in histories, but no cumavg_ terms appear in the model statements. If you intended to include a cumulative average term, you may have used the incorrect syntax in the model statements.',
immediate. = TRUE)
}
}
}
}
# Ensure that if the simulated data set is going to be returned, bootstrap
# samples are not used
if (nsamples > 0 & sim_data_b){
stop("'sim_data_b' cannot be set to TRUE when nsamples > 0")
}
if (length(covmodels) != length(covnames)){
stop("covmodels and covnames are unequal lengths")
}
for (i in seq_along(covnames)){
if (covtypes[i] != 'categorical time'){
rel_model <- paste(deparse(covmodels[[i]]), collapse = "")
model_var <- stringr::str_extract(rel_model, '[^~]+')
model_var <- stringr::str_trim(model_var, 'left')
model_var <- stringr::str_trim(model_var, 'right')
if (!stringr::str_detect(covnames[i], model_var)){
stop("covmodels and covnames ordering do not match")
}
}
}
if (!is.null(ipw_cutoff_quantile) & !is.null(ipw_cutoff_value)){
stop("Only one of 'ipw_cutoff_quantile' and 'ipw_cutoff_value' can be supplied")
}
if (!is.null(ipw_cutoff_quantile)){
if (!is.numeric(ipw_cutoff_quantile)){
stop("'ipw_cutoff_quantile' must be a numeric variable")
}
if (ipw_cutoff_quantile <= 0 | ipw_cutoff_quantile > 1){
stop("'ipw_cutoff_quantile' must be between 0 and 1")
}
}
if (!is.null(ipw_cutoff_value)){
if (!is.numeric(ipw_cutoff_value)){
stop("'ipw_cutoff_value' must be a numeric variable")
}
if (ipw_cutoff_value <= 0){
stop("'ipw_cutoff_value' must be greater than 0")
}
}
}
trim_glm <- function(fit) {
fit$y <- c()
fit$model <- c()
fit$residuals <- c()
fit$fitted.values <- c()
fit$effects <- c()
fit$qr$qr <- c()
fit$linear.predictors <- c()
fit$weights <- c()
fit$prior.weights <- c()
fit$data <- c()
fit$family$variance <- c()
fit$family$dev.resids <- c()
fit$family$aic <- c()
fit$family$validmu <- c()
fit$family$simulate <- c()
attr(fit$terms,".Environment") <- c()
attr(fit$formula,".Environment") <- c()
return(fit)
}
trim_truncreg <- function(fit) {
fit$gradientObs <- c()
fit$fitted.values <- c()
fit$y <- c()
return(fit)
}
trim_multinom <- function(fit){
fit$fitted.values <- c()
fit$residuals <- c()
fit$weights <- c()
fit$y <- c()
return(fit)
}
add_rmse <- function(fit){
return (sqrt(mean((fit$y - stats::fitted(fit))^2)))
}
add_stderr <- function(fit){
if (any(class(fit) == 'multinom')){
return(summary(fit)$standard.errors)
} else {
return (stats::coefficients(summary(fit))[, "Std. Error"])
}
}
add_vcov <- function(fit){
return(stats::vcov(fit))
}
get_header <- function(int_descript, sample_size, nsimul, nsamples, ref_int){
header <- paste0("PREDICTED RISK UNDER MULTIPLE INTERVENTIONS\n\n",
"Intervention \t Description\n",
"0 \t\t Natural course\n")
if (!is.null(int_descript)){
for (i in seq_along(int_descript)){
header <- paste0(header, i, "\t\t ", int_descript[i], "\n")
}
}
header <- paste0(header, "\nSample size = ", sample_size,
", Monte Carlo sample size = ", nsimul, "\n",
"Number of bootstrap samples = ", nsamples, "\n")
if (ref_int == 0){
header <- paste0(header, "Reference intervention = natural course (0)\n")
} else {
header <- paste0(header, "Reference intervention = ", ref_int, "\n\n")
}
}
clusterAssign <- function(cl, name, value) {
parallel::clusterCall(cl, assign, x = name, value = value, envir = .GlobalEnv)
}
update_lag_indicator <- function(model, lag_indicator){
lag_terms <- unlist(stringr::str_extract_all(deparse(model), 'lag\\d+'))
lag_indicator_toadd <- as.numeric(stringr::str_extract(lag_terms, "\\d+"))
return(unique(c(lag_indicator, lag_indicator_toadd)))
}
update_lagavg_indicator <- function(model, lagavg_indicator){
lagavg_terms <- unlist(stringr::str_extract_all(deparse(model), 'lag_cumavg\\d+'))
lagavg_indicator_toadd <- as.numeric(stringr::str_extract(lagavg_terms, "\\d+"))
return(unique(c(lagavg_indicator, lagavg_indicator_toadd)))
}
update_cumavg_indicator <- function(model, cumavg_indicator){
if (!is.null(cumavg_indicator) |
any(stringr::str_detect(deparse(model), '[^_]cumavg'))){
return(1)
}
}
prep_cluster <- function(ncores, threads , covtypes, bootstrap_option = FALSE){
cl <- parallel::makeCluster(ncores)
myt <- rep(threads,ncores)
parallel::clusterApply(cl,myt,function(x) setDTthreads(x))
parallel::clusterCall(cl, library, package = 'data.table', character.only = TRUE)
if ('categorical' %in% covtypes){
parallel::clusterCall(cl, library, package = 'nnet', character.only = TRUE)
}
if ('truncated normal' %in% covtypes){
parallel::clusterCall(cl, library, package = 'truncreg', character.only = TRUE)
}
clusterAssign(cl, "make_histories", make_histories)
clusterAssign(cl, "rmse_calculate", rmse_calculate)
clusterAssign(cl, "pred_fun_cov", pred_fun_cov)
clusterAssign(cl, "fit_glm", fit_glm)
clusterAssign(cl, "fit_multinomial", fit_multinomial)
clusterAssign(cl, "fit_trunc_normal", fit_trunc_normal)
clusterAssign(cl, "pred_fun_Y", pred_fun_Y)
clusterAssign(cl, "pred_fun_D", pred_fun_D)
clusterAssign(cl, "visit_sum", visit_sum)
clusterAssign(cl, "natural", natural)
suppressWarnings(parallel::clusterExport(cl, as.vector(utils::lsf.str())))
if (bootstrap_option){
clusterAssign(cl, "simulate", simulate)
}
return(cl)
}
get_coeffs <- function(fits, fitD, time_points, outcome_name, compevent_name,
covnames){
coeffs <- lapply(fits, FUN = function(fit){
if (length(fit) == 2){
return (list(stats::coefficients(fit[[1]]), stats::coefficients(fit[[2]])))
} else {
return (stats::coefficients(fit))
}
})
if (!is.na(fitD)[[1]]){
if (time_points == 1){
coeffs <- stats::setNames(coeffs, c(outcome_name, compevent_name))
} else {
coeffs <- stats::setNames(coeffs, c(covnames, outcome_name, compevent_name))
}
}
else {
if (time_points == 1){
coeffs <- stats::setNames(coeffs, outcome_name)
} else {
coeffs <- stats::setNames(coeffs, c(covnames, outcome_name))
}
}
return(coeffs)
}
get_stderrs <- function(fits, fitD, time_points, outcome_name, compevent_name,
covnames){
stderrs <- lapply(fits, FUN = function(fit){
if (length(fit) == 2){
return (list(fit[[1]]$stderr, fit[[2]]$stderr))
} else {
return (fit$stderr)
}
})
if (!is.na(fitD)[[1]]){
if (time_points == 1){
stderrs <- stats::setNames(stderrs, c(outcome_name, compevent_name))
} else {
stderrs <- stats::setNames(stderrs, c(covnames, outcome_name, compevent_name))
}
}
else {
if (time_points == 1){
stderrs <- stats::setNames(stderrs, outcome_name)
} else {
stderrs <- stats::setNames(stderrs, c(covnames, outcome_name))
}
}
return(stderrs)
}
get_vcovs <- function(fits, fitD, time_points, outcome_name, compevent_name,
covnames){
vcovs <- lapply(fits, FUN = function(fit){
if (length(fit) == 2){
return (list(fit[[1]]$vcov, fit[[2]]$vcov))
} else {
return (fit$vcov)
}
})
if (!is.na(fitD)[[1]]){
if (time_points == 1){
vcovs <- stats::setNames(vcovs, c(outcome_name, compevent_name))
} else {
vcovs <- stats::setNames(vcovs, c(covnames, outcome_name, compevent_name))
}
}
else {
if (time_points == 1){
vcovs <- stats::setNames(vcovs, outcome_name)
} else {
vcovs <- stats::setNames(vcovs, c(covnames, outcome_name))
}
}
return(vcovs)
}
get_percent_intervened <- function(pools){
percent_intervened <- c(0, rep(NA, times = length(pools)))
average_percent_intervened <- c(0, rep(NA, times = length(pools)))
my_any <- function(x){
if (all(is.na(x))){
# Handles the edge case of an individual not having any eligible person-time
return(NA)
} else {
return(any(x, na.rm = TRUE))
}
}
if (length(pools) > 0){
for (int_num in 1:length(pools)){
df <- pools[[int_num]]
average_percent_intervened[int_num + 1] <- 100 * mean(df$intervened, na.rm = TRUE)
percent_intervened[int_num + 1] <- 100 * mean(tapply(df$intervened, df$id, my_any), na.rm = TRUE)
}
}
return(list(percent_intervened = percent_intervened,
average_percent_intervened = average_percent_intervened))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.