#' Run Analyses From Files
#'
#' This function runs a model from tabular input.
#'
#' The reference file should have two columns. `data`
#' can be added, having value `TRUE` where an absolute
#' file path is provided. `data` values must include
#' `state`, `tm`, and `parameters`, and can
#' also include `options`, `demographics` and
#' `data`. The corresponding values in the `file`
#' column give the names of the files (located in
#' `base_dir`) that contain the corresponding
#' information - or, in the case of `data`, the
#' directory containing the tables to be loaded.
#'
#' @param location Directory where the files are located.
#' @param reference Name of the reference file.
#' @param run_dsa Run DSA?
#' @param run_psa Run PSA?.
#' @param run_demo Run demographic analysis?
#' @param save Should the outputs be saved?
#' @param overwrite Should the outputs be overwritten?
#'
#' @return A list of evaluated models (always), and, if
#' appropriate input is provided, dsa (deterministic
#' sensitivity analysis), psa (probabilistic sensitivity
#' analysis) and demographics (results across different
#' demographic groups).
#'
#' @export
run_model_tabular <- function(location, reference = "REFERENCE.csv",
run_dsa = TRUE,
run_psa = TRUE, run_demo = TRUE,
save = FALSE, overwrite = FALSE) {
inputs <- gather_model_info(location, reference)
outputs <- eval_models_from_tabular(inputs,
run_dsa = run_dsa,
run_psa = run_psa,
run_demo = run_demo)
output_dir <- inputs$output_dir
if (save) {
save_outputs(outputs, output_dir, overwrite)
}
outputs
}
#' Gather Information for Running a Model From Tabular Data
#'
#' @param base_dir Directory where the files are located.
#' @param ref_file Name of the reference file.
#'
#' @return A list with elements: \itemize{ \item models (of
#' type `uneval_model`, created by
#' [create_model_list_from_tabular()]) \item
#' param_info \item output_dir where to store output
#' files, if specified \item demographic_file a table for
#' demographic analysis \item model_options a list of
#' model options.}
#'
#' @keywords internal
gather_model_info <- function(base_dir, ref_file) {
if (options()$heemod.verbose) message("* Reading files...")
if (options()$heemod.verbose) message("** Reading reference file...")
ref <- read_file(file.path(base_dir, ref_file))
if (any(pb <- duplicated(ref$data))) {
stop(sprintf(
"Duplicated values in reference file 'data' column: %s.",
paste(ref$data[pb], collapse = ", ")
))
}
if (is.null(ref$absolute_path)) {
ref$full_file <- file.path(base_dir, ref$file)
} else {
if (options()$heemod.verbose) message(sprintf(
"** Using absolute path for %s.",
paste(ref$data[ref$absolute_path & ! is.na(ref$absolute_path)],
collapse = ", ")
))
ref$full_file <- ifelse(
ref$absolute_path & ! is.na(ref$absolute_path),
ref$file,
file.path(base_dir, ref$file)
)
}
df_env <- new.env()
if (options()$heemod.verbose) message("** Reading model list...")
models <- create_model_list_from_tabular(
ref = ref,
df_env = df_env
)
model_options <- NULL
if ("options" %in% ref$data) {
if (options()$heemod.verbose) message("** Reading options...")
model_options <- create_options_from_tabular(
read_file(ref$full_file[ref$data == "options"])
)
}
if ("data" %in% ref$data) {
if (options()$heemod.verbose) message("** Reading external data...")
create_df_from_tabular(
ref$full_file[ref$data == "data"],
df_env
)
}
if ("source" %in% ref$data) {
if(options()$heemod.verbose) message("** Reading R source files...")
source_dir <- ref$full_file[ref$data == "source"]
short_source_dir <- ref$file[ref$data == "source"]
if (! dir.exists(source_dir)) {
stop("'source' directory missing: ", short_source_dir)
}
source_list <- list.files(source_dir, full.names = TRUE)
if (length(source_list) == 0) {
stop("No source files in 'source' directory: ",
short_source_dir)
}
for (this_file in source_list) {
source(this_file, echo = FALSE, local = df_env)
}
}
## note - the environment df_env gets included directly
## into param_info, so anything that will load anything
## into that environment needs to come before this statement
if (options()$heemod.verbose) message("** Reading parameters..")
param_info <- create_parameters_from_tabular(
read_file(ref$full_file[ref$data == "parameters"]),
df_env
)
output_dir <- NULL
if("output" %in% ref$data) {
if (options()$heemod.verbose) message("** Reading path to output directory...")
output_dir <- ref$full_file[ref$data == "output"]
}
demographic_file <- NULL
if("demographics" %in% ref$data) {
if (options()$heemod.verbose) message("** Reading demographic data...")
demographic_file <- create_demographic_table(
read_file(ref$full_file[ref$data == "demographics"]),
params = param_info$params
)
}
list(
models = models,
param_info = param_info,
output_dir = output_dir,
demographic_file = demographic_file,
model_options = model_options
)
}
#' Evaluate Models From a Tabular Source
#'
#' Execute a full set of analyses, possibly including
#' discrete sensitivity analysis, probabilistic sensitivity
#' analysis, and analyses across demographics.
#'
#' @param inputs Result from
#' [gather_model_info()].
#' @param run_dsa Run DSA?
#' @param run_psa Run PSA?
#' @param run_demo Run demographic analysis?
#'
#' @return a list \itemize{ \item `models` (always)
#' unevaluated model. \item `model_runs` (always)
#' evaluated models \item `dsa` (deterministic
#' sensitivity analysis) - if appropriate parameters
#' provided \item `psa` (probabilistic sensitivity
#' analysis) - if appropriate parameters provided \item
#' `demographics` results across different
#' demographic groups - if appropriate parameters
#' provided}
#'
#' @keywords internal
eval_models_from_tabular <- function(inputs,
run_dsa = TRUE,
run_psa = TRUE,
run_demo = TRUE) {
if (options()$heemod.verbose) message("* Running files...")
list_args <- c(
inputs$models,
list(
parameters = inputs$param_info$params,
init = inputs$model_options$init,
cost = inputs$model_options$cost,
effect = inputs$model_options$effect,
base_model = inputs$model_options$base_model,
method = inputs$model_options$method,
cycles = inputs$model_options$cycles
)
)
list_args <- Filter(
function(x) ! is.null(x),
list_args
)
if (! is.null(inputs$model_options$num_cores)) {
use_cluster(inputs$model_options$num_cores)
on.exit(
close_cluster()
)
}
if (options()$heemod.verbose) message("** Running models...")
model_runs <- do.call(
run_model,
list_args
)
model_dsa <- NULL
if (run_dsa & ! is.null(inputs$param_info$dsa)) {
if (options()$heemod.verbose) message("** Running DSA...")
model_dsa <- run_dsa(
model_runs,
inputs$param_info$dsa_params
)
}
model_psa <- NULL
if (! is.null(inputs$param_info$psa_params) & run_psa) {
if (options()$heemod.verbose) message("** Running PSA...")
model_psa <- run_psa(
model_runs,
psa = inputs$param_info$psa_params,
N = inputs$model_options$n
)
}
demo_res <- NULL
if (! is.null(inputs$demographic_file) & run_demo) {
if (options()$heemod.verbose) message("** Running demographic analysis...")
demo_res <- stats::update(model_runs, inputs$demographic_file)
}
## if(! is.null(inputs$model_options$num_cores)) {
## close_cluster()
## }
list(
models = inputs$models,
model_runs = model_runs,
dsa = model_dsa,
psa = model_psa,
demographics = demo_res
)
}
#' Read Models Specified by Files
#'
#' @param ref Imported reference file.
#' @param df_env An environment containing external data.
#'
#' @return A list of unevaluated models.
#'
#' @keywords internal
create_model_list_from_tabular <- function(ref, df_env = globalenv()) {
if(! inherits(ref, "data.frame")) stop("'ref' must be a data frame.")
if (options()$heemod.verbose) message("*** Reading states...")
state_file_info <-
read_file(ref$full_file[ref$data == "state"])
state_info <- parse_multi_spec(
state_file_info,
group_vars = ".state"
)
state_names <- state_info[[1]]$.state
## to accomodate partitioned survival models, we will allow for
## the possibility that there is no transition matrix ...
if (options()$heemod.verbose) message("*** Reading TM...")
tm_info <- read_file(ref$full_file[ref$data == "tm"])
trans_type <- transition_type(tm_info)
if (trans_type == "matrix") {
tm_info <- parse_multi_spec(
tm_info,
group_vars = c("from", "to"))
tab_undefined <-
dplyr::bind_rows(tm_info) %>%
dplyr::filter(is.na(.data$prob))
if (nrow(tab_undefined) > 0) {
rownames(tab_undefined) <- NULL
print(tab_undefined)
stop("Undefined probabilities in the transition matrix (see above).")
}
one_way <- setdiff(names(state_info), names(tm_info))
other_way <- setdiff(names(tm_info), names(state_info))
}
if(trans_type == "part_surv"){
use_fits_file <- ref[ref$data == "use_fits", "full_file"]
use_fits <- read_file(use_fits_file)
tm_info <- construct_part_surv_tib(use_fits, ref, env = df_env,
state_names = state_names)
one_way <- setdiff(names(state_info), unique(tm_info$.strategy))
other_way <- setdiff(unique(tm_info$.strategy), names(state_info))
}
one_way <- setdiff(names(state_info), names(tm_info))
other_way <- setdiff(names(tm_info), names(state_info))
if (length(c(one_way, other_way))){
err_string <- "Mismatching model names between transition (TM) file and state file.\n"
if(length(one_way))
err_string <-
paste(err_string,
"In state file but not TM file:",
paste(one_way, collapse = ", "),
"\n")
if(length(other_way))
err_string <-
paste(err_string,
"In TM but not state file:",
paste(other_way, collapse = ", "),
"\n")
stop(err_string)
}
if(trans_type == "part_surv")
tm_info <-
dplyr::filter(tm_info, .data$.strategy %in% names(state_info))
else
tm_info <- tm_info[names(state_info)]
if (options()$heemod.verbose) message("*** Defining models...")
models <- lapply(
seq_along(state_info),
function(i) {
if(inherits(tm_info, "tbl_df"))
this_tm <- dplyr::filter(
tm_info,
.data$.strategy == names(state_info)[i])$part_surv[[1]]
else
this_tm <- tm_info[[i]]
create_model_from_tabular(state_info[[i]],
this_tm,
df_env = df_env)
})
names(models) <- names(state_info)
models
}
#' Create State Definitions From Tabular Input
#'
#' Transforms tabular input defining states into an
#' `heemod` object.
#'
#' Columns of state_info besides .model and state include
#' costs and utilities we want to keep track of, with
#' appropriate values (these may include parameters). For
#' any cost or utility that should be discounted, an
#' additional column with the name ".discount.\<cost\>" or
#' ".discount.\<effect\>", for the appropriate cost or effect,
#' can be included. If no discounting is desired for a
#' particular cost or effect, the corresponding column can
#' be omitted.
#'
#' A discount column can contain only a single value - a
#' cost or benefit must be discounted by the same amount in
#' each state. Discounts can be numbers or parameters (which
#' will then need to be defined like any other).
#'
#' The input data frame is expected to contain state
#' information for all the models you will use in an
#' analysis. For more information see the vignette:
#' `vignette("file-input", package = "heemod")`.
#'
#' @param state_info Result for one model of
#' [parse_multi_spec()].
#' @param df_env An environment containing external data.
#'
#' @return A state list.
#'
#' @keywords internal
create_states_from_tabular <- function(state_info,
df_env = globalenv()) {
if(! inherits(state_info, "data.frame")) {
stop("'state_info' must be a data frame.")
}
if(!(".state" %in% names(state_info))) {
stop("'.state' should be a column name of the state file.")
}
if (any(duplicated(state_info$.state))) {
stop(sprintf(
"Duplicated state names: %s.",
paste(unique(state_info$.state[duplicated(state_info$.state)]),
sep = ", ")
))
}
state_names <- state_info$.state
values <- setdiff(names(state_info), c(".model", ".state"))
discounts <- values[grep("^\\.discount", values)]
values <- setdiff(values, discounts)
discounts_clean <- gsub("^\\.discount\\.(.+)", "\\1", discounts)
num_missing_per_column <- colSums(sapply(state_info, is.na))
missing_col_names <- names(num_missing_per_column)[num_missing_per_column > 0]
## missing names are allowed for discount columns
missing_col_names <- setdiff(missing_col_names, discounts)
if(length(missing_col_names)){
stop("value",
plur(length(missing_col_names)),
" ",
paste(missing_col_names, collapse = ", "),
" for strategy '",
unique(state_info$.model),
"'",
ifelse(length(missing_col_names) == 1, " has ", " have "),
"missing values in the state file.\n",
"Please make sure all values are defined for all states ",
"(even when the value is 0)."
)
}
if (! all(discounts_clean %in% values)) {
stop(sprintf(
"Discounting rates defined for non-existing values: %s.",
paste(discounts[! discounts %in% values], collapse = ", ")
))
}
for (n in discounts) {
if (all(is.na(state_info[[n]]))) {
stop(sprintf(
"No discount values found for '%s'.", n
))
} else if (length(unique(stats::na.omit(state_info[[n]]))) > 1) {
stop(sprintf(
"Multiple discount values for '%s'.", n
))
} else {
state_info[[n]] <- stats::na.omit(state_info[[n]])[1]
}
}
for (n in discounts_clean) {
state_info[[n]] <- sprintf(
"discount(%s, %s)",
state_info[[n]],
state_info[[paste0(".discount.", n)]]
)
}
res <- define_state_list_(
stats::setNames(lapply(
state_info$.state,
function(state) {
define_state_(
list(
.dots = lazyeval::as.lazy_dots(
stats::setNames(as.character(lapply(
values,
function(value) {
state_info[[value]][state_info$.state == state]
}
)), values),
env = df_env
),
starting_values = define_starting_values()
)
)
}
), state_info$.state)
)
if (options()$heemod.verbose) print(res)
res
}
#' Create a Transition Matrix From Tabular Input
#'
#' Transforms tabular input defining a transition matrix
#' into an `heemod` object.
#'
#' The data frame `trans_probs` should have columns
#' `from`, `to`, and `prob`, where
#' `prob` is the probability of a transition from the
#' `from` state to the `to` state. Prob can be
#' defined in terms of parameters, just as when using
#' `define_transition` at the keyboard. Probabilities of 0
#' need not be specified - they will be automatically
#' inserted.
#'
#' All state names must be used in the `from` column of
#' the transition matrix (otherwise you can just get rid of
#' the state). Absorbing states should have a transition
#' from and to themselves with probability 1.
#'
#' @param trans_probs Result for one model of
#' [parse_multi_spec()].
#' @param state_names The names of the states used in the
#' transition matrix.
#' @param df_env An environment containing external data.
#'
#' @return A transition matrix.
#'
#' @keywords internal
create_matrix_from_tabular <- function(trans_probs, state_names,
df_env = globalenv()) {
if(! inherits(trans_probs, "data.frame")) {
stop("'trans_probs' must be a data frame.")
}
stopifnot(
all(c("from", "to", "prob") %in% names(trans_probs)),
length(state_names) > 0
)
unique_states <- unique(c(trans_probs$from, trans_probs$to))
## we can have an initial state where people start but
## can't get to, so we don't check trans_probs$to states
## the way we check trans_probs$from states
if (! all(trans_probs$to %in% trans_probs$from)) {
stop(sprintf(
"Some states do not have an exit probability: %s.",
paste(
unique(trans_probs$to)[! unique(trans_probs$to) %in% trans_probs$from],
sep = ", "
)
))
}
if(! all(unique_states %in% state_names)) {
stop(sprintf(
"Some states specified in the transition matrix differ from 'state_names': %s.",
unique_states[! unique_states %in% state_names]
))
}
## set up matrix of 0's, and then add the listed
## probabilities
num_states <- length(unique_states)
prob_mat <- matrix(0, nrow = num_states, ncol = num_states,
dimnames = list(state_names, state_names))
prob_mat[as.matrix(trans_probs[, c("to", "from")])] <- trans_probs$prob
res <- define_transition_(
lazyeval::as.lazy_dots(as.character(prob_mat), env = df_env),
state_names = state_names
)
if (options()$heemod.verbose) print(res)
res
}
#' Create a Parameter Definition From Tabular Input
#'
#' If specified in the tabular file, DSA and PSA can also be
#' created.
#'
#' The tabular parameter definition file can have the
#' following columns: `parameter` (the parameter name,
#' required), `value` (required), `low` and
#' `high` (if both are present, a deterministic
#' sensitivity analysis will be performed), and `psa`
#' (a definition of a distribution to use in a probabilistic
#' sensitivity analysis. Other columns will be ignored.
#'
#' @param param_defs A parameter definition file.
#' @param df_env An environment containing external data.
#'
#' @return The parameter definition.
#'
#' @keywords internal
create_parameters_from_tabular <- function(param_defs,
df_env = globalenv()) {
if(! inherits(param_defs, "data.frame"))
stop("'param_defs' must be a data frame.")
if(! "parameter" %in% names(param_defs))
stop("parameter file must include the column 'parameter'")
if (xor("low" %in% names(param_defs),
"high" %in% names(param_defs))) {
stop("Both 'low' and 'high' columns must be present in parameter file to define DSA.")
}
parameters <- define_parameters_(
lazyeval::as.lazy_dots(
stats::setNames(
as.character(lapply(param_defs$value, function(x) x)),
param_defs$parameter
),
env = df_env
)
)
dsa <- psa <- NULL
if ("low" %in% names(param_defs) &&
"high" %in% names(param_defs)) {
if (! all(is.na(param_defs$low) ==
is.na(param_defs$high))) {
stop("'low' and 'high' must be both non missing in DSA tabular definition.")
}
if (all(is.na(param_defs$low))) {
stop("No non-missing values in columns 'low' and 'high'.")
}
param_sens <- param_defs$parameter[! is.na(param_defs$low)]
low <- stats::na.omit(param_defs$low)
high <- stats::na.omit(param_defs$high)
dsa <- define_dsa_(
par_names = param_sens,
low_dots = lazyeval::as.lazy_dots(
stats::setNames(
as.character(lapply(low, function(x) x)),
param_sens
),
env = df_env
),
high_dots = lazyeval::as.lazy_dots(
stats::setNames(
as.character(lapply(high, function(x) x)),
param_sens
),
env = df_env
)
)
}
if ("psa" %in% names(param_defs)) {
if (all(is.na(param_defs$psa))) {
stop("No non-missing values in column 'psa'.")
}
param_psa <- param_defs$parameter[! is.na(param_defs$psa)]
distrib_psa <- stats::na.omit(param_defs$psa)
psa <-
do.call(define_psa,
lapply(
seq_along(param_psa),
function(i) {
substitute(
lhs ~ rhs,
list(
lhs = parse(text = param_psa[i])[[1]],
rhs = parse(text = distrib_psa[i])[[1]]))
}
)
)
}
## if there are multinomials specified,
## fix them up in the parameters and redefine
param_defs_new <-
modify_param_defs_for_multinomials(param_defs, psa)
parameters <- define_parameters_(
lazyeval::as.lazy_dots(
stats::setNames(
as.character(lapply(param_defs_new$value, function(x) x)),
param_defs_new$parameter
),
env = df_env
)
)
list(
params = parameters,
dsa_params = dsa,
psa_params = psa
)
}
#' Create Model Options From a Tabular Input
#'
#' @param opt An option data frame.
#'
#' @return A list of model options.
#'
#' @keywords internal
create_options_from_tabular <- function(opt) {
allowed_opt <- c("cost", "effect", "init",
"method", "base", "cycles", "n",
"num_cores")
if(! inherits(opt, "data.frame"))
stop("'opt' must be a data frame.")
if (any(ukn_opt <- ! opt$option %in% allowed_opt)) {
stop(sprintf(
"Unknown options: %s.",
paste(opt$option[ukn_opt], collapse = ", ")
))
}
if (any(duplicated(opt$option))) {
stop("Some option names are duplicated: ",
paste(unique(opt$option[duplicated(opt$option)]),
collapse = ", "))
}
res <- list()
for (n in opt$option) {
res <- c(res, list(opt$value[opt$option == n]))
}
names(res) <- opt$option
if (! is.null(res$init)) {
if(substr(res$init, 1, 2) == "c(" &
substr(res$init, nchar(res$init), nchar(res$init)) == ")"){
res$init <- substr(res$init, 3, nchar(res$init))
res$init <- substr(res$init, 1, nchar(res$init) - 1)
warning("initial values enclosed in c(); removing")
}
res$init <- as_numeric_safe(
strsplit(res$init, ",")[[1]]
)
}
if (! is.null(res$cycles)) {
res$cycles <- as_integer_safe(res$cycles)
}
if (! is.null(res$n)) {
res$n <- as_integer_safe(res$n)
}
if (! is.null(res$cost)) {
res$cost <- parse(text = res$cost)[[1]]
}
if (! is.null(res$effect)) {
res$effect <- parse(text = res$effect)[[1]]
}
if (! is.null(res$num_cores)){
res$num_cores <- parse(text = res$num_cores)[[1]]
}
if (options()$heemod.verbose) message(paste(
names(res), unlist(res), sep = " = ", collapse = "\n"
))
res
}
#' Create a `heemod` Model From Tabular Files Info
#'
#' Calls [create_states_from_tabular()] and
#' [create_matrix_from_tabular()].
#'
#' @param state_info A state tabular file (file path or
#' parsed file).
#' @param tm_info A transition matrix tabular file (file
#' path or parsed file).
#' @param df_env An environment containing external data.
#'
#' @return A `heemod` model as returned by
#' [define_strategy()].
#'
#' @keywords internal
create_model_from_tabular <- function(state_info,
tm_info,
df_env = globalenv()) {
if (length(tm_info) == 0) {
stop("A transition object must be defined.")
}
if (! inherits(state_info, "data.frame")) {
stop("'state_info' must be a data frame.")
}
if (!is.null(tm_info) &&
! inherits(tm_info, c("data.frame", "part_surv"))) {
stop("'tm_info' must be either a data frame ",
"defining a transition matrix or a part_surv object ",
"defining a partitioned survival model.")
}
if (options()$heemod.verbose) message("**** Defining state list...")
states <- create_states_from_tabular(state_info,
df_env = df_env)
if (options()$heemod.verbose) message("**** Defining TM...")
if (inherits(tm_info, "data.frame")) {
TM <- create_matrix_from_tabular(
tm_info, get_state_names(states),
df_env = df_env)
}
if (inherits(tm_info, "part_surv")) {
TM <- tm_info
}
define_strategy_(transition = TM, states = states,
starting_values = check_starting_values(
define_starting_values(),
get_state_value_names(states)))
}
#' Load Data From a Folder Into an Environment
#'
#' Reads files containing data frames (in tabular format)
#' from a directory, and loads them in an environment to be
#' available during an analysis.
#'
#' The files must be in .csv, .xls, or .xlsx format. A file
#' my_df.csv (or my_df.xls, or my_df.xlsx) will be loaded as
#' a data frame my_df.
#'
#' @param df_dir A directory containing the files.
#' @param df_envir An environment.
#'
#' @return The environment with the data frames.
#'
#' @keywords internal
create_df_from_tabular <- function(df_dir, df_envir) {
if(! file.exists(df_dir)) {
stop(paste(df_dir, "does not exist."))
}
all_files <- list.files(df_dir, full.names = TRUE)
## work around an annoyance around open files in windows
if(.Platform$OS.type == "windows") {
part_files <- list.files(df_dir, full.names = FALSE)
accidental_files <- grep("^~.*\\.xlsx?", part_files)
if(length(accidental_files) > 0) {
all_files <- all_files[- accidental_files]
}
}
obj_names <- all_files %>%
basename %>%
tools::file_path_sans_ext()
if(any(duplicate_names <- duplicated(obj_names)))
stop(paste("Duplicate data names:",
paste(obj_names[duplicate_names], collapse = ", ")))
## do the assignments
for(i in seq(along = all_files)){
this_val <- read_file(all_files[i])
## check for accidental commas in numbers
comma_cols <-
which(sapply(sapply(this_val, function(x){grep(",", x)}),
any)
)
for(this_comma_col in comma_cols){
try_numeric <- try(as.numeric(gsub(",", "", this_val[, this_comma_col])),
silent = TRUE)
if(!inherits(try_numeric, "try-error")){
this_val[, this_comma_col] <- try_numeric
message(paste("converting column",
names(this_val)[this_comma_col],
"from file",
basename(all_files[i]),
"to numeric despite it having commas"
)
)
}
}
assign(obj_names[i], this_val, envir = df_envir)
}
df_envir
}
#'Specify Inputs for Multiple Models From a Single File
#'
#'Parse a `data.frame` containing specifications for
#'multiple models into a list of inputs required for each
#'model.
#'
#'Each combination of values of the columns specified by
#'`group_vars` should either be unique in the file (in
#'which case it will be replicated for all values of
#'`split_on`), or must be repeated as many times as
#'unique values of `split_on`.
#'
#'`split_on` is usually the model name.
#'
#'`group_var` can be the state names, or from and to
#'lines for a matrix definition...
#'
#'@param multi_spec `data frame`.
#'@param split_on `character` of length 1, with the
#' name of the variable in `multi_spec` to be split
#' on.
#'@param group_vars `character`, one or more variable
#' names from `multi_spec` that identify a line of
#' information.
#'
#'@return A list of data frames, one for each value of
#' `split_on.`
#'
#' @keywords internal
parse_multi_spec <- function(multi_spec,
split_on = ".model",
group_vars) {
if(! inherits(multi_spec, "data.frame"))
stop("'multi_spec' must be a data frame.")
if(length(split_on) != 1) stop("'split_on' must have a length of exactly 1.")
if(any(names(multi_spec) == "")) stop("'multi_spec' can't have empty names.")
if(! all(c(split_on, group_vars) %in% names(multi_spec)))
stop("'split_on' and 'group_vars' must be column names of the input 'multi_spec'.")
remaining <- setdiff(names(multi_spec), c(split_on, group_vars))
## any line that exists for one split but not others
## needs to be duplicated for all splits
## throw an error if a parameter exists for more than one,
## but not all
unique_splits <- unique(multi_spec[, split_on])
num_splits <- length(unique_splits)
occurences <- multi_spec %>%
dplyr::group_by(!!!syms(as.list(group_vars))) %>%
dplyr::summarize(count = n())
orig_order <- unique(multi_spec[, group_vars, drop = FALSE])
wrong_number_times <-
occurences$count > 1 & occurences$count < num_splits
if(any(wrong_number_times)) {
stop("'group_var' combinations must be specified either once for all splits or once for each split.")
}
just_once <- multi_spec %>%
dplyr::group_by(!!!syms(as.list(group_vars))) %>%
dplyr::filter( n() == 1) %>%
dplyr::select(-dplyr::one_of(split_on))
just_once <- data.frame(
temp = rep(unique_splits, nrow(just_once)),
just_once[rep(seq_len(nrow(just_once)), each = num_splits), ],
stringsAsFactors = FALSE
)
names(just_once)[1] <- split_on
more_than_once <- multi_spec %>%
dplyr::group_by(!!! syms(as.list(group_vars))) %>%
dplyr::filter(n() > 1)
multi_spec <-
dplyr::bind_rows(just_once, as.data.frame(more_than_once))
rownames(multi_spec) <- NULL
list_spec <- split(multi_spec, multi_spec[, split_on])
## sort by order of appearance of split variables in multi_spec
## (making first model mentioned the base model, similar to earlier system)
orig_split_order <- unique(multi_spec[, split_on])
list_spec <- list_spec[orig_split_order]
## want to make sure everything comes out sorted
## by order of appearance of values of group_vars
lapply(list_spec, function(x) {
match_to_orig <-
sapply(group_vars, function(this_var){
match(x[, this_var], orig_order[, this_var])})
x[do.call("order", as.data.frame(match_to_orig)),]
})
}
#' Read a Demographic Table
#'
#' This function mostly checks whether the parameters are
#' correct.
#'
#' An optional `.weights` column can exist in the file.
#'
#' @param newdata A data frame.
#' @param params Parameters of a model, to check that all
#' the columns in the demographic table (other than the
#' weight column) are in the model.
#' @param ... catches other, unwanted arguments.
#'
#' @return A data frame.
#'
#' @keywords internal
create_demographic_table <- function(newdata,
params) {
weight_col <- which(names(newdata) == ".weights")
if (length(weight_col)) {
var_names <- names(newdata)[- weight_col]
} else {
var_names <- names(newdata)
}
valid_names <- var_names %in% names(params)
if(! all(valid_names)) {
invalid_names <- paste(var_names[!valid_names], collapse = ", ")
stop(sprintf(
"The following columns in the demographic table are not parameters of the model: %s.",
invalid_names
))
}
newdata
}
#' Read the accepted file formats for tabular input
#'
#' Columns starting with '.comment' are ignored.
#'
#' @param file_name File name.
#'
#' @return A `data.frame`.
#'
#' @keywords internal
read_file <- function(file_name) {
have_xls <- is_xls(file_name)
have_xlsx <- is_xlsx(file_name)
have_csv <- is_csv(file_name)
if(! have_csv & ! have_xls & ! have_xlsx) {
stop("file names must be for csv, xls, or xlsx")
}
if(have_csv) {
tab <- utils::read.csv(
file_name, header = TRUE,
stringsAsFactors = FALSE,
strip.white = TRUE,
na.strings = c(".", "NA", "")
)
} else if(have_xls | have_xlsx) {
if (! requireNamespace("readxl", quietly = TRUE)) {
stop("readxl packaged needed to read Excel files")
} else {
tab <- as.data.frame(
readxl::read_excel(file_name)
)
}
}
## get rid of "comment" columns, if any
tab <- tab[! grepl("^\\.comment", names(tab))]
## get rid of NA rows that may have come from blank rows
## in file
tab <- filter_blanks(tab)
tab
}
#' Remove Blank Rows From Table
#'
#' Remove rows were all values are `NA`.
#'
#' Some rows can be left blanks in the input table for
#' readability, this function ensures those rows are
#' removed.
#'
#' @param x A `data.frame`.
#'
#' @return A `data.frame` without blank rows.
#'
#' @keywords internal
filter_blanks <- function(x) {
x[! apply(is.na(x), 1, all), , drop = FALSE]
}
#' Check File Type
#'
#' @param x A file name.
#' @return Whether the file is (respectively)
#' csv, xlsx, or xls.
#' @rdname file-checkers
#'
#' @keywords internal
is_csv <- function(x) {
tolower(tools::file_ext(x)) == "csv"
}
#' @rdname file-checkers
is_xlsx <- function(x) {
tolower(tools::file_ext(x)) == "xlsx"
}
#' @rdname file-checkers
is_xls <- function(x) {
tolower(tools::file_ext(x)) == "xls"
}
#' Save Model Outputs
#'
#' @param outputs Result from
#' [run_model_tabular()].
#' @param output_dir Subdirectory in which to write output.
#' @param overwrite Should the outputs be overwritten?
#'
#' @return `NULL`. Used for its side effect of creating
#' files in the output directory.
#'
#' @keywords internal
save_outputs <- function(outputs, output_dir, overwrite) {
if (options()$heemod.verbose) message("* Saving outputs...")
if(is.null(output_dir)) {
warning("Output directory not defined in the specification file - the outputs will not be saved.")
return(NULL)
} else if(dir.exists(output_dir) & ! overwrite) {
warning("Output directory exists and overwrite is FALSE - the outputs will not be saved.")
return(NULL)
} else {
delete_success <- 0
if(dir.exists(output_dir)) {
delete_success <- unlink(output_dir, recursive = TRUE)
}
if (delete_success == 1) {
warning("Failed to delete output directory.")
return(NULL)
}
dir.create(output_dir)
}
## some csv files
if (options()$heemod.verbose) message("** Writing tabular outputs to files ...")
if (! is.null(outputs$demographics)) {
utils::write.csv(
summary(outputs$demographics)$scaled_results,
file = file.path(output_dir, "icer_by_group.csv"),
row.names = FALSE
)
}
utils::write.csv(
get_counts(outputs$model_runs),
file = file.path(output_dir, "state_counts.csv"),
row.names = FALSE
)
utils::write.csv(
get_values(outputs$model_runs),
file = file.path(output_dir, "cycle_values.csv"),
row.names = FALSE
)
if(!is.null(outputs$dsa)){
utils::write.csv(
summary(outputs$dsa)$res_comp,
file = file.path(output_dir, "dsa.csv"),
row.names = FALSE
)
}
utils::write.csv(
outputs$psa$psa,
file = file.path(output_dir, "psa_values.csv"),
row.names = FALSE
)
## plots about individual models
if (options()$heemod.verbose) message("** Generating plots for individual models...")
this_plot <- plot(outputs$model_runs)
this_file <- "state_count_plot"
save_graph(this_plot, output_dir, this_file)
if(!is.null(outputs$dsa)){
this_plot <- plot(outputs$dsa)
this_file <- "dsa"
save_graph(this_plot, output_dir, this_file)
}
## plots about differences between models
if (options()$heemod.verbose) message("** Generating plots with model differences...")
if(!is.null(outputs$dsa)){
this_plot <- plot(outputs$dsa, type = "difference")
this_file <- "dsa_diff"
save_graph(this_plot, output_dir, this_file)
}
if(!is.null(outputs$psa)){
this_plot <- plot(outputs$psa)
this_file <- paste("psa")
save_graph(this_plot, output_dir, this_file)
## acceptability curve
if (options()$heemod.verbose) message("** Generating acceptability curve...")
this_plot <- plot(outputs$psa, type = "ac")
save_graph(this_plot,
output_dir, "acceptability")
}
invisible(NULL)
}
save_graph <- function(plot, path, file_name) {
full_file <- file.path(path, file_name)
grDevices::png(filename = paste(full_file, "png", sep = "."))
print(plot)
grDevices::dev.off()
filename <- paste(full_file, "pdf", sep = ".")
if(capabilities("cairo")){
grDevices::cairo_pdf(filename = filename)
} else {
grDevices::pdf(file = filename)
}
print(plot)
grDevices::dev.off()
}
transition_type <- function(tm_info){
which_defines <- NULL
if(all(names(tm_info)[1:4] == c(".model", "from", "to", "prob"))){
which_defines <- "matrix"
}
else{
if(all(sort(names(tm_info)[1:10]) ==
sort(c("type", "treatment", "data_directory",
"data_file", "fit_directory", "fit_name",
"fit_file", "time_col", "treatment_col",
"censor_col"))))
which_defines <- "part_surv"
}
if(is.null(which_defines))
stop("the data frame tm_info must define ",
"either a transition matrix or a partitioned survival object")
which_defines
}
modify_param_defs_for_multinomials <- function(param_defs, psa) {
param_names <- param_defs[, 1]
multinom_pos <- grep("\\+", param_names)
multinom_vars <- lapply(
multinom_pos,
function(i) {
strsplit(param_names[[i]], "+", fixed = TRUE)[[1]]
})
multinom_vars <- lapply(
multinom_vars,
function(x) {
gsub("[[:space:]]", "", x)
})
duplicates <- sapply(
param_defs$parameter,
function(x){
x %in% unlist(multinom_vars)
})
if (any(duplicates)) {
stop("Some variables appear as individual parameters ",
"and in a multinomial: ",
paste(param_defs$parameter[duplicates], collapse = ", ")
)
}
multinom_counts <- lapply(
multinom_vars,
function(x) {
sapply(
psa$list_qdist[x],
function(y) {
eval(expression(n),
environment(y))
})
})
multinom_probs <- lapply(
multinom_counts,
function(x) {
x / sum(x)
})
replacements <- lapply(
multinom_probs,
function(x) {
zz <- data.frame(
parameter = names(x),
value = x,
stringsAsFactors = FALSE)
rownames(zz) <- NULL
zz
})
## now we need to put these where they initially appeared
## in the parameter file
param_defs <- param_defs[, c("parameter", "value")]
param_defs_old <- param_defs
for (i in rev(seq(along = multinom_pos))) {
this_pos <- multinom_pos[i]
start_index <- 1:(this_pos - 1)
end_index <-
if (this_pos == nrow(param_defs)) {
numeric(0)
} else {
(this_pos + 1):nrow(param_defs)
}
param_defs <- dplyr::bind_rows(
param_defs[start_index,],
replacements[[i]],
param_defs[end_index,])
}
param_defs
}
## make sure that survival fit specifications are
## properly formatted and have reasonable data
check_survival_specs <-
function(surv_specs){
## if there are troubles with absolute file paths,
## might want to add an "absolute" column to surv_specs
## to be explicit (if, possibly, a little redundant)
if(!is.data.frame(surv_specs) || nrow(surv_specs) == 0)
stop("surv_specs must be a data frame with at least one row")
if(!("event_code" %in% names(surv_specs))){
warning("event_code not defined in surv_specs; setting to 1 for all rows")
surv_specs$event_code <- 1
}
if(!("censor_code" %in% names(surv_specs))){
warning("censor_code not defined in surv_specs; setting to 0 for all rows")
surv_specs$censor_code <- 0
}
surv_spec_col_names <- c("type", "treatment", "data_directory", "data_file",
"fit_directory", "fit_name", "fit_file",
"time_col", "treatment_col", "censor_col",
"event_code", "censor_code"
)
if(! identical(sort(names(surv_specs)), sort(surv_spec_col_names))){
extra_names <- setdiff(names(surv_specs), surv_spec_col_names)
missing_names <- setdiff(surv_spec_col_names, names(surv_specs))
names_message <- paste("surv_ref must have column names:\n",
paste(surv_spec_col_names, collapse = ", "),
sep = "")
if(length(extra_names) > 0)
names_message <- paste(names_message, "\n", "extra names: ",
paste(extra_names, collapse = ", "),
sep = "")
if(length(missing_names) > 0)
names_message <- paste(names_message, "\n", "missing names: ",
paste(missing_names, collapse = ", "),
sep = "")
stop(names_message)
}
if(any(is.na(surv_specs) | surv_specs == ""))
stop("all elements of surv_specs must be filled in")
## sort survival specs by treatment, then PFS and OS
## our checks will make sure that we have the right entries,
## and that they are in the right order
surv_specs <-
surv_specs %>% dplyr::arrange(.data$treatment, desc(.data$type))
os_ind <- grep("os", surv_specs$type, ignore.case = TRUE)
pfs_ind <- grep("pfs", surv_specs$type, ignore.case = TRUE)
all_even_os <- identical(as.numeric(os_ind),
seq(from = 2,
to = nrow(surv_specs),
by = 2))
all_odd_pfs <- identical(as.numeric(pfs_ind),
seq(from = 1,
to = nrow(surv_specs),
by = 2))
same_treatments <-
identical(surv_specs$treatment[os_ind],
surv_specs$treatment[pfs_ind])
if(!all_even_os | !all_odd_pfs | !same_treatments)
stop("each treatment must have exactly one PFS and one OS entry")
if(any(dups <- duplicated(surv_specs[, c("type", "treatment")]))){
print(surv_specs[dups,])
stop("survival fit specification can only have one row for fitting ",
"OS or PFS for a given treatment\n")
}
if(any(dups <- duplicated(surv_specs[, c("fit_directory", "fit_file",
"time_col", "censor_col")]))){
print(surv_specs[dups,])
stop("can only specify a given data file in a given directory with ",
"the same time column and censoring column for one fit")
}
surv_specs
}
#' Load a set of survival fits
#'
#' @param location base directory
#' @param survival_specs information about fits
#' @param use_envir an environment
#'
#' @return A list with two elements: \itemize{
#' \item{`best_models`,
#' a list with the fits for each data file passed in; and}
#' \item{`envir`,
#' an environment containing the models so they can be referenced to
#' get probabilities.}
#' }
#' @export
#'
load_surv_models <- function(location, survival_specs, use_envir){
fit_files <- file.path(location,
survival_specs$fit_directory,
survival_specs$fit_file)
for(index in seq(along = fit_files)){
this_fit_file <- fit_files[index]
this_fit_name <- survival_specs$fit_name[index]
load(paste(this_fit_file, ".RData", sep = ""))
}
surv_models <- mget(survival_specs$fit_name)
names(surv_models) <- survival_specs$fit_name
list(do.call("rbind", mget(survival_specs$fit_name)),
env = use_envir)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.