Nothing
# {Main fxs} ----
prep_terms <- function(dat,
model = c("base", "uacr", "hba1c", "sdi", "full"),
units = c("mg/dL", "mmol/L")) {
model <- match.arg(model)
units <- match.arg(units)
add_const_and_unlist <- function(pred_dat) {
pred_dat %>% dplyr::mutate(constant = 1) %>% as.list() %>% unlist()
}
remove_vars_for <- function(dat, col_name_contains) {
request <- col_name_contains %>% strsplit("|", fixed = TRUE) %>% unlist()
stopifnot(request %in% c("uacr", "hba1c", "sdi"))
dat %>% dplyr::select(-matches(col_name_contains))
}
# All models centered as follows, where relevant:
# age 55
# non-HDL-C 3.5
# HDL-C 1.3
# SBP 130
# BMI 25
# eGFR 90
# HbA1c 5.3
# (no DM, not smoking, no medication)
# However, for terms involved in a piece-wise linear spline
# (sbp < vs. >= 110, bmi < vs. >= 30, egfr < vs. >= 60),
# centering occurs for the portion of the spline
# where the centering value might actually occur; for the other portion
# of the spline, the value subtracted is the value at which the knot
# was placed
pred_vals <- dat %>%
dplyr::transmute(
age = (age - 55) / 10,
# Age squared is only used by 30-yr models, but all other terms
# are the same. As such, include it in prepared terms, but remove
# during run of `run_models()` if running a 10-yr model
# More precisely, age squared is ((age - 55) / 10)^2, but this is already
# calculated for `age` immediately above, so can just do `age^2` here.
age_squared = age^2,
non_hdl_c = if(units == "mg/dL") {
convert_chol_to_mmol(total_c - hdl_c) - 3.5
} else {
(total_c - hdl_c) - 3.5
},
hdl_c = if(units == "mg/dL") {
(convert_chol_to_mmol(hdl_c) - 1.3) / 0.3
} else {
(hdl_c - 1.3) / 0.3
},
sbp_lt_110 = (min(sbp, 110) - 110) / 20,
sbp_gte_110 = (max(sbp, 110) - 130) / 20,
dm = as_numeric_binary(dm),
smoking = as_numeric_binary(smoking),
bmi_lt_30 = (min(bmi, 30) - 25) / 5,
bmi_gte_30 = (max(bmi, 30) - 30) / 5,
egfr_lt_60 = (min(egfr, 60) - 60) / -15,
egfr_gte_60 = (max(egfr, 60) - 90) / -15,
bp_tx = as_numeric_binary(bp_tx),
statin = as_numeric_binary(statin),
bp_tx_sbp_gte_110 = bp_tx * sbp_gte_110,
statin_non_hdl_c = statin * non_hdl_c,
age_non_hdl_c = age * non_hdl_c,
age_hdl_c = age * hdl_c,
age_sbp_gte_110 = age * sbp_gte_110,
age_dm = age * dm,
age_smoking = age * smoking,
age_bmi_gte_30 = age * bmi_gte_30,
age_egfr_lt_60 = age * egfr_lt_60,
sdi = if(exists("zip", dat)) get_sdi(zip) else NA,
hba1c = if(exists("hba1c", dat)) hba1c else NA,
uacr = if(exists("uacr", dat)) uacr else NA
)
if(model == "base") {
return(pred_vals %>% remove_vars_for("sdi|hba1c|uacr") %>% add_const_and_unlist())
}
pred_vals <- pred_vals %>%
dplyr::mutate(
# Add preds for sdi model variant
sdi_4_to_6 = as_numeric_binary(is_between(sdi, 4, 6)),
sdi_7_to_10 = as_numeric_binary(is_between(sdi, 7, 10)),
missing_sdi = as_numeric_binary(is.na(sdi)),
# Add preds for uacr model variant
ln_uacr = if(!is.na(uacr)) log(uacr) else 0,
missing_uacr = as_numeric_binary(is.na(uacr)),
# Add preds for hba1c model variant
hba1c_dm = if(!is.na(hba1c) && dm == 1) hba1c - 5.3 else 0,
hba1c_no_dm = if(!is.na(hba1c) && dm == 0) hba1c - 5.3 else 0,
missing_hba1c = as_numeric_binary(is.na(hba1c))
)
# Now get rid of sdi, hba1c, and uacr (base values not needed)
pred_vals <- pred_vals %>% dplyr::select(-c(sdi, hba1c, uacr))
if(model == "uacr") {
return(pred_vals %>% remove_vars_for("sdi|hba1c") %>% add_const_and_unlist())
}
if(model == "hba1c") {
return(pred_vals %>% remove_vars_for("sdi|uacr") %>% add_const_and_unlist())
}
if(model == "sdi") {
return(pred_vals %>% remove_vars_for("uacr|hba1c") %>% add_const_and_unlist())
}
# Return full model
pred_vals %>% add_const_and_unlist()
}
run_models <- function(model = c("base", "uacr", "hba1c", "sdi", "full"),
sex = c("female", "male"),
time = c("10yr", "30yr"),
pred_vals,
dp = 3) {
stopifnot(
is.numeric(dp) && length(dp) != 0 &&
dp >= 0 && round(dp) == dp
)
model <- match.arg(model)
sex <- match.arg(sex)
time <- match.arg(time)
# If running a 10-year model, drop "age_squared" value from the `pred_vals`
# vector, as that term is only used in 30-year estimates
if(time == "10yr") {
pred_vals <- pred_vals[setdiff(names(pred_vals), "age_squared")]
}
desired_models <-
get(paste0(model, "_", time)) %>%
dplyr::select(dplyr::starts_with(sex))
res <- vector("numeric", length(desired_models))
names(res) <- gsub(paste0(sex, "_"), "", names(desired_models))
for(given_model in seq_along(desired_models)) {
log_odds <- sum(desired_models[[given_model]] * pred_vals)
res[[given_model]] <- round_half_up(exp(log_odds) / (1 + exp(log_odds)), dp)
}
res %>%
as.list() %>%
append(
list(
model = model,
over_years = gsub("yr", "", time) %>% as.integer(),
# Population of problems, if relevant, handled elsewhere
input_problems = NA_character_
)
) %>%
dplyr::as_tibble()
}
#' @title
#' Estimate risk of cardiovascular events using the American Heart Association
#' (AHA) Predicting Risk of cardiovascular disease EVENTs (PREVENT) equations,
#' with optional comparison to their *de facto* predecessor, the Pooled Cohort
#' Equations (PCEs) from the AHA and American College of Cardiology (ACC)
#'
#' @description
#' `estimate_risk()` and `est_risk()` are the same function, with the latter
#' being a function synonym for those who favor syntactical brevity.
#'
#' Estimation via the PREVENT equations includes both 10- and 30-year risk of 5
#' events:
#' - Total cardiovascular disease (CVD), which includes atherosclerotic CVD
#' (ASCVD) and heart failure as defined below
#' - ASCVD, which includes coronary heart disease (CHD) and stroke as defined
#' below
#' - Heart failure (often abbreviated HF, but not herein)
#' - CHD, which includes nonfatal myocardial infarction (MI) and fatal CHD
#' - Stroke
#'
#' Estimation via the PCEs includes 10-year risk of ASCVD. The title of the
#' function focuses on the "official" version of the PCEs from the AHA/ACC, but
#' this function permits estimation via the revised PCEs released by Yadlowsky
#' and colleagues in 2018. Further details are in the "Arguments" section.
#'
#' See also the README for this package, which goes into additional detail about
#' the PREVENT equations ([site](https://martingmayer.com/preventr),
#' [GitHub](https://github.com/martingmayer/preventr)).
#'
#' @param age Numeric (required predictor variable): Age in years, from 30-79.
#' Note the PCEs have a lower age limit of 40, so for ages 30-39, the function
#' will only provide estimates for the PREVENT equations, irrespective of
#' whether a user also requests estimation via the PCEs via the `model`
#' argument (more precisely, the function will still carry out the estimation
#' from the PCEs, but will return `NA`).
#' @param sex Character (required predictor variable): Either `"female"` or
#' `"male"` (`"f"` and `"m"` are accepted abbreviations).
#' @param sbp Numeric (required predictor variable): Systolic blood pressure
#' (SBP) in mmHg, from 90-180; see the "Details" section for more information
#' about the upper bound of the range.
#' @param bp_tx Logical or numeric equivalent (required predictor variable):
#' Whether the person is on blood pressure treatment, either `TRUE` or `FALSE`
#' (1 or 0 are accepted as alternative input).
#' @param total_c Numeric (required predictor variable): Total cholesterol in
#' mg/dL or mmol/L (see `chol_unit` argument), from 130-320 (for `chol_unit =
#' "mg/dL"`) or 3.36-8.28 (for `chol_unit = "mmol/L"`).
#' @param hdl_c Numeric (required predictor variable): High-density lipoprotein
#' cholesterol (HDL-C) in mg/dL or mmol/L (see `chol_unit` argument), from
#' 20-100 (for `chol_unit = "mg/dL"`) or 0.52-2.59 (for `chol_unit =
#' "mmol/L"`).
#' @param statin Logical or numeric equivalent (required predictor variable):
#' Whether the person is taking a statin, either `TRUE` or `FALSE` (1 or 0 are
#' accepted as alternative input).
#' @param dm Logical or numeric equivalent (required predictor variable):
#' Whether the person has diabetes mellitus (DM), either `TRUE` or `FALSE` (1
#' or 0 are accepted as alternative input).
#' @param smoking Logical or numeric equivalent (required predictor variable):
#' Whether the person is currently smoking (which PREVENT defines as cigarette
#' use within the last 30 days), either `TRUE` or `FALSE` (1 or 0 are accepted
#' as alternative input).
#' @param egfr Numeric or call (required predictor variable): Estimated glomerular
#' filtration rate (eGFR) in mL/min/1.73m<sup>2</sup>, entered either as a
#' numeric from 15-140 or as a call to `calc_egfr()` or synonyms, as described
#' in the "Details" section.
#' @param bmi Numeric or call (required predictor variable): Body mass index (BMI) in
#' kg/m<sup>2</sup>, entered either as a numeric from 18.5-39.9 or as a call to
#' `calc_bmi()` or synonyms, as described in the "Details" section.
#' @param hba1c Numeric (optional predictor variable): Glycated hemoglobin
#' (HbA1c) in %, from 4.5-15; see the "Details" section for more information
#' about the lower bound of the range.
#' @param uacr Numeric (optional predictor variable): Urine
#' albumin-to-creatinine ratio (UACR) in mg/g, from 0.1-25000.
#' @param zip Character (optional predictor variable): ZIP code of the person's
#' residence, used to estimate the Social Deprivation Index (SDI); see the
#' "Details" section for more information.
#' @param model Character or list (optional behavior variable):
#' - If character, the PREVENT model to use, one of `NULL` (the default),
#' `"base"` (the base model), `"hba1c"` (the base model adding HbA1c),
#' `"uacr"` (the base model adding UACR), `"sdi"` (the base model adding SDI),
#' or `"full"` (the base model adding HbA1c, UACR, and SDI). If `NULL`, the
#' model will be determined by algorithm specified in the "Details" section,
#' and this is the intended argument for most users. The ability to specify
#' mainly exists for specific use cases (e.g., research purposes).
#' - If passing a list, the list must have the following elements unless
#' otherwise specified (any other elements in the list will be ignored):
#' - `main_model` (character): The PREVENT model to use, following the same
#' requirements specified for when `model` is character. This element is
#' required only if the user desires to specify which PREVENT model to use. It
#' can otherwise be omitted, in which case the function will set `main_model`
#' to `NULL` (which has the same impact as when `model` is `NULL`).
#' - `other_models` (character): The PCEs to use, one of
#' `"pce_orig"` (for the original PCEs released by the [ACC/AHA in
#' 2013](https://pubmed.ncbi.nlm.nih.gov/24222018)), `"pce_rev"` (for
#' the revised PCEs released by [Yadlowsky and colleagues in
#' 2018](https://pubmed.ncbi.nlm.nih.gov/29868850), but not officially endorsed by
#' ACC/AHA), or `"pce_both"` (for both).
#' - `race_eth` (character): The race and ethnicity of the person, which is
#' required by the PCEs. One of `"Black"` (for non-Hispanic Black), `"White"`
#' (for non-Hispanic White), or `"Other"` (`"B"`, `"W"`, or `"O"` are accepted
#' as alternative input, as are lowercase versions of the full word or its
#' first-letter abbreviation). See the "Details" section for further
#' discussion.
#' @param time Character or numeric (optional behavior variable): Whether to
#' estimate risk over 10 or 30 years, one of `"both"` (character; the
#' default); `10` (numeric), `"10"` (character), or `"10yr"` (character); or
#' `30` (numeric), `"30"` (character), or `"30yr"` (character). Two additional
#' things to note:
#' - If a user requests estimation over a 30-year time horizon, but the user
#' also requests estimation via the PCEs, a 10-year time horizon will
#' automatically be added, as the PCEs only estimate 10-year risk (see the
#' "Value" section for more information).
#' - If estimating over 30 years when age > 59, a warning will accompany the
#' results regarding the reliability of the estimation (see the "Value"
#' section for more information).
#' @param chol_unit Character (optional behavior variable): The unit of
#' measurement for `total_c` and `hdl_c`, either `"mg/dL"` (the default) or
#' `"mmol/L"` (`"mg"` and `"mmol"` are accepted abbreviations).
#' @param optional_strict Logical (optional behavior variable): Whether to
#' enforce strictness on optional predictor variables, either `TRUE` or
#' `FALSE` (the default). The argument itself is strict, so 1 or 0 are *not*
#' accepted (in contrast with predictor variables expecting logical input),
#' and moreover, anything other than `TRUE` will be treated as `FALSE`. If
#' `FALSE`, the function will discard invalid optional predictor variables but
#' still allow the model to run. If `TRUE`, optional predictor variables
#' entered (if any) must be valid for the function to return risk estimates.
#' See the "Value" section for more information.
#' @param quiet Logical (optional behavior variable): Whether to suppress
#' messages and warnings in the console, either `TRUE` or `FALSE`; this
#' argument is strict, so 1 or 0 are *not* accepted (in contrast with
#' predictor variables expecting logical input), and moreover, anything other
#' than `TRUE` will be treated as `FALSE`. The default is `FALSE` when
#' `use_dat` is *not* a data frame and `TRUE` when `use_dat` *is* a data
#' frame. Having `quiet = FALSE` when `use_dat` is a data frame could result
#' in a fairly noisy console, and the information contained in console-printed
#' messages and warnings regarding model selection and input problems will
#' already be in the return data frame via columns `model` and
#' `input_problems`. However, if `use_dat` receives something other than a
#' data frame or a data frame with zero rows, it will always warn the user,
#' independent of the `quiet` argument.
#' @param collapse Logical (optional behavior variable): Whether to collapse the
#' output into a single data frame if applicable, either `TRUE` or `FALSE`;
#' this argument is strict, so 1 or 0 are *not* accepted (in contrast with
#' predictor variables expecting logical input), and moreover, anything other
#' than `TRUE` will be treated as `FALSE`. The default is `FALSE` when
#' `use_dat` is *not* a data frame (this ensures backward compatibility) and
#' `TRUE` when `use_dat` *is* a data frame. More precisely, however, although
#' I have specified the default as `is.data.frame(use_dat)` for clarity in
#' behavior, this argument is actually just ignored when a user passes a data
#' frame to `use_dat`. See the description of the `use_dat` argument and the
#' "Value" section for more information.
#' @param use_dat data frame via base R's data.frame or data frame extension via
#' tibble or data.table (optional behavior variable): Whether to use a data
#' frame provided by the user, either `NULL` (the default) or a data frame.
#' More precisely, anything other than passing a data frame to `use_dat` has
#' no impact. Passing a data frame to `use_dat` modifies the behavior of the
#' function in the following manner:
#'
#' - The function expects each row in the data frame to represent a
#' candidate for risk estimation.
#' - Predictor variables must be present in the data frame passed to `use_dat`.
#' Optional predictor variables remain optional, though (for example, there is
#' no requirement for HbA1c data to be in the data frame *per se*, but if the
#' user wishes to use HbA1c data as part of predicting risk with the PREVENT
#' equations, those data must be represented in a column in the data frame).
#' Any given argument for a predictor variable may be omitted, in which case
#' the function expects the data frame to have a column with the name of the
#' omitted argument. For example, if `age` is omitted from the function call,
#' the function expects a column named `age` in the data frame passed to
#' `use_dat`; it (of course) furthermore expects column `age` to contain
#' data adhering to specifications set forth for the `age` argument (for
#' example, an age of 20 is still considered unacceptable). Alternatively, the
#' user may pass a column name for any predictor variable. Continuing with the
#' previous example, if the column containing age data were instead named
#' `years_old`, the user could pass either `age = years_old` or `age =
#' "years_old"` to the function call, and values for `age` would be extracted
#' from the `years_old` column.
#' - Optional behavior variables may *either* be in the data frame passed to
#' `use_dat` in a column with the same name as the argument *or* passed to the
#' function call as usual. If an optional behavior variable is omitted from
#' the call when a user passes a data frame to `use_dat`, the function will
#' first look for a column with the name of the optional behavior variable in
#' the data frame; if it does not find such a column, it will use the default
#' behavior for the optional behavior variable. If the user includes an
#' argument for an optional behavior variable in the call, the function will
#' always use this, irrespective of any column in the data frame that might
#' share the same name. Additionally, the following arguments are not passable
#' via the data frame: `collapse` (ignored when `use_dat` is a data frame),
#' `use_dat` (this would be self-referential), `add_to_dat` (again,
#' essentially self-referential), and `progress` (this applies to the entire
#' call when `use_dat` is a data frame). As an example, suppose a user wishes
#' to specify the model(s) to use. The user may either include a column in the
#' data frame named `model` and omit the `model` argument from the call or
#' pass the desired behavior to the `model` argument. If a column named
#' `model` exists in the data frame passed to `use_dat` *and* the user passes
#' something to the `model` argument, the function will use the argument.
#' Including an optional behavior variable in the data frame activates
#' row-by-row behavior alteration of the function. If the user wants to alter
#' the default behavior of the function, but wishes to do so in the same
#' manner across all rows, it may be easier to forego having optional behavior
#' variables in the data frame and instead pass the optional behavior
#' variables via function arguments (though this is not strictly required; one
#' can achieve the same behavior by having the same value repeated across all
#' rows of the column for the optional behavior variable).
#' - The `collapse` argument is ignored (results will always be returned as
#' a data frame when `use_dat` is a data frame).
#'
#' See `vignette("using-data-frame")` for further discussion and examples.
#' @param add_to_dat Logical (optional behavior variable): Whether to add the
#' output to the data frame passed to `use_dat`, either `TRUE` (the default) or
#' `FALSE`. This argument is only considered when `use_dat` is a data frame.
#' This argument is strict, so 1 or 0 are *not* accepted (in contrast with
#' predictor variables expecting logical input), and moreover, anything other
#' than `TRUE` will be treated as `FALSE`. See the "Value" section for more
#' information.
#'
#' See `vignette("using-data-frame")` for further discussion and examples.
#' @param progress Logical (optional behavior variable): Whether to display a
#' progress bar during computation, either `TRUE` or `FALSE`. This argument is
#' only considered when `use_dat` is a data frame, when it defaults to `TRUE`.
#' This argument is strict, so 1 or 0 are *not* accepted (in contrast with
#' predictor variables expecting logical input), and moreover, anything other
#' than `TRUE` will be treated as `FALSE`. This argument is independent of the
#' `quiet` argument. It requires the `utils` package, which is part of the R
#' distribution (i.e., outside of atypical scenarios, you should not need to
#' install the `utils` package yourself).
#'
#' @details
#' ## Why is the upper limit of the SBP range 180 mmHg?
#'
#' Some may notice the upper limit is set to 180 mmHg here, whereas the PREVENT
#' equations technically permit up to 200 mmHg. The Pooled Cohort Equations (PCEs)
#' do this as well. I have restricted to 180 mmHg, as SBP beyond 180 mmHg constitutes
#' hypertensive urgency (per [AHA's own definitions](https://pubmed.ncbi.nlm.nih.gov/29133354)),
#' and irrespective of the debate surrounding labels like hypertensive urgency
#' and emergency, it would seem clinically unreasonable to engage with the
#' PREVENT equations when someone has more pressing matters to address
#' (better blood pressure control *per se*).
#'
#' ## Why is the lower limit of the HbA1c 4.5%?
#'
#' Some may notice the lower limit is set to 4.5% here, whereas the PREVENT
#' equations technically permit down to 3%. I have restricted to 4.5%, as HbA1c
#' of 3% is neither realistic nor safe for a person. For example, using [the
#' HbA1c to estimated average glucose (eAG) converter from the American
#' Diabetes Association](https://professional.diabetes.org/glucose_calc)
#' (https://professional.diabetes.org/glucose_calc), a HbA1c of 3% corresponds
#' to an eAG of 39 mg/dL (2.2 mmol/L).
#'
#' ## Entering eGFR and BMI as a call rather than a numeric value
#'
#' The `eGFR` and `bmi` arguments can be entered as numeric values or as calls to
#' `calc_egfr()` and `calc_bmi()`, respectively. They both have synonyms as well:
#' - Synonyms for `calc_egfr()` are `calculate_egfr()`, `calc_ckd_epi()`, and
#' `calculate_ckd_epi()`, with the latter two synonyms reflecting the
#' calculation is from the CKD-EPI equations (the reparameterized version
#' without race, which is also what the PREVENT equations use).
#' - The synonym for `calc_bmi()` is `calculate_bmi()`.
#'
#' These convenience functions add value where a person might have the necessary
#' components to calculate the respective parameter but do not have handy the
#' parameter itself.
#'
#' The syntax for these convenience functions is as follows:
#' - `calc_egfr(cr, units = "mg/dL", age, sex, quiet = FALSE)`
#' - `cr` is the creatinine in whatever units are specified by `units`.
#' - `units` is the unit of measurement for `cr`, either `"mg/dL"` or `"umol/L"`,
#' with `"mg"` and `"umol"` being accepted abbreviations.
#' - `age` is the age of the person, but there is no need to enter this, as
#' the function will extract this from the `age` argument of `estimate_risk()`;
#' in fact, any argument entered here will be ignored in favor of the `age`
#' argument of `estimate_risk()`.
#' - `sex` is the sex of the person, but there is no need to enter this, as
#' the function will extract this from the `sex` argument of `estimate_risk()`;
#' in fact, any argument entered here will be ignored in favor of the `sex`
#' argument of `estimate_risk()`.
#' - `quiet` is a logical indicating whether to suppress the warning about
#' use outside of `estimate_risk()`.
#' - An example call would be `calc_egfr(1.2)` (because `units` defaults
#' to `"mg/dL"`) or `calc_egfr(88, "umol")`.
#' - `calc_bmi(weight, height, units = "nonmetric", quiet = FALSE)`
#' - `weight` is the weight in pounds if `units = "nonmetric"` or kilograms
#' if `units = "metric"`.
#' - `height` is the height in inches if `units = "nonmetric"` or centimeters
#' if `units = "metric"`.
#' - `units` is the unit of measurement for `weight` and `height`, either
#' `"nonmetric"` or `"metric"`.
#' - `quiet` is a logical indicating whether to suppress the warning about
#' use outside of `estimate_risk()`.
#' - An example call would be `calc_bmi(150, 70)` (because `units` defaults
#' to `"nonmetric"`) or `calc_bmi(68, 178, "metric")`.
#'
#' ## What is the Social Deprivation Index (SDI)?
#'
#' Read more from the [Robert Graham Center's page on the SDI](https://www.graham-center.org/maps-data-tools/social-deprivation-index.html)
#' (https://www.graham-center.org/maps-data-tools/social-deprivation-index.html)
#'
#' ## Model selection when `model = NULL`
#'
#' If `model = NULL`, the model will be determined by the following algorithm:
#' - If no optional predictor variables (HbA1c, UACR, zip code) are
#' entered, or only invalid optional variables are entered and
#' `optional_strict = FALSE`: The base model
#' - If one of the optional predictor variables is entered, or two or
#' more optional predictor variables are entered but only one is valid and
#' `optional_strict = FALSE`: The base model adding that variable (e.g., if
#' HbA1c is entered and no other optional predictor variables are entered, the
#' base model adding HbA1c; if HbA1c and UACR are entered, but HbA1c is
#' invalid and `optional_strict = FALSE`, the base model adding UACR)
#' - If two or more of the optional predictor variables are entered, or
#' all three optional variables are entered but one is invalid and
#' `optional_strict = FALSE`: The full model (the PREVENT equations include
#' a term for optional predictor variables being missing, so if one of the
#' optional predictor variables is missing in this scenario, it is treated as
#' such within the full model)
#'
#' ## What if SDI is not available for a zip code?
#'
#' Some zip codes do not have SDI data available, and the PREVENT equations
#' include a term for SDI being missing. As such, if a user enters a valid zip
#' code but no SDI data are available, the user will be notified (unless `quiet
#' = TRUE`), and the tool will then implement the missing term as part of
#' predicting risk whenever the full model is used, but SDI will otherwise be
#' removed from prediction. Specifically, the following models will predict
#' risk in the situation where the user enters a valid zip code, but no SDI
#' data are available:
#' - If the user does not enter a valid HbA1c or UACR: The base model.
#' - If the user enters valid HbA1c and UACR: The full model (treating SDI as missing).
#' - If the user enters a valid HbA1c: The base model adding HbA1c.
#' - If the user enters a valid UACR: The base model adding UACR.
#'
#' ## Race and/or ethnicity in predictive models
#'
#' The use of race and/or ethnicity in predictive (also called prognostic)
#' models is, in a word, problematic. It is problematic for a few reasons,
#' and fortunately, this has received much-needed attention in recent years.
#' The PCEs require this input as specified in the "Arguments" section of the
#' documentation. If you would like to read a bit more about this issue, see
#' [here](https://www.martingmayer.com/preventr/articles/race-ethnicity-in-predictive-models.html).
#'
#' ## Why is the risk estimation higher with the PCEs?
#'
#' The PCEs are known to overestimate risk. Indeed, this was a key motivation
#' for [Yadlowsky and colleagues](https://pubmed.ncbi.nlm.nih.gov/29868850) to
#' develop the revised PCEs, and was also a key motivation for development of
#' the [PREVENT equations](https://pubmed.ncbi.nlm.nih.gov/37947085).
#'
#' ## Why don't you export the PCEs or the convenience functions for BMI and eGFR?
#'
#' These are not exported for two main reasons:
#' - With specific regard to the PCEs, they are not the focal point of this
#' package, but they are of potential comparative interest.
#' - With regard to all these functions, I (of course) tested them for
#' accuracy and intended behavior, but they are implemented primarily for
#' internal package use or as part of estimating risk with `estimate_risk()` or
#' `est_risk()`. For example, although they implement at least basic checks of
#' input, some of the input checking and handling is delegated to other
#' processes that are invoked when using these functions in the aforementioned
#' ways. To give more concrete examples, if invoking these functions outside
#' the context of `estimate_risk()` or `est_risk()`, although implementation of
#' the PCEs checks input validity, it just returns `NA` with no messaging if it
#' finds a problem. The functions for BMI and eGFR also implement checks for
#' input validity (such as numeric inputs needing to be a number greater than
#' 0), but they do not reject extreme numeric values (aside from the `age`
#' input for eGFR, which implements some further restriction on age). Again,
#' however, the calculations have certainly been tested for accuracy, so for
#' users who are confident (1) they understand the cautions described here
#' and (2) in the fidelity of their input for the functions, they can use them
#' judiciously outside of `estimate_risk()` or `est_risk()`
#' (via `preventr:::<function>`).
#'
#' @return
#' ## Basic information about the return
#'
#' `estimate_risk()` will always return either (1) a list of length 2, with
#' each list element having a single data frame or (2) a single data frame.
#' All references herein to a data frame being returned are for a data frame
#' as a tibble (see the [tibble](https://cran.r-project.org/package=tibble)
#' package for more detail) unless `use_dat` receives a data frame, in which
#' case the return data frame will be of the same type passed to `use_dat` to
#' ensure type-stability.
#'
#' Whether the return is a list of data frames or a single data frame is
#' determined by:
#'
#' - whether the risk estimation is occurring over a single time horizon
#' - the value of the `collapse` argument
#' - whether the user has passed a data frame to the `use_dat` argument.
#'
#' When all of the following conditions are met, the function will return a
#' list of length 2, with each item in the list being a single data frame
#' containing the 10-year and 30-year estimates, in that order:
#'
#' - the user did *not* pass a data frame to `use_dat`
#' - `collapse = FALSE`
#' - either (1) `time = "both"` or (2) `time = "30yr"` *and* the user requests
#' estimation with the PCEs via the `model` argument (thus adding a 10-year
#' time horizon, as the PCEs only estimate risk at 10 years).
#'
#' In all other scenarios, the function will return a single data frame. Note
#' this includes scenarios where `collapse` will have no impact, namely when:
#' - the user passes a data frame to `use_dat` (passing a data frame to
#' `use_dat` will always result in a data frame being returned to the user)
#' - the estimation occurs over one time horizon, namely if (1) `time =
#' "30yr"` and the user does not request estimation with the PCEs or (2)
#' `time = "10yr"`.
#'
#' The data frame will have the following columns:
#' - `total_cvd`: The estimated risk of a total CVD event (column type: double)
#' - `ascvd`: The estimated risk of an ASCVD event (column type: double)
#' - `heart_failure`: The estimated risk of a HF event (column type: double)
#' - `chd`: The estimated risk of a CHD event (column type: double)
#' - `stroke`: The estimated risk of a stroke event (column type: double)
#' - `model`: The PREVENT or PCE model used (column type: character)
#' - `over_years`: The time horizon for the risk estimate (column type: integer)
#' - `input_problems`: Semicolon-separated vector of length one delineating
#' any input problems (column type: character)
#'
#' In addition, when `use_dat` is a data frame, the return data frame will
#' also have the following composition:
#' - A column named `preventr_id` (column type: integer) that acts as a unique
#' identifier for each row in the data frame passed to `use_dat`. This column
#' will always be the first column in the returned data frame. The values of
#' `preventr_id` are simply the row numbers of the data frame passed to
#' `use_dat`. So, for example, if a row has `preventr_id` equal to 5, this
#' means it is based on the input present in row 5 of the data frame passed to
#' `use_dat`.
#' - If `add_to_dat = TRUE`, the returned data frame will include the columns
#' in `use_dat`. So, the composition of the return data frame will be:
#' `preventr_id` column + columns from `use_dat` + risk estimation columns. In
#' addition, for a given row in the `use_dat` data frame with `preventr_id` *x*
#' (hereafter, "row *x*"), if *n* represents the number of models requested
#' for row *x*, then row *x* will be replicated *n* times in the output to
#' accommodate reporting the different model outputs for that row. Note also
#' *n* is determined by what the function receives for both the `model` and
#' `time` arguments (because, for example, if `model = "base"` and
#' `time = "both"`, this is a request for 2 models). For those familiar with
#' joins, the expansion described here is simply the result of a left join of
#' the data frame passed to `use_dat` with the data frame returned by
#' `estimate_risk()` (using `preventr_id` as the key). For those not familiar
#' with joins, if the above does not seem clear, the vignette about using
#' data frames (`vignette("using-data-frame")`) should help.
#' - If `add_to_dat = FALSE`, the returned data frame will *not* include the
#' columns in `use_dat`, so the composition of the return data frame will be:
#' `preventr_id` column + risk estimation columns. The replication behavior
#' described for when `add_to_dat = TRUE` will still occur. For this reason,
#' the `preventr_id` column is perhaps especially important when
#' `add_to_dat = FALSE`, as it provides a mechanism to associate the results
#' with the original data frame.
#' - If the user passes a data frame with a column named `model` (see the
#' argument specifications for `use_dat` for further detail), the function
#' will rename this column to `model_input` in the return data frame to
#' prevent name conflicts, because the return data frame will also have the
#' column `model` based on the risk estimation output.
#'
#' ## When valid input parameters exist for all required predictor variables
#'
#' The risk estimate columns are all of type double, and they are presented as
#' a proportion rounded to 3 decimal places. Halves are rounded up to align
#' with what many people likely expect, but this is in contrast to base R's
#' default rounding behavior (it is a perfectly reasonable default, but
#' perhaps somewhat unexpected for people who are not familiar with different
#' standards/conventions for rounding; see [round()] for further detail).
#'
#' The `model` column will be of type character, taking one of the following
#' values: `"base"`, `"hba1c"`, `"uacr"`, `"sdi"`, or `"full"`. If opting in
#' for comparison to the PCEs, `model` for those estimates will be one of
#' `"pce_orig"` or `"pce_rev"`.
#'
#' The `over_years` column will be of type integer, either 10 or 30.
#'
#' If `optional_strict = TRUE`, the above will only hold if the optional
#' predictor variables that are entered (if any) are valid; if any optional
#' predictor variables are entered but are invalid, the function will behave
#' in the same manner as when invalid input parameters exist for one or more
#' required variables.
#'
#' ## When invalid input parameters exist for one or more required predictor variable(s)
#'
#' The function will issue a warning about the problematic variables, unless
#' `quiet = FALSE`. A data frame will be returned with the following
#' characteristics:
#' - All risk estimates will be set to `NA_real_`
#' - The `model` column will state "none"
#' - The `over_years` column will be set to `NA_integer_`
#' - The `input_problems` column will contain a character vector of length 1
#' delineating the problematic variable(s); if multiple problematic variables
#' exist, they will be separated by semicolons
#'
#' ## When invalid input parameters exist for one or more optional predictor variable(s)
#'
#' ### When `optional_strict = TRUE`
#'
#' The function will behave similarly to when invalid input parameters exist
#' for one or more required variables, with the `input_problems` column
#' delineating the problematic variables
#'
#' ### When `optional_strict = FALSE`
#'
#' The function will issue a warning about the problematic variables, unless
#' `quiet = FALSE`. The problematic optional variables will then be
#' functionally discarded and the PREVENT equations still run, in accordance
#' with the specifications detailed in the "Details" section regarding model
#' selection. A data frame will be returned with the following
#' characteristics:
#'
#' - All estimates will be returned as specified in the valid input parameters
#' section, as will the `model` and `over_years` columns
#' - The `input_problems` column will contain a character vector of length 1
#' delineating the problematic variables (because optional predictor variables
#' are allowed to be empty, any input that is functionally empty or missing
#' (such as `NULL`, `numeric(0)`, `NA`, etc.) will not be considered
#' problematic and thus not populate in the `input_problems` column)
#'
#' ## When estimating 30-year risk and age > 59
#'
#' The function advises 30-year risk prediction for people > 59 years is
#' questionable via two warnings:
#'
#' - in the console (that can be suppressed by setting `quiet = TRUE`)
#' - in the column `input_problems` of the return tibble (`quiet` has no
#' impact here)
#'
#' ## The special case of the `zip` argument
#'
#' The above rule for optional predictor variables applies to the `zip`
#' argument as well, but with the additional reminder that there are valid zip
#' codes that do not have an SDI score. This is importantly different from an
#' invalid input for zip. See the "Details" section for more information about
#' how this is handled, but users should *not* expect anything to populate in
#' the `input_problems` column if the zip is valid, regardless of whether that
#' zip has an SDI score. As will be clear from the "Details" section, users will
#' be able to determine when a zip code does not have an SDI score based on
#' the model that was used.
#'
#' ## What about the PCEs?
#'
#' Within the broader context of the function itself, the PCEs are treated as
#' optional. Thus, as long as there is valid input for the PREVENT equations,
#' the function will run, returning risk estimates from the PREVENT equations.
#' Note, however, that valid input for the PREVENT equations requires valid
#' input for the `model` argument. Thus, if the `model` argument is invalid or
#' malformed (i.e., not adherent to the specifications delineated for that
#' argument), the function will behave as described for the circumstance when
#' invalid input exists for one or more required predictor variables.
#'
#' If a list containing elements `other_models` and `race_eth` is passed to
#' argument `model`, then within the sub-context of running additional models
#' for comparison, the elements `other_models` and `race_eth` are required.
#' Thus, if either `other_models` or `race_eth` is invalid, the returned
#' row(s) within the data frame will function comparably to what is described
#' for the circumstance when invalid input exists for one or more required
#' predictor variables for the PREVENT equations. For example, suppose someone
#' enters valid input for the PREVENT equations and passes the following
#' argument to `model`: `list(other_models = "pce_both", race_eth = NA)`. The
#' function would then run, returning risk estimates for the PREVENT
#' equations, but the user would be notified of the invalid input for argument
#' `race_eth` within the argument `model` in the console (unless `quiet =
#' TRUE`); furthermore, the return data frame for the 10-year time horizon
#' would contain two rows dedicated to the PCEs (given `other_models =
#' "pce_both"`, a valid argument), but each row would behave in the manner
#' described for the PREVENT equations when one or more required predictor
#' variable is invalid. That is, each row dedicated to the PCEs would consist
#' of `NA`s (of the appropriate type) for each column, aside from the column
#' `model`, which would say `"none"`, and the column `input_problems`, which
#' would specify there was erroneous input for the argument `race_eth`.
#' Likewise, if `other_models` were instead `"pce_orig"`, `"pce_rev"`, or an
#' invalid input, there would only be one row dedicated to the PCEs, because
#' in the first two cases, the user entered a valid argument specifying
#' interest in only one of the two options for the PCEs, and in the third
#' case, the user entered invalid input for the options for the PCEs (thus
#' becoming functionally similar to a situation if someone gave invalid input
#' for the `model` argument).
#'
#' Lastly, note the risk estimation columns `total_cvd`, `heart_failure`,
#' `chd`, and `stroke` will always be `NA_real_`, because the PCEs only
#' estimate the risk of ASCVD.
#'
#' ## Combining output into a single data frame
#'
#' Depending on the arguments to the function, the output may be a list of
#' data frames, one for each time horizon, (see the subsection "Basic
#' information about the return" within the "Value" section). The argument
#' `collapse` supports collapsing these into a single data frame, but it is
#' also easy to do outside of this package, e.g.:
#'
#' \preformatted{
#' res_dplyr <- dplyr::bind_rows(res) # Combine in dplyr
#' res_dt <- data.table::rbindlist(res) # Combine in data.table
#' res_base_r <- do.call(rbind, res) # Combine in base R
#'
#' # These all yield the same tabular output, but the attributes vary
#' # (e.g., the classes will obviously differ)
#'
#' all.equal(res_dplyr, res_dt, check.attributes = FALSE) # TRUE
#' all.equal(res_base_r, res_dplyr, check.attributes = FALSE) # TRUE
#' }
#'
#' ## Type-stability of return when passing a data frame to `use_dat`
#'
#' Importantly, the function maintains type-stability of the data frame it
#' receives via the `use_dat` argument, meaning passing a data.frame will
#' yield a data.frame, passing a tibble will yield a tibble, and passing a
#' data.table will yield a data.table. See `vignette("using-data-frame")` for
#' more information.
#'
#' @examples
#' # Example with all required predictor variables (example from Table S25
#' # in the supplemental PDF appendix of the PREVENT equations article)
#' #
#' # Optional predictor variables are all omitted (and thus take their default).
#' # `model` is also omitted (and thus takes its default, with the function
#' # selecting # the model based on the algorithm specified in the "Details"
#' # section).
#' # `time` is also omitted (and thus takes its default, with the function
#' # returning estimates for both 10- and 30-year risk as specified in the
#' # "Value" section).
#' #
#' # Expect the base model to run given absence of optional predictor variables
#' res <- estimate_risk(
#' age = 50,
#' sex = "female", # or "f"
#' sbp = 160,
#' bp_tx = TRUE, # or 1
#' total_c = 200, # default unit is "mg/dL"
#' hdl_c = 45, # default unit is "mg/dL"
#' statin = FALSE, # or 0
#' dm = TRUE, # or 1
#' smoking = FALSE, # or 0
#' egfr = 90,
#' bmi = 35
#' )
#'
#' # Based on Table S25, expect the 10-year risk for `total_cvd` to be 0.147,
#' # and based on the supplemental Excel file, also expect:
#' # 10-year risks: `ascvd`, 0.092; `heart_failure`, 0.081;
#' # `chd`, 0.044; `stroke`, 0.054
#' # 30-year risks: `total_cvd`, 0.53; `ascvd`, 0.354; `heart_failure`, 0.39;
#' # `chd`, 0.198; `stroke`, 0.221
#' res
#'
#' # Example with HbA1c
#' # (also changing required predictor variables & limiting to 10-year results)
#' estimate_risk(
#' age = 66,
#' sex = "male", # or "m"
#' sbp = 148,
#' bp_tx = FALSE,
#' total_c = 188,
#' hdl_c = 52,
#' statin = TRUE,
#' dm = TRUE,
#' smoking = TRUE,
#' egfr = 67,
#' bmi = 30,
#' hba1c = 7.5,
#' time = "10yr" # only 10-year results will show
#' )
#'
#' # Example with UACR (limited to 30-year results)
#' estimate_risk(
#' age = 66,
#' sex = "female",
#' sbp = 148,
#' bp_tx = FALSE,
#' total_c = 188,
#' hdl_c = 52,
#' statin = TRUE,
#' dm = TRUE,
#' smoking = TRUE,
#' egfr = 67,
#' bmi = 30,
#' uacr = 750,
#' time = "30yr" # only 30-year results will show
#' )
#'
#' # The remaining examples will all be limited to 10-year results unless
#' # otherwise specified
#'
#' # Example with SDI with valid zip code with SDI data available
#' estimate_risk(
#' age = 66,
#' sex = "female",
#' sbp = 148,
#' bp_tx = FALSE,
#' total_c = 188,
#' hdl_c = 52,
#' statin = TRUE,
#' dm = TRUE,
#' smoking = TRUE,
#' egfr = 67,
#' bmi = 30,
#' zip = "59043", # Lame Deer, MT (selected randomly)
#' time = 10 # Note numeric 10 (not "10yr"),
#' # just to show the option of entering this way
#' )
#'
#' # Example with SDI with valid zip code without SDI data available
#' # (base model will be used)
#' estimate_risk(
#' age = 66,
#' sex = "male",
#' sbp = 148,
#' bp_tx = FALSE,
#' total_c = 188,
#' hdl_c = 52,
#' statin = TRUE,
#' dm = TRUE,
#' smoking = TRUE,
#' egfr = 67,
#' bmi = 30,
#' zip = "00738", # Fajardo, PR
#' time = 10
#' )
#'
#' # Example with full model (even though zip does not have available SDI, full
#' # model used given availability of HbA1c and UACR; because zip is valid,
#' # column `input_problems` will be NA)
#' estimate_risk(
#' age = 66,
#' sex = "female",
#' sbp = 148,
#' bp_tx = FALSE,
#' total_c = 188,
#' hdl_c = 52,
#' statin = TRUE,
#' dm = TRUE,
#' smoking = TRUE,
#' egfr = 67,
#' bmi = 30,
#' hba1c = 9,
#' uacr = 75,
#' zip = "00738",
#' time = "10yr"
#' )
#'
#' # Example with full model (zip has SDI data available, UACR is valid, but
#' # HbA1c is not; column `input_problems` will specify problem with `hba1c`,
#' # but full model will still run given availability of the other two optional
#' # predictor variables)
#' estimate_risk(
#' age = 66,
#' sex = "male",
#' sbp = 148,
#' bp_tx = FALSE,
#' total_c = 188,
#' hdl_c = 52,
#' statin = TRUE,
#' dm = TRUE,
#' smoking = TRUE,
#' egfr = 67,
#' bmi = 30,
#' hba1c = 20,
#' uacr = 75,
#' zip = "59043",
#' time = "10yr"
#' )
#'
#' # Example of using the convenience functions `calc_bmi()` and `calc_egfr()`
#' res_convenience_fxs <- estimate_risk(
#' age = 50,
#' sex = "female",
#' sbp = 130,
#' bp_tx = TRUE,
#' total_c = 200,
#' hdl_c = 45,
#' statin = FALSE,
#' dm = TRUE,
#' smoking = FALSE,
#' egfr = calc_egfr(1), # units unspecified, so treated as 1 mg/dL; eGFR = 69
#' bmi = calc_bmi(70, 150, "metric"), # weight in kg, height in cm; BMI = 31.1
#' time = "10yr",
#' quiet = TRUE
#' )
#'
#' res_direct_entry <- estimate_risk(
#' age = 50,
#' sex = "female",
#' sbp = 130,
#' bp_tx = TRUE,
#' total_c = 200,
#' hdl_c = 45,
#' statin = FALSE,
#' dm = TRUE,
#' smoking = FALSE,
#' egfr = 69,
#' bmi = 31.1,
#' time = "10yr",
#' quiet = TRUE
#' )
#'
#' identical(res_convenience_fxs, res_direct_entry)
#'
#' # Example of using `model` argument to compare results from PREVENT equations
#' # to both versions of the PCEs
#' estimate_risk(
#' age = 50,
#' sex = "female",
#' sbp = 130,
#' bp_tx = TRUE,
#' total_c = 200,
#' hdl_c = 45,
#' statin = FALSE,
#' dm = TRUE,
#' smoking = FALSE,
#' egfr = 90,
#' bmi = 35,
#' hba1c = 8,
#' uacr = 30,
#' time = "10yr",
#' model = list(other_models = "pce_both", race_eth = "Black")
#' # Note omission of element `main_model` within the list is okay, and the
#' # element will then be treated as NULL (and thus model selection here
#' # will be "full" given availability of valid HbA1c and UACR)
#' )
#'
#' # Essentially a repeat of example immediately above, but now will specify
#' # `main_model` as "hba1c" and limit `other_models` to the revised PCEs
#' estimate_risk(
#' age = 50,
#' sex = "female",
#' sbp = 130,
#' bp_tx = TRUE,
#' total_c = 200,
#' hdl_c = 45,
#' statin = FALSE,
#' dm = TRUE,
#' smoking = FALSE,
#' egfr = 90,
#' bmi = 35,
#' hba1c = 8,
#' uacr = 30,
#' time = "10yr",
#' model = list(main_model = "hba1c", other_models = "pce_rev", race_eth = "Black")
#' )
#'
#' # Because the PCEs only give 10-year estimates, if a user specifies an
#' # interest in a 30-year time horizon but also expresses interest in
#' # comparison with the the PCEs, a 10-year time horizon must be added for the
#' # PCEs, but this will not automatically result in estimation of 10-year risk
#' # for the PREVENT equations.
#' estimate_risk(
#' age = 50,
#' sex = "female",
#' sbp = 130,
#' bp_tx = TRUE,
#' total_c = 200,
#' hdl_c = 45,
#' statin = FALSE,
#' dm = TRUE,
#' smoking = FALSE,
#' egfr = 90,
#' bmi = 35,
#' time = "30yr",
#' model = list(other_models = "pce_both", race_eth = "Black")
#' )
#'
#' # Repeat of above, but setting `collapse = TRUE`
#' res_collapsed <- estimate_risk(
#' age = 50,
#' sex = "female",
#' sbp = 130,
#' bp_tx = TRUE,
#' total_c = 200,
#' hdl_c = 45,
#' statin = FALSE,
#' dm = TRUE,
#' smoking = FALSE,
#' egfr = 90,
#' bmi = 35,
#' time = "30yr",
#' model = list(other_models = "pce_both", race_eth = "Black"),
#' collapse = TRUE
#' )
#'
#' res_collapsed
#'
#' # Can also accomplish this after the fact, as documented in "Combining output
#' # into a single data frame" within the "Value" section
#' res_uncollapsed <- estimate_risk(
#' age = 50,
#' sex = "female",
#' sbp = 130,
#' bp_tx = TRUE,
#' total_c = 200,
#' hdl_c = 45,
#' statin = FALSE,
#' dm = TRUE,
#' smoking = FALSE,
#' egfr = 90,
#' bmi = 35,
#' time = "30yr",
#' model = list(other_models = "pce_both", race_eth = "Black")
#' )
#'
#' all.equal(
#' do.call(rbind, res_uncollapsed),
#' res_collapsed,
#' check.attributes = FALSE
#' )
#' # Can also accomplish with `dplyr` and `data.table`, as detailed in the
#' # "Combining output into a single data frame" subsection of the "Value"
#' # section
#'
#'
#' # Passing a data frame to argument `use_dat`
#' if(interactive()) {
#' vignette("using-data-frame")
#' }
#'
#' # Expect table of NAs due to invalid input for `age` and `sbp`, and column
#' # `input_problems` to contain explanations about problems with `age` and `sbp`
#' res <- estimate_risk(
#' age = 8675309,
#' sex = "female",
#' sbp = 112358,
#' bp_tx = TRUE,
#' total_c = 200,
#' hdl_c = 45,
#' statin = FALSE,
#' dm = TRUE,
#' smoking = FALSE,
#' egfr = 90,
#' bmi = 35,
#' time = "10yr"
#' )
#'
#' res
#'
#' # Quiet version of the above example
#' res <- estimate_risk(
#' age = 8675309,
#' sex = "female",
#' sbp = 112358,
#' bp_tx = TRUE,
#' total_c = 200,
#' hdl_c = 45,
#' statin = FALSE,
#' dm = TRUE,
#' smoking = FALSE,
#' egfr = 90,
#' bmi = 35,
#' time = "10yr",
#' quiet = TRUE # Suppresses messages, but not column `input_problems`
#' )
#'
#' res
#'
#' # If only invalid input is for PCEs, PREVENT equations will still run
#' for(time in c("10yr", "30yr", "both")) {
#' cat(paste0("\n", "----- `time = \"", time, "\"` -----", "\n"))
#' print(
#' estimate_risk(
#' age = 38, # This age is okay for the PREVENT
#' sex = "female", # equations, but not for the PCEs
#' sbp = 144,
#' bp_tx = TRUE,
#' total_c = 200,
#' hdl_c = 45,
#' statin = FALSE,
#' dm = TRUE,
#' smoking = FALSE,
#' egfr = 90,
#' bmi = 35,
#' time = time,
#' model = list(
#' other_models = "pce_both",
#' race_eth = NA # Invalid `race_eth` for PCEs
#' ),
#' quiet = TRUE
#' )
#' )
#' }
#'
#' # Note `input_problems` column is semicolon-separated, but it is easy to
#' # print as separate lines with `gsub()` and `cat()`, e.g.:
#' cat(gsub("; ", "\n", res$input_problems))
#' @examplesIf getRversion() >= "4.1.0"
#' res$input_problems |> gsub(pattern = "; ", replacement = "\n", x = _) |> cat()
#' # ... and could, of course, also use the `magrittr` pipe `%>%` if desired
#'
#' @rdname estimate_risk
#' @export
estimate_risk <- function(age,
sex,
sbp,
bp_tx,
total_c,
hdl_c,
statin,
dm,
smoking,
egfr,
bmi,
hba1c = NULL,
uacr = NULL,
zip = NULL,
model = NULL,
time = "both",
chol_unit = "mg/dL",
optional_strict = FALSE,
quiet = is.data.frame(use_dat),
collapse = is.data.frame(use_dat),
use_dat = NULL,
add_to_dat = is.data.frame(use_dat),
progress = is.data.frame(use_dat)) {
# Get call (used for various things, e.g., calculation of eGFR and BMI)
cl <- match.call()
# Functions to generate empty tibble and messages if input probs ----
input_probs_return_tibble <- function(input_probs) {
dplyr::tibble(
total_cvd = NA_real_,
ascvd = NA_real_,
heart_failure = NA_real_,
chd = NA_real_,
stroke = NA_real_,
model = "none",
over_years = NA_integer_,
input_problems = input_probs
)
}
return_empty_tibbles <- function(
time_is_valid = time_valid,
time_requested = time,
add_pce_orig = pce_orig,
add_pce_rev = pce_rev,
var_problems = list(
required = vars_required_problem,
optional = vars_optional_problem,
comparison = vars_comparison_problem
),
optional_is_strict = optional_strict
) {
main_model_tibble <- if(optional_is_strict) {
input_probs_return_tibble(
paste0(c(var_problems$required, var_problems$optional), collapse = "; ")
)
} else {
input_probs_return_tibble(
paste0(var_problems$required, collapse = "; ")
)
}
var_problems_accounting_for_poss_age_dupe <-
if("age" %in% names(var_problems$required) && "age" %in% names(var_problems$comparison)) {
c(
# For consistency in reporting ...
# First, list the `age` var from `var_problems$comparison`
var_problems$comparison[names(var_problems$comparison) %in% "age"],
# Then, list the non-`age` vars from `var_problems$required`
var_problems$required[!names(var_problems$required) %in% "age"],
# Then, list any other vars from `var_problems$comparison`
var_problems$comparison[!names(var_problems$comparison) %in% "age"]
)
} else {
c(var_problems$required, var_problems$comparison)
}
other_models_tibble <- if(optional_is_strict) {
input_probs_return_tibble(
paste0(c(var_problems_accounting_for_poss_age_dupe, var_problems$optional), collapse = "; ")
)
} else {
input_probs_return_tibble(
paste0(var_problems_accounting_for_poss_age_dupe, collapse = "; ")
)
}
if(!any(add_pce_orig, add_pce_rev)) {
additional_tibble <- NULL
} else if(all(add_pce_orig, add_pce_rev)) {
additional_tibble <- rbind(other_models_tibble, other_models_tibble)
} else { # Only one of the two PCE models is requested
additional_tibble <- other_models_tibble
}
if(!time_is_valid) {
return(rbind(main_model_tibble, additional_tibble))
}
if(identical(time_requested, c("10yr", "30yr"))) {
res <- list(
risk_est_10yr = rbind(main_model_tibble, additional_tibble),
risk_est_30yr = main_model_tibble
)
} else if(time_requested == "30yr") {
if(!is.null(additional_tibble)) {
res <- list(
risk_est_10yr = additional_tibble,
risk_est_30yr = main_model_tibble
)
} else {
res <- main_model_tibble
}
} else if(time_requested == "10yr") {
res <- rbind(main_model_tibble, additional_tibble)
}
res
}
message_about_var <- function(var_type,
vars,
quiet) {
if(isTRUE(quiet)) return()
stopifnot(var_type %in% c("required", "optional", "other models"))
if(var_type != "other models") var_msg <- paste0(var_type, " variables")
if(var_type == "other models") var_msg <- "variables for the PCE models"
message_maybe(paste0("Please check the following ", var_msg, ": "), quiet)
message_maybe(paste0(paste0("* ", vars), collapse = "\n"), quiet)
}
message_questionable_estimation <-
"Estimating 30-year risk in people > 59 years of age is questionable"
warn_about_questionable_estimation <- function(age,
time,
quiet) {
if(isTRUE(quiet)) return()
if(any(grepl("both|30", as.character(time))) && is_valid_age(age) && age > 59) {
warn_maybe(message_questionable_estimation, quiet)
}
}
# Data frame handling ----
use_dat_is_data_frame <- isTRUE(is.data.frame(use_dat))
if(use_dat_is_data_frame) {
if(nrow(use_dat) == 0) {
empty_data_frame_msg <- "data frame passed to `use_dat` is empty."
message_maybe(paste0("The ", empty_data_frame_msg), quiet = FALSE)
print(use_dat)
return(input_probs_return_tibble(empty_data_frame_msg))
}
dat <- use_dat %>%
dplyr::mutate(preventr_id = seq_len(nrow(use_dat))) %>%
dplyr::relocate(preventr_id)
prog_bar_requested <- isTRUE(progress)
do_prog_bar <- prog_bar_requested && requireNamespace("utils", quietly = TRUE)
if(prog_bar_requested) {
# If `use_dat_is_data_frame` and `prog_bar_requested` are both `TRUE`,
# then the only way for `do_prog_bar` to be FALSE is for the `utils`
# package to not be available
if(!do_prog_bar) {
message("The `utils` package is not available, so a progress bar will not show.")
}
if(do_prog_bar) {
n_iter <- nrow(dat)
prog_bar <- utils::txtProgressBar(min = 0, max = n_iter, style = 3)
}
}
res <- lapply(
dat[["preventr_id"]],
function(x) {
if(do_prog_bar) utils::setTxtProgressBar(prog_bar, x)
preventr::est_risk(
age = determine_predictor_var("age"),
sex = determine_predictor_var("sex"),
sbp = determine_predictor_var("sbp"),
bp_tx = determine_predictor_var("bp_tx"),
total_c = determine_predictor_var("total_c"),
hdl_c = determine_predictor_var("hdl_c"),
statin = determine_predictor_var("statin"),
dm = determine_predictor_var("dm"),
smoking = determine_predictor_var("smoking"),
egfr = determine_predictor_var("egfr"),
bmi = determine_predictor_var("bmi"),
hba1c = determine_predictor_var("hba1c"),
uacr = determine_predictor_var("uacr"),
zip = determine_predictor_var("zip"),
model = determine_behavior_var("model"),
time = determine_behavior_var("time"),
chol_unit = determine_behavior_var("chol_unit"),
optional_strict = determine_behavior_var("optional_strict"),
quiet = determine_behavior_var("quiet"),
) %>%
dplyr::bind_rows() %>%
dplyr::mutate(preventr_id = dat[["preventr_id"]][x])
}
) %>%
dplyr::bind_rows() %>%
dplyr::relocate(preventr_id)
if(do_prog_bar) close(prog_bar)
return(if(isTRUE(add_to_dat)) add_to_dat(dat, res) else res)
}
# Non-data frame handling ----
if("use_dat" %in% names(cl) &&
!isTRUE(use_dat_is_data_frame) &&
!isTRUE(is.null(use_dat))
) {
warn_maybe(
"You passed something other than a data frame to `use_dat`.",
quiet = FALSE
)
}
# Just want to know whether TRUE, with anything other than TRUE == FALSE
optional_strict <- isTRUE(optional_strict)
# Handle missing args ----
missing_msg <- "nothing (missing from call)"
if(missing(age)) age <- as.symbol(missing_msg)
if(missing(sex)) sex <- as.symbol(missing_msg)
if(missing(sbp)) sbp <- as.symbol(missing_msg)
if(missing(bp_tx)) bp_tx <- as.symbol(missing_msg)
if(missing(total_c)) total_c <- as.symbol(missing_msg)
if(missing(hdl_c)) hdl_c <- as.symbol(missing_msg)
if(missing(statin)) statin <- as.symbol(missing_msg)
if(missing(dm)) dm <- as.symbol(missing_msg)
if(missing(smoking)) smoking <- as.symbol(missing_msg)
if(missing(egfr)) egfr <- as.symbol(missing_msg)
if(missing(bmi)) bmi <- as.symbol(missing_msg)
# Calculate eGFR and BMI if requested ----
if(is.call(cl[["egfr"]]) || is.call(egfr)) {
if(is_supported_egfr_call(cl[["egfr"]])) {
egfr_requested <- TRUE
# Add `age` and `sex`
cl[["egfr"]][["age"]] <- age
cl[["egfr"]][["sex"]] <- sex
# Set `quiet = TRUE` for internal call, because problems, if present, will
# be reported back via mechanisms already established for rest of fx given
# how the call is handled below
cl[["egfr"]][["quiet"]] <- TRUE
res_egfr <- eval(cl[["egfr"]])
if(is.na(res_egfr)) {
cl[["egfr"]][["on_error_return_msg"]] <- TRUE
res_egfr <- as.symbol(paste0(NA, " (", eval(cl[["egfr"]]), ")"))
}
egfr <- res_egfr
} else if(is_supported_egfr_call(egfr)) {
# See comments for handling of `if(is_supported_egfr_call(cl[["egfr"]]))`,
# which apply here as well
egfr_requested <- TRUE
egfr[["age"]] <- age
egfr[["sex"]] <- sex
egfr[["quiet"]] <- TRUE
res_egfr <- eval(egfr)
if(is.na(res_egfr)) {
egfr[["on_error_return_msg"]] <- TRUE
res_egfr <- as.symbol(paste0(NA, " (", eval(egfr), ")"))
}
egfr <- res_egfr
}
}
if(is.call(cl[["bmi"]]) || is.call(bmi)) {
if(is_supported_bmi_call(cl[["bmi"]])) {
bmi_requested <- TRUE
# See comments for handling of calls to calc eGFR, which also apply here
cl[["bmi"]][["quiet"]] <- TRUE
res_bmi <- eval(cl[["bmi"]])
if(is.na(res_bmi)) {
cl[["bmi"]][["on_error_return_msg"]] <- TRUE
res_bmi <- as.symbol(paste0(NA, " (", eval(cl[["bmi"]]), ")"))
}
bmi <- res_bmi
} else if(is_supported_bmi_call(bmi)) {
bmi_requested <- TRUE
bmi[["quiet"]] <- TRUE
res_bmi <- eval(bmi)
if(is.na(res_bmi)) {
bmi[["on_error_return_msg"]] <- TRUE
res_bmi <- as.symbol(paste0(NA, " (", eval(bmi), ")"))
}
bmi <- res_bmi
}
}
# Check `model` arg, partition as needed ----
# Start with these values, then edit if relevant
vars_comparison_problem <- NULL
race_eth <- NULL
other_models <- NULL
comparison_requested <- FALSE
valid_comparison_requested <- FALSE
pce_orig <- FALSE
pce_rev <- FALSE
if(typeof(model) == "list") {
comparison_requested <- TRUE
if("other_models" %in% names(model) && "race_eth" %in% names(model)) {
race_eth <- model[["race_eth"]]
other_models <- model[["other_models"]]
vars_comparison_check <- list(
race_eth = is_valid_race_eth(race_eth, "pce", quiet = FALSE),
other_models = is_valid_other_models(other_models, quiet = FALSE),
age = is_valid_age(age, "pce", quiet = FALSE)
)
if(isTRUE(vars_comparison_check$other_models)) {
valid_comparison_requested <- TRUE
pce_orig <- any(c("pce_both", "pce_orig") %in% other_models)
pce_rev <- any(c("pce_both", "pce_rev") %in% other_models)
}
pce_model_call <- quote(
pce_model(age, sex, race_eth, dm, smoking, total_c, hdl_c, sbp, bp_tx, chol_unit)
)
pce_rev_model_call <- pce_model_call
pce_rev_model_call[[1]] <- quote(pce_rev_model)
vars_comparison_problem <-
vars_comparison_check[
vapply(vars_comparison_check, function(x) !isTRUE(x), logical(1))
]
# Now, overwrite `model` (do this last, to prevent inability to access
# contents housed within list version of `model` arg)
model <- model[["main_model"]]
}
}
# Check required predictor variables ----
vars_required_check <- list(
age = is_valid_age(age, quiet = FALSE),
sex = is_valid_sex_expanded(sex, quiet = FALSE),
sbp = is_valid_sbp(sbp, quiet = FALSE),
bp_tx = is_valid_bp_tx(bp_tx, quiet = FALSE),
total_c = is_valid_total_c(
total_c, chol_unit, expanded_units = TRUE, quiet = FALSE
),
hdl_c = is_valid_hdl_c(
hdl_c, chol_unit, expanded_units = TRUE, quiet = FALSE
),
statin = is_valid_statin(statin, quiet = FALSE),
dm = is_valid_dm(dm, quiet = FALSE),
smoking = is_valid_smoking(smoking, quiet = FALSE),
egfr = is_valid_egfr(egfr, quiet = FALSE),
bmi = is_valid_bmi(bmi, quiet = FALSE),
model = is_valid_model(model, quiet = FALSE),
time = is_valid_time(time, quiet = FALSE)
)
# Check optional predictor variables ----
vars_optional_check <- list(
hba1c = is_valid_hba1c(hba1c, allow_empty = TRUE, quiet = FALSE),
uacr = is_valid_uacr(uacr, allow_empty = TRUE, quiet = FALSE),
zip = is_valid_zip(zip, allow_empty = TRUE, quiet = FALSE)
)
# Determine which vars have probs (if any) ----
vars_required_problem <-
vars_required_check[
vapply(vars_required_check, function(x) !isTRUE(x), logical(1))
]
vars_optional_problem <-
vars_optional_check[
vapply(vars_optional_check, function(x) !(isTRUE(x) || is.null(x)), logical(1))
]
# Handle `time` a bit earlier and a bit differently, as used earlier
time_valid <- isTRUE(vars_required_check[["time"]])
if(time_valid) {
if(time == "both") {
time <- c("10yr", "30yr")
} else {
if(as.character(time) == "10") time <- "10yr"
if(as.character(time) == "30") time <- "30yr"
}
}
# If there are problems w/ required vars ----
# ... return tibble of NAs, but tell user about problems
if(length(vars_required_problem) != 0) {
message_about_var("required", vars_required_problem, quiet)
# If `optional_strict` is TRUE, also tell user about problems
# w/ optional vars and (if relevant) other models
if(optional_strict) {
if(length(vars_optional_problem) != 0) {
message_about_var("optional", vars_optional_problem, quiet)
}
if(length(vars_comparison_problem) != 0) {
message_about_var("other models", vars_comparison_problem, quiet)
}
}
# Now, also warn about questionable estimation if relevant
warn_about_questionable_estimation(age, time, quiet)
# Return two empty tibbles if `time` is "both"; otherwise, return one
return(collapse_maybe(return_empty_tibbles(), collapse))
}
# If there are problems w/ optional vars ----
# ... action depends on `optional_strict`
# If `optional_strict` is TRUE, return tibble of NAs, but tell user
# about problems in tibble (and via messaging if not `quiet`)
# If `optional_strict` is FALSE, set problematic optional vars to NULL
# and proceed, but tell user about problems via tibble ultimately returned
# later in script (via messaging if not `quiet`)
if(length(vars_optional_problem) != 0 || length(vars_comparison_problem) != 0) {
if(optional_strict) {
# Warn about questionable estimation (if relevant) and message about
# problematic optional vars
if(length(vars_optional_problem) != 0) {
message_about_var("optional", vars_optional_problem, quiet)
}
if(length(vars_comparison_problem) != 0) {
message_about_var("other models", vars_comparison_problem, quiet)
}
warn_about_questionable_estimation(age, time, quiet)
return(collapse_maybe(return_empty_tibbles(), collapse))
}
# If `optional_strict = FALSE`, just message about problematic optional vars,
# b/c messaging about questionable estimation will occur later
if(length(vars_optional_problem) != 0) {
message_about_var(
"optional",
paste0(vars_optional_problem, " (so set to NULL)"),
quiet
)
}
if(length(vars_comparison_problem) != 0) {
message_about_var("other models", vars_comparison_problem, quiet)
}
# `optional_input_problems_for_reporting` is also used later on in script,
# for values for `input_problems` in data frame that's returned
optional_input_problems_for_reporting <-
paste0(vars_optional_problem, " (so set to NULL)", collapse = "; ")
comparison_input_problems_for_reporting <-
paste0(vars_comparison_problem, collapse = "; ")
# If optional vars are invalid or empty, set to NULL
if(!isTRUE(vars_optional_check$hba1c) || is_na_or_empty(hba1c)) hba1c <- NULL
if(!isTRUE(vars_optional_check$uacr) || is_na_or_empty(uacr)) uacr <- NULL
if(!isTRUE(vars_optional_check$zip) || is_na_or_empty(zip)) zip <- NULL
}
valid_zip_with_sdi <- is_zip_with_sdi_data(zip, quiet = FALSE)
# Standardize inputs accepting variants ----
# (logical inputs accepting 0 or 1 handled elsewhere via `as_numeric_binary()`)
if(is_valid_race_eth(race_eth, "pce")) race_eth <- standardize_race_eth(race_eth)
# Don't need to check whether `chol_unit` or `sex` are valid like `race_eth`
# due to how these inputs are handled earlier in the function
chol_unit <- standardize_chol_unit(chol_unit)
sex <- standardize_sex(sex)
# Determine model to run ----
if(is_na_or_empty(model)) model <- select_model(hba1c, uacr, zip)
stylized_model <- stylize_model_to_run(model)
if(!is_na_or_empty(zip) && !isTRUE(valid_zip_with_sdi)) {
message_maybe(valid_zip_with_sdi, quiet)
}
message_maybe(paste0("PREVENT estimates are from: ", stylized_model, "."), quiet)
warn_about_questionable_estimation(age, time, quiet)
# Do risk estimation and return result ----
pred_vals <-
dplyr::tibble(
age = age,
total_c = total_c,
hdl_c = hdl_c,
statin = as.logical(statin),
sbp = sbp,
bp_tx = as.logical(bp_tx),
dm = as.logical(dm),
smoking = as.logical(smoking),
egfr = egfr,
bmi = bmi,
sex = sex,
hba1c = na_or_empty_to_na(hba1c),
uacr = na_or_empty_to_na(uacr),
zip = na_or_empty_to_na(as.character(zip))
) %>%
prep_terms(model, chol_unit)
# Edit `time` if `pce_orig` || `pce_rev` and `time == "30yr"`
# (to accommodate the fact PCE models only have 10-year estimates)
time_edited <- FALSE
if(comparison_requested && identical(time, "30yr")) {
time <- c("10yr", "30yr")
time_edited <- TRUE
}
# Get results
add_empty_comparison_row <- function(dat, probs = comparison_input_problems_for_tibble) {
dat %>%
dplyr::add_row(model = "none", input_problems = probs)
}
res <- vector(mode = "list", length = length(time))
names(res) <- time
for(given_time in time) {
res[[given_time]] <- run_models(model, sex, given_time, pred_vals)
if(length(vars_optional_problem) != 0) {
res[[given_time]][["input_problems"]] <- optional_input_problems_for_reporting
}
if(given_time == "30yr" && age > 59) {
res[[given_time]][["input_problems"]] <-
if(length(vars_optional_problem) != 0) {
paste(
paste0("Warning: ", message_questionable_estimation),
res[[given_time]][["input_problems"]],
sep = "; "
)
} else {
paste0("Warning: ", message_questionable_estimation)
}
}
if(given_time == "10yr") {
comparison_input_problems_for_tibble <-
if(length(vars_comparison_problem) != 0) {
comparison_input_problems_for_reporting
} else {
NA_character_
}
if(comparison_requested && !valid_comparison_requested) {
res[[given_time]] <- add_empty_comparison_row(res[[given_time]])
}
if(pce_orig) {
if(length(vars_comparison_problem) == 0) {
res[[given_time]] <- res[[given_time]] %>%
dplyr::add_row(
model = "pce_orig",
ascvd = eval(pce_model_call),
over_years = 10
)
} else {
res[[given_time]] <- add_empty_comparison_row(res[[given_time]])
}
}
if(pce_rev) {
if(length(vars_comparison_problem) == 0) {
res[[given_time]] <- res[[given_time]] %>%
dplyr::add_row(
model = "pce_rev",
ascvd = eval(pce_rev_model_call),
over_years = 10
)
} else {
res[[given_time]] <- add_empty_comparison_row(res[[given_time]])
}
}
# If time has been edited, this means the user doesn't want the 10-year
# results from PREVENT, so filter to include only results where the model
# is "pce_orig", "pce_rev", or "none" (latter to ensure empty tibbles
# are returned if appropriate)
if(time_edited) {
res[[given_time]] <- res[[given_time]] %>%
dplyr::filter(model %in% c("pce_orig", "pce_rev", "none"))
}
}
}
# Remove listing structure if only one time being considered
if(length(time) == 1) return(res[[time]])
# Set names for list
names(res) <- paste0("risk_est_", time)
collapse_maybe(res, collapse)
}
#' @rdname estimate_risk
#' @export
est_risk <- estimate_risk
#' @rdname app
#'
#' @title
#' Navigate to browser-based Shiny implementation of PREVENT equations
#'
#' @description
#' This function opens a browser window (using the user's default browser) and
#' navigates to the Shiny app located at:
#'
#' [https://martingmayer.shinyapps.io/prevent-equations](https://martingmayer.shinyapps.io/prevent-equations)
#'
#' Easier-to-remember URLs:
#' - [https://tiny.cc/prevent-equations](https://martingmayer.shinyapps.io/prevent-equations)
#' - [https://tiny.cc/preventequations](https://martingmayer.shinyapps.io/prevent-equations)
#'
#' The app includes risk visualization and several options for customizing the output.
#'
#' @param ... Not used. Reserved for future use.
#'
#' @return Returns `NULL` invisibly after opening app in your default browser.
#'
#' @examples
#' app()
#'
#' @export
app <- function(...) {
if(interactive()) {
if(requireNamespace("utils", quietly = TRUE)) {
utils::browseURL("https://martingmayer.shinyapps.io/prevent-equations")
} else {
message(
paste0(
"The `utils` package is not installed, so `app()` cannot automatically ",
"launch the app in your browser. You can either install the `utils` ",
"package and try again or just visit ",
"https://martingmayer.shinyapps.io/prevent-equations in your browser. ",
"https://tiny.cc/prevent-equations or ",
"https://tiny.cc/preventequations will also work."
)
)
}
}
invisible(NULL)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.