Nothing
#' Find the maximum likelihood estimate of the incidence rate parameter
#'
#' This function models seroincidence using maximum likelihood estimation; that is, it finds the value of the seroincidence parameter which maximizes the likelihood (i.e., joint probability) of the data.
#' @inheritParams log_likelihood
#' @inheritParams stats::nlm
#' @param pop_data a [data.frame] with cross-sectional serology data per antibody and age, and additional columns
#' @param lambda_start starting guess for incidence rate, in years/event.
#' @param antigen_isos Character vector with one or more antibody names. Values must match `pop_data`
#' @param build_graph whether to graph the log-likelihood function across a range of incidence rates (lambda values)
#' @param print_graph whether to display the log-likelihood curve graph in the course of running `est.incidence()`
#' @param stepmin A positive scalar providing the minimum allowable relative step length.
#' @inheritDotParams stats::nlm -f -p -hessian -print.level -steptol
#' @returns a `"seroincidence"` object, which is a [stats::nlm()] fit object with extra meta-data attributes `lambda_start`, `antigen_isos`, and `ll_graph`
#' @export
#' @examples
#'
#' library(dplyr)
#'
#' xs_data <-
#' sees_pop_data_pk_100
#'
#' curve <-
#' typhoid_curves_nostrat_100 %>%
#' filter(antigen_iso %in% c("HlyE_IgA", "HlyE_IgG"))
#'
#' noise <-
#' example_noise_params_pk
#'
#' est1 <- est.incidence(
#' pop_data = xs_data,
#' curve_params = curve,
#' noise_params = noise,
#' antigen_isos = c("HlyE_IgG", "HlyE_IgA"),
#' )
#'
#' summary(est1)
est.incidence <- function(
pop_data,
curve_params,
noise_params,
antigen_isos = pop_data$antigen_iso %>% unique(),
lambda_start = 0.1,
stepmin = 1e-8,
stepmax = 3,
verbose = FALSE,
build_graph = FALSE,
print_graph = build_graph & verbose,
...) {
if (verbose > 1) {
message("inputs to `est.incidence()`:")
print(environment() %>% as.list())
}
.errorCheck(
data = pop_data,
antigen_isos = antigen_isos,
curve_params = curve_params
)
pop_data <- pop_data %>%
dplyr::filter(.data$antigen_iso %in% antigen_isos) %>%
dplyr::select(
pop_data %>% get_value_var(),
pop_data %>% get_age_var(),
"antigen_iso"
) %>%
filter(if_all(everything(), ~!is.na(.x)))
curve_params <- curve_params %>%
ungroup() %>%
dplyr::mutate(
alpha = .data$alpha * 365.25,
d = .data$r - 1
) %>%
dplyr::filter(.data$antigen_iso %in% antigen_isos) %>%
dplyr::select("y1", "alpha", "d", "antigen_iso") %>%
droplevels()
noise_params <- noise_params %>%
dplyr::filter(.data$antigen_iso %in% antigen_isos) %>%
droplevels()
# incidence can not be calculated if there are zero observations.
if (nrow(pop_data) == 0) {
stop("No data provided.")
}
if (verbose) {
message("nrow(curve_params) = ", nrow(curve_params))
}
if (nrow(noise_params) != length(antigen_isos)) {
stop("too many rows of noise parameters.")
}
pop_data <- pop_data %>% split(~antigen_iso)
curve_params <- curve_params %>% split(~antigen_iso)
noise_params <- noise_params %>% split(~antigen_iso)
# First, check if we find numeric results...
res <- .nll(
pop_data = pop_data,
log.lambda = log(lambda_start),
antigen_isos = antigen_isos,
curve_params = curve_params,
noise_params = noise_params,
verbose = verbose,
...
)
if (is.na(res)) {
warning("Could not calculate the log-likelihood with starting parameter value.")
return(NULL)
}
if (verbose) {
message("Initial negative log-likelihood: ", res)
}
if (build_graph) {
if (verbose) message("building likelihood graph")
graph <- graph_loglik(
highlight_points = lambda_start,
highlight_point_names = "lambda_start",
pop_data = pop_data,
antigen_isos = antigen_isos,
curve_params = curve_params,
noise_params = noise_params
)
if (print_graph) {
print(
graph +
ggplot2::scale_x_log10(
labels = scales::label_comma()
)
)
}
} else {
graph <- NULL
}
# [stats::nlm()] expects an objective function `f` "returning a single numeric value",
# but [.nll()] is vectorized via its subfunction [f_dev()].
# The vectorization doesn't appear to cause a problem for [nlm()].
if (verbose) message("about to call `nlm()`")
# Estimate lambda
time <-
{
fit <- nlm(
f = .nll,
p = log(lambda_start),
pop_data = pop_data,
antigen_isos = antigen_isos,
curve_params = curve_params,
noise_params = noise_params,
hessian = TRUE,
stepmax = stepmax,
steptol = stepmin,
verbose = verbose,
print.level = ifelse(verbose, 2, 0),
...
)
} %>%
system.time()
code_text <- nlm_exit_codes[fit$code]
message1 <- "\n`nlm()` completed with the following convergence code:\n"
if (fit$code %in% 3:5) {
warning(
"`nlm()` may not have reached the maximum likelihood estimate.",
message1,
code_text
)
}
if (verbose) {
message("\nElapsed time: ")
print(time)
}
if (build_graph) {
graph <-
graph %>%
add_point_to_graph(
fit = fit,
pop_data = pop_data,
antigen_isos = antigen_isos,
curve_params = curve_params,
noise_params = noise_params
)
if (print_graph) {
print(
graph +
ggplot2::scale_x_log10(
labels = scales::label_comma()
)
)
}
}
fit <- fit %>%
structure(
class = union("seroincidence", class(fit)),
lambda_start = lambda_start,
antigen_isos = antigen_isos,
ll_graph = graph
)
return(fit)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.