Nothing
#' Extract the empirical age distribution of contacts from a survey
#'
#' Returns a data.frame of (age, proportion) pairs representing how
#' contact ages are distributed in the survey. This can be passed to
#' [assign_age_groups()] as `estimated_contact_age` to impute ages
#' from ranges using this distribution rather than uniform sampling.
#'
#' @param survey a [survey()] object
#' @returns a data.frame with columns `age` (integer) and `proportion` (numeric,
#' summing to 1)
#' @examples
#' data(polymod)
#' dist <- contact_age_distribution(polymod)
#' head(dist)
#' plot(dist$age, dist$proportion, type = "h",
#' xlab = "Age", ylab = "Proportion")
#'
#' @export
#' @autoglobal
contact_age_distribution <- function(survey) {
check_if_contact_survey(survey)
contacts <- data.table::copy(survey$contacts)
# Use exact ages only to avoid including previously imputed values
age_col <- if ("cnt_age_exact" %in% colnames(contacts)) {
"cnt_age_exact"
} else if ("cnt_age" %in% colnames(contacts)) {
"cnt_age"
} else {
cli::cli_abort("No contact age column found in survey.")
}
# Convert factor levels to their numeric values (not factor codes)
contacts <- convert_factor_to_integer(contacts, age_col)
ages <- suppressWarnings(as.numeric(contacts[[age_col]]))
ages <- ages[!is.na(ages)]
if (length(ages) == 0) {
cli::cli_abort("No non-missing contact ages found in survey.")
}
if (!all(is.finite(ages))) {
cli::cli_abort("Contact ages must be finite.")
}
if (any(ages < 0)) {
cli::cli_abort("Contact ages must be non-negative.")
}
if (any(ages %% 1 != 0)) {
cli::cli_abort("Contact ages must be whole numbers.")
}
ages <- as.integer(ages)
counts <- data.table::data.table(age = ages)[, .N, by = age]
counts[, proportion := N / sum(N)]
counts <- counts[order(age)]
data.frame(age = counts$age, proportion = counts$proportion)
}
#' Validate an age distribution data.frame
#' @param x object to validate
#' @returns `x` invisibly, or errors
#' @keywords internal
validate_age_distribution <- function(x) {
if (!is.data.frame(x)) {
cli::cli_abort(
"{.arg estimated_contact_age} must be a string or a data.frame."
)
}
required <- c("age", "proportion")
missing_cols <- setdiff(required, colnames(x))
if (length(missing_cols) > 0) {
cli::cli_abort(
"Age distribution must have column{?s} {.val {missing_cols}}."
)
}
if (!is.numeric(x$age) || !is.numeric(x$proportion)) {
cli::cli_abort(
"Columns {.val age} and {.val proportion} must be numeric."
)
}
if (!all(is.finite(x$age)) || !all(is.finite(x$proportion))) {
cli::cli_abort(
"Columns {.val age} and {.val proportion} must not contain missing or \\
non-finite values."
)
}
if (any(x$age < 0) || any(x$age %% 1 != 0)) {
cli::cli_abort("Column {.val age} must contain non-negative integer ages.")
}
if (any(x$proportion < 0)) {
cli::cli_abort("Column {.val proportion} must not contain negative values.")
}
# Normalise proportions to sum to 1 if they don't already
total <- sum(x$proportion)
if (total <= 0) {
cli::cli_abort("Column {.val proportion} must have a positive sum.")
}
if (abs(total - 1) > 1e-8) {
cli::cli_warn("Proportions do not sum to 1; normalising.")
x$proportion <- x$proportion / total
}
invisible(x)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.