Nothing
# nolint start. Don't lint Edwin's code as we want to keep it as was
reaggregate_counts_edwin_unweighted <- function(bounds, counts, new_bounds) {
# bounds <- c(0, 80, 150, 180)
# counts <- c(10, 20, 30, 40)
# new_bounds <- c(0, 60, 150, 160, 180)
# stop()
# As far as I can tell we need this test! Note this is not in the current reaggregate_rates?
stopifnot(max(bounds) >= max(new_bounds))
tibble::tibble(lower = bounds, counts = counts) -> dat
all_lower <- sort(unique(c(bounds, new_bounds)))
cut_ages(all_lower, breaks = bounds) |>
dplyr::left_join(dat, by = dplyr::join_by(lower)) |>
dplyr::mutate(all_lower = all_lower, fraction = (dplyr::lead(all_lower) - all_lower) / (upper - lower)) |>
# Last age group
dplyr::mutate(fraction = ifelse(is.na(fraction), 1, fraction)) |>
dplyr::mutate(count = fraction * counts) -> dat
dat |>
dplyr::select(all_lower, count) -> dat0
cut_ages(all_lower, breaks = new_bounds) |>
dplyr::mutate(count = dat0$count) |>
dplyr::summarise(count = sum(count), .by = c(lower))
}
reaggregate_counts_edwin_weighted <- function(bounds, counts, new_bounds, population_bounds, population_weights) {
# bounds <- c(0, 80, 150, 180)
# counts <- c(10, 20, 30, 40)
# new_bounds <- c(0, 60, 150, 160, 180)
# population_bounds <- c(0, 60, 150, 160, 175, 180)
# population_weights <- c(10, 20, 30, 40, 50, 60)
# library(tidyverse)
# stop()
all_lower <- sort(unique(c(bounds, new_bounds, population_bounds)))
cut_ages(all_lower, breaks = bounds) -> dat10
dat10 |>
dplyr::left_join(
tibble::tibble(lower = bounds, count = counts),
by = dplyr::join_by(lower)) -> dat0
dat1 <- reaggregate_counts_edwin_unweighted(population_bounds, population_weights, all_lower)
# c_k = c_i N_k/N_i, where i is old bounds, k is new bounds
cut_ages(all_lower, breaks = new_bounds) -> dat3
dat0 |>
dplyr::mutate(lower = all_lower) |>
dplyr::left_join(dplyr::rename(dat1, w = count), by = dplyr::join_by(lower)) |>
dplyr::mutate(i = dat0$lower) |>
dplyr::mutate(ck = count * w/sum(w), .by = c(i)) |>
dplyr::mutate(lower = dat3$lower) |>
dplyr::summarise(count = sum(ck), .by = c(lower)) -> dat5
stopifnot(isTRUE(all.equal(sum(dat5$count), sum(counts))))
dat5
}
reaggregate_rates_edwin_weighted <- function(bounds, rates, new_bounds, population_bounds, population_weights) {
# bounds = c(0, 80, 150, 180)
# rates = c(0.1, 0.2, 0.3, .4)
# new_bounds = c(0, 60, 150, 160, 180)
# population_bounds = c(0, 60, 150, 160, 175, 180)
# population_weights = c(10, 20, 30, 40, 50, 60)
# stop()
# NOTE: For Tim: I don't think we need a test here, because we can assume the rate is the same for all above the max(bounds) even if max(new_bounds) > max(bounds)
# Instead we do need the test that max(population_bounds) < max(new_bounds). Note that I am not checking that, because that will be checked by
# reaggregate_counts_edwin_unweighted
tibble::tibble(lower = bounds, rates = rates) -> dat
all_lower <- sort(unique(c(bounds, new_bounds, population_bounds)))
cut_ages(all_lower, breaks = bounds) |>
dplyr::left_join(dat, by = dplyr::join_by(lower)) -> dat1
dat2 <- reaggregate_counts_edwin_unweighted(population_bounds, population_weights, all_lower)
dat1 |>
dplyr::mutate(weight = dat2$count) -> dat3
cut_ages(all_lower, breaks = new_bounds) -> dat4
dat3 |>
dplyr::mutate(lower = dat4$lower) |>
dplyr::summarise(rate = sum(rates * weight) / sum(weight), .by = c(lower))
}
# nolint end
reaggregate_counts_fast <- function(
bounds,
counts,
new_bounds,
...,
population_bounds = NULL,
population_weights = NULL
) {
rlang::check_dots_empty0(...)
# lower bounds checks
if (!all(is.finite(bounds)))
stop("`bounds` must be a finite, numeric vector.")
if (!length(bounds))
stop("`bounds` must be of non-zero length.")
if (is.unsorted(bounds, na.rm = FALSE, strictly = TRUE))
stop("`bounds` must be in strictly ascending order")
if (bounds[1L] < 0)
stop("`bounds` must be non-negative.")
# rates checks
if (!is.numeric(counts))
stop("`counts` must be numeric.")
if (length(counts) != length(bounds))
stop("`counts` must be the same length as `bounds`.")
# new bounds checks
if (!all(is.finite(new_bounds)))
stop("`new_bounds` must be a finite, numeric vector.")
if (!length(new_bounds))
stop("`new_bounds` must be of non-zero length.")
if (is.unsorted(new_bounds, na.rm = FALSE, strictly = TRUE))
stop("`new_bounds` must be in strictly ascending order")
if (new_bounds[1L] < 0)
stop("`new_bounds` must be non-negative.")
# population bounds checks
if (is.null(population_bounds)) {
if (!is.null(population_weights) && length(population_weights) != length(new_bounds)) {
stop("When `population_bounds` is not specified, `population_weights` must be the same length as `new_bounds`.") # nolint: line_length_linter.
}
if (max(bounds) < max(new_bounds)) {
stop("Where `population_bounds` are not specified the maximum value of `new_bounds` must be less than or equal to that of `bounds`.") # nolint: line_length_linter.
}
population_bounds <- new_bounds
} else {
if (!all(is.finite(population_bounds)))
stop("`population_bounds` must be a finite, numeric vector.")
if (!length(population_bounds))
stop("`population_bounds` must be of non-zero length.")
if (is.unsorted(population_bounds, na.rm = FALSE, strictly = TRUE))
stop("`population_bounds` must be in strictly ascending order")
if (population_bounds[1L] < 0)
stop("`population_bounds} must be non-negative.")
if (max(bounds) > max(population_bounds)) {
stop(
"The maximum value of `bounds` must be less than or equal to that of `population_bounds`." # nolint: line_length_linter.
)
}
}
# population_weights check
if (!is.null(population_weights)) {
if (!all(is.finite(population_weights)) || any(population_weights < 0))
stop("`population_weights` must be numeric, non-negative and finite.")
if (length(population_weights) != length(population_bounds))
stop("`population_weights` must be the same length as `population_bounds`."
)
if (sum(population_weights) == 0)
stop("At least one `population_weight` must be non-zero.")
}
# Ensure bounds start at zero and adjust counts accordingly
if (bounds[1L] != 0) {
bounds <- c(0, bounds)
counts <- c(0, counts)
}
# Ensure new bounds start at zero
if (new_bounds[1L] != 0)
new_bounds <- c(0, new_bounds)
# Ensure population_bounds start at zero and adjust weights accordingly
if (population_bounds[1L] != 0) {
population_bounds <- c(0, population_bounds)
if (!is.null(population_weights))
population_weights <- c(0, population_weights)
}
# calculate the old and new upper bounds
old_upper <- c(bounds[-1L], Inf)
pop_upper <- c(population_bounds[-1L], Inf)
new_upper <- c(new_bounds[-1L], Inf)
# calculate the combined bounds
all_lower <- sort(unique(c(bounds, new_bounds, population_bounds)))
all_upper <- c(all_lower[-1L], Inf)
if (is.null(population_weights))
population_weights <- pop_upper - population_bounds
# we need to keep track where the combined bits would fit in the old and
# new bounds. This information is stored in the old_container and
# new_container vectors respectively.
new_container <- old_container <- pop_container <- integer(length(all_upper))
new_index <- old_index <- pop_index <- 1L
for (i in seq_along(old_container)) {
old_index <- old_index + (all_upper[i] > old_upper[old_index])
new_index <- new_index + (all_upper[i] > new_upper[new_index])
pop_index <- pop_index + (all_upper[i] > pop_upper[pop_index])
old_container[i] <- old_index
new_container[i] <- new_index
pop_container[i] <- pop_index
}
result <- counts[old_container]
all_diff <- all_upper - all_lower
pop_diff <- (pop_upper[pop_container] - population_bounds[pop_container])
ratio <- all_diff / pop_diff
ratio[all_diff == Inf & pop_diff == Inf] <- 1
pop_weights <- population_weights[pop_container] * ratio
pop_weights <- pop_weights / ave(pop_weights, old_container, FUN = sum)
result <- counts[old_container] * pop_weights
result[length(result)] <- sum(counts) - sum(result[-length(result)])
out <- numeric(length(new_bounds))
idx <- 1L
for (i in seq_along(new_container)) {
if (new_container[i] != idx)
idx <- idx + 1L
out[idx] <- out[idx] + result[i]
}
out[idx] <- sum(counts) - sum(out[-idx])
interval <- sprintf("[%.f, %.f)", new_bounds, new_upper)
interval <- factor(interval, levels = interval, ordered = TRUE)
tibble::new_tibble(
list(
interval = interval,
lower = new_bounds,
upper = new_upper,
count = out
)
)
}
reaggregate_rates_fast <- function(
bounds,
rates,
new_bounds,
...,
population_bounds = NULL,
population_weights = NULL
) {
rlang::check_dots_empty0(...)
# lower bounds checks
if (!all(is.finite(bounds)))
stop("`bounds` must be a finite, numeric vector.")
if (!length(bounds))
stop("`bounds` must be of non-zero length.")
if (is.unsorted(bounds, na.rm = FALSE, strictly = TRUE))
stop("`bounds` must be in strictly ascending order")
if (bounds[1L] < 0)
stop("`bounds` must be non-negative.")
# rates checks
if (!is.numeric(rates))
stop("`rates` must be numeric.")
if (length(rates) != length(bounds))
stop("`rates` must be the same length as `bounds`.")
# new bounds checks
if (!all(is.finite(new_bounds)))
stop("`new_bounds` must be a finite, numeric vector.")
if (!length(new_bounds))
stop("`new_bounds` must be of non-zero length.")
if (is.unsorted(new_bounds, na.rm = FALSE, strictly = TRUE))
stop("`new_bounds` must be in strictly ascending order")
if (new_bounds[1L] < 0)
stop("`new_bounds` must be non-negative.")
# population bounds checks
if (is.null(population_bounds)) {
if (!is.null(population_weights) && (length(population_weights) != length(new_bounds))) {
stop("When `population_bounds` is not specified, `population_weights` must be the same length as `new_bounds`.") # nolint: line_length_linter.
}
if (max(bounds) < max(new_bounds)) {
stop("Where `population_bounds` are not specified the maximum value of `new_bounds` must be less than or equal to that of `bounds`." # nolint: line_length_linter
)
}
population_bounds <- new_bounds
} else {
if (!all(is.finite(population_bounds)))
stop("`population_bounds` must be a finite, numeric vector.")
if (!length(population_bounds))
stop("`population_bounds` must be of non-zero length.")
if (is.unsorted(population_bounds, na.rm = FALSE, strictly = TRUE))
stop("`population_bounds` must be in strictly ascending order")
if (population_bounds[1L] < 0)
stop("`population_bounds` must be non-negative.")
if (max(population_bounds) < max(new_bounds)) {
stop("The maximum value of `new_bounds` must be less than or equal to that of `population_bounds`.") # nolint: line_length_linter
}
}
# population_weights check
if (!is.null(population_weights)) {
if (!all(is.finite(population_weights)) || any(population_weights < 0))
stop("`population_weights` must be numeric, non-negative and finite.")
if (length(population_weights) != length(population_bounds))
stop("`population_weights` must be the same length as `population_bounds`.")
if (sum(population_weights) == 0)
stop("At least one `population_weight` must be non-zero.")
}
# Ensure bounds start at zero and adjust rates accordingly
if (bounds[1L] != 0) {
bounds <- c(0, bounds)
rates <- c(0, rates)
}
# Ensure new bounds start at zero
if (new_bounds[1L] != 0)
new_bounds <- c(0, new_bounds)
# Ensure population_bounds start at zero and adjust weights accordingly
if (population_bounds[1L] != 0) {
population_bounds <- c(0, population_bounds)
if (!is.null(population_weights))
population_weights <- c(0, population_weights)
}
# calculate the old and new upper bounds
old_upper <- c(bounds[-1L], Inf)
pop_upper <- c(population_bounds[-1L], Inf)
new_upper <- c(new_bounds[-1L], Inf)
# calculate the combined bounds
all_lower <- sort(unique(c(bounds, new_bounds, population_bounds)))
all_upper <- c(all_lower[-1L], Inf)
# TODO - explain this!!!
if (is.null(population_weights)) {
population_weights <- pop_upper - population_bounds
population_weights[length(population_weights)] <- 1
}
# we need to keep track where the combined bits would fit in the old and
# new bounds. This information is stored in the old_container and
# new_container vectors respectively.
new_container <- old_container <- pop_container <- integer(length(all_upper))
new_index <- old_index <- pop_index <- 1L
for (i in seq_along(old_container)) {
old_index <- old_index + (all_upper[i] > old_upper[old_index])
new_index <- new_index + (all_upper[i] > new_upper[new_index])
pop_index <- pop_index + (all_upper[i] > pop_upper[pop_index])
old_container[i] <- old_index
new_container[i] <- new_index
pop_container[i] <- pop_index
}
all_diff <- all_upper - all_lower
pop_diff <- (pop_upper[pop_container] - population_bounds[pop_container])
ratio <- all_diff / pop_diff
ratio[all_diff == Inf & pop_diff == Inf] <- 1
pop_weights <- population_weights[pop_container] * ratio
result <- rates[old_container] * pop_weights
out <- numeric(length(new_bounds))
idx <- 1L
weight <- 0
for (i in seq_along(new_container)) {
if (new_container[i] != idx) {
out[idx] <- out[idx] / weight
idx <- idx + 1L
weight <- 0
}
weight <- weight + pop_weights[i]
out[idx] <- out[idx] + result[i]
}
out[length(out)] <- out[length(out)] / weight
interval <- sprintf("[%.f, %.f)", new_bounds, new_upper)
interval <- factor(interval, levels = interval, ordered = TRUE)
tibble::new_tibble(
list(
interval = interval,
lower = new_bounds,
upper = new_upper,
rate = out
)
)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.