tests/testthat/helpers.R

reaggregate_counts_edwin_unweighted <- function(bounds, counts, new_bounds) {
    # bounds <- c(0, 80, 150, 180)
    # counts <- c(10, 20, 30, 40)
    # new_bounds <- c(0, 60, 150, 160, 180)
    # stop()
    # As far as I can tell we need this test! Note this is not in the current reaggregate_rates?
    stopifnot(max(bounds) >= max(new_bounds))
    tibble::tibble(lower_bound = bounds, counts = counts) -> dat

    all_lower <- sort(unique(c(bounds, new_bounds)))
    cut_ages(all_lower, breaks = bounds) |>
        dplyr::left_join(dat, by = dplyr::join_by(lower_bound)) |>
        dplyr::mutate(all_lower = all_lower, fraction = (dplyr::lead(all_lower) - all_lower) / (upper_bound - lower_bound)) |>
        # Last age group
        dplyr::mutate(fraction = ifelse(is.na(fraction), 1, fraction)) |>
        dplyr::mutate(count = fraction * counts) -> dat

    dat |>
        dplyr::select(all_lower, count) -> dat0

    cut_ages(all_lower, breaks = new_bounds) |>
        dplyr::mutate(count = dat0$count) |>
        dplyr::summarise(count = sum(count), .by = c(lower_bound))
}

reaggregate_counts_edwin_weighted <- function(bounds, counts, new_bounds, population_bounds, population_weights) {
    # bounds <- c(0, 80, 150, 180)
    # counts <- c(10, 20, 30, 40)
    # new_bounds <- c(0, 60, 150, 160, 180)
    # population_bounds <- c(0, 60, 150, 160, 175, 180)
    # population_weights <- c(10, 20, 30, 40, 50, 60)
    # library(tidyverse)
    # stop()

    all_lower <- sort(unique(c(bounds, new_bounds, population_bounds)))
    cut_ages(all_lower, breaks = bounds) -> dat10
    dat10 |>
        dplyr::left_join(
            tibble::tibble(lower_bound = bounds, count = counts),
            by = dplyr::join_by(lower_bound)) -> dat0

    dat1 <- reaggregate_counts_edwin_unweighted(population_bounds, population_weights, all_lower)
    # c_k = c_i N_k/N_i, where i is old bounds, k is new bounds
    cut_ages(all_lower, breaks = new_bounds) -> dat3
    dat0 |>
        dplyr::mutate(lower_bound = all_lower) |>
        dplyr::left_join(dplyr::rename(dat1, w = count), by = dplyr::join_by(lower_bound)) |>
        dplyr::mutate(i = dat0$lower_bound) |>
        dplyr::mutate(ck = count * w/sum(w), .by = c(i)) |>
        dplyr::mutate(lower_bound = dat3$lower_bound) |>
        dplyr::summarise(count = sum(ck), .by = c(lower_bound)) -> dat5

    stopifnot(sum(dat5$count) == sum(counts))
    dat5
}

reaggregate_rates_edwin_weighted <- function(bounds, rates, new_bounds, population_bounds, population_weights) {
    # bounds = c(0, 80, 150, 180)
    # rates = c(0.1, 0.2, 0.3, .4)
    # new_bounds = c(0, 60, 150, 160, 180)
    # population_bounds = c(0, 60, 150, 160, 175, 180)
    # population_weights = c(10, 20, 30, 40, 50, 60)
    # stop()
    # NOTE: For Tim: I don't think we need a test here, because we can assume the rate is the same for all above the max(bounds) even if max(new_bounds) > max(bounds)
    # Instead we do need the test that max(population_bounds) < max(new_bounds). Note that I am not checking that, because that will be checked by
    # reaggregate_counts_edwin_unweighted
    tibble::tibble(lower_bound = bounds, rates = rates) -> dat
    all_lower <- sort(unique(c(bounds, new_bounds, population_bounds)))
    cut_ages(all_lower, breaks = bounds) |>
        dplyr::left_join(dat, by = dplyr::join_by(lower_bound)) -> dat1
    dat2 <- reaggregate_counts_edwin_unweighted(population_bounds, population_weights, all_lower)
    dat1 |>
        dplyr::mutate(weight = dat2$count) -> dat3
    cut_ages(all_lower, breaks = new_bounds) -> dat4
    dat3 |>
        dplyr::mutate(lower_bound = dat4$lower_bound) |>
        dplyr::summarise(rate = sum(rates * weight) / sum(weight), .by = c(lower_bound))
}

Try the ageutils package in your browser

Any scripts or data that you put into this service are public.

ageutils documentation built on Sept. 11, 2024, 7:54 p.m.