tests/testthat/helpers.R

# nolint start. Don't lint Edwin's code as we want to keep it as was

reaggregate_counts_edwin_unweighted <- function(bounds, counts, new_bounds) {
    # bounds <- c(0, 80, 150, 180)
    # counts <- c(10, 20, 30, 40)
    # new_bounds <- c(0, 60, 150, 160, 180)
    # stop()
    # As far as I can tell we need this test! Note this is not in the current reaggregate_rates?
    stopifnot(max(bounds) >= max(new_bounds))
    tibble::tibble(lower = bounds, counts = counts) -> dat

    all_lower <- sort(unique(c(bounds, new_bounds)))
    cut_ages(all_lower, breaks = bounds) |>
        dplyr::left_join(dat, by = dplyr::join_by(lower)) |>
        dplyr::mutate(all_lower = all_lower, fraction = (dplyr::lead(all_lower) - all_lower) / (upper - lower)) |>
        # Last age group
        dplyr::mutate(fraction = ifelse(is.na(fraction), 1, fraction)) |>
        dplyr::mutate(count = fraction * counts) -> dat

    dat |>
        dplyr::select(all_lower, count) -> dat0

    cut_ages(all_lower, breaks = new_bounds) |>
        dplyr::mutate(count = dat0$count) |>
        dplyr::summarise(count = sum(count), .by = c(lower))
}

reaggregate_counts_edwin_weighted <- function(bounds, counts, new_bounds, population_bounds, population_weights) {
    # bounds <- c(0, 80, 150, 180)
    # counts <- c(10, 20, 30, 40)
    # new_bounds <- c(0, 60, 150, 160, 180)
    # population_bounds <- c(0, 60, 150, 160, 175, 180)
    # population_weights <- c(10, 20, 30, 40, 50, 60)
    # library(tidyverse)
    # stop()

    all_lower <- sort(unique(c(bounds, new_bounds, population_bounds)))
    cut_ages(all_lower, breaks = bounds) -> dat10
    dat10 |>
        dplyr::left_join(
            tibble::tibble(lower = bounds, count = counts),
            by = dplyr::join_by(lower)) -> dat0

    dat1 <- reaggregate_counts_edwin_unweighted(population_bounds, population_weights, all_lower)
    # c_k = c_i N_k/N_i, where i is old bounds, k is new bounds
    cut_ages(all_lower, breaks = new_bounds) -> dat3
    dat0 |>
        dplyr::mutate(lower = all_lower) |>
        dplyr::left_join(dplyr::rename(dat1, w = count), by = dplyr::join_by(lower)) |>
        dplyr::mutate(i = dat0$lower) |>
        dplyr::mutate(ck = count * w/sum(w), .by = c(i)) |>
        dplyr::mutate(lower = dat3$lower) |>
        dplyr::summarise(count = sum(ck), .by = c(lower)) -> dat5

    stopifnot(isTRUE(all.equal(sum(dat5$count), sum(counts))))
    dat5
}

reaggregate_rates_edwin_weighted <- function(bounds, rates, new_bounds, population_bounds, population_weights) {
    # bounds = c(0, 80, 150, 180)
    # rates = c(0.1, 0.2, 0.3, .4)
    # new_bounds = c(0, 60, 150, 160, 180)
    # population_bounds = c(0, 60, 150, 160, 175, 180)
    # population_weights = c(10, 20, 30, 40, 50, 60)
    # stop()
    # NOTE: For Tim: I don't think we need a test here, because we can assume the rate is the same for all above the max(bounds) even if max(new_bounds) > max(bounds)
    # Instead we do need the test that max(population_bounds) < max(new_bounds). Note that I am not checking that, because that will be checked by
    # reaggregate_counts_edwin_unweighted
    tibble::tibble(lower = bounds, rates = rates) -> dat
    all_lower <- sort(unique(c(bounds, new_bounds, population_bounds)))
    cut_ages(all_lower, breaks = bounds) |>
        dplyr::left_join(dat, by = dplyr::join_by(lower)) -> dat1
    dat2 <- reaggregate_counts_edwin_unweighted(population_bounds, population_weights, all_lower)
    dat1 |>
        dplyr::mutate(weight = dat2$count) -> dat3
    cut_ages(all_lower, breaks = new_bounds) -> dat4
    dat3 |>
        dplyr::mutate(lower = dat4$lower) |>
        dplyr::summarise(rate = sum(rates * weight) / sum(weight), .by = c(lower))
}

# nolint end

reaggregate_counts_fast <- function(
    bounds,
    counts,
    new_bounds,
    ...,
    population_bounds = NULL,
    population_weights = NULL
) {

    rlang::check_dots_empty0(...)

    # lower bounds checks
    if (!all(is.finite(bounds)))
        stop("`bounds` must be a finite, numeric vector.")

    if (!length(bounds))
        stop("`bounds` must be of non-zero length.")

    if (is.unsorted(bounds, na.rm = FALSE, strictly = TRUE))
        stop("`bounds` must be in strictly ascending order")

    if (bounds[1L] < 0)
        stop("`bounds` must be non-negative.")

    # rates checks
    if (!is.numeric(counts))
        stop("`counts` must be numeric.")

    if (length(counts) != length(bounds))
        stop("`counts` must be the same length as `bounds`.")

    # new bounds checks
    if (!all(is.finite(new_bounds)))
        stop("`new_bounds` must be a finite, numeric vector.")

    if (!length(new_bounds))
        stop("`new_bounds` must be of non-zero length.")

    if (is.unsorted(new_bounds, na.rm = FALSE, strictly = TRUE))
        stop("`new_bounds` must be in strictly ascending order")

    if (new_bounds[1L] < 0)
        stop("`new_bounds` must be non-negative.")

    # population bounds checks
    if (is.null(population_bounds)) {

        if (!is.null(population_weights) && length(population_weights) != length(new_bounds)) {
                stop("When `population_bounds` is not specified, `population_weights` must be the same length as `new_bounds`.") # nolint: line_length_linter.
        }

        if (max(bounds) < max(new_bounds)) {
            stop("Where `population_bounds` are not specified the maximum value of `new_bounds` must be less than or equal to that of `bounds`.") # nolint: line_length_linter.
        }

        population_bounds <- new_bounds

    } else {

        if (!all(is.finite(population_bounds)))
            stop("`population_bounds` must be a finite, numeric vector.")

        if (!length(population_bounds))
            stop("`population_bounds` must be of non-zero length.")

        if (is.unsorted(population_bounds, na.rm = FALSE, strictly = TRUE))
            stop("`population_bounds` must be in strictly ascending order")

        if (population_bounds[1L] < 0)
            stop("`population_bounds} must be non-negative.")

        if (max(bounds) > max(population_bounds)) {
            stop(
                "The maximum value of `bounds` must be less than or equal to that of `population_bounds`." # nolint: line_length_linter.
            )
        }

    }

    # population_weights check
    if (!is.null(population_weights)) {

        if (!all(is.finite(population_weights)) || any(population_weights < 0))
            stop("`population_weights` must be numeric, non-negative and finite.")

        if (length(population_weights) != length(population_bounds))
            stop("`population_weights` must be the same length as `population_bounds`."
            )

        if (sum(population_weights) == 0)
            stop("At least one `population_weight` must be non-zero.")
    }

    # Ensure bounds start at zero and adjust counts accordingly
    if (bounds[1L] != 0) {
        bounds <- c(0, bounds)
        counts <- c(0, counts)
    }

    # Ensure new bounds start at zero
    if (new_bounds[1L] != 0)
        new_bounds <- c(0, new_bounds)

    # Ensure population_bounds start at zero and adjust weights accordingly
    if (population_bounds[1L] != 0) {
        population_bounds <- c(0, population_bounds)
        if (!is.null(population_weights))
            population_weights <- c(0, population_weights)
    }

    # calculate the old and new upper bounds
    old_upper <- c(bounds[-1L], Inf)
    pop_upper <- c(population_bounds[-1L], Inf)
    new_upper <- c(new_bounds[-1L], Inf)

    # calculate the combined bounds
    all_lower <- sort(unique(c(bounds, new_bounds, population_bounds)))
    all_upper <- c(all_lower[-1L], Inf)

    if (is.null(population_weights))
        population_weights <- pop_upper - population_bounds

    # we need to keep track where the combined bits would fit in the old and
    # new bounds. This information is stored in the old_container and
    # new_container vectors respectively.
    new_container <- old_container <- pop_container <- integer(length(all_upper))
    new_index <- old_index <- pop_index <- 1L

    for (i in seq_along(old_container)) {
        old_index <- old_index + (all_upper[i] > old_upper[old_index])
        new_index <- new_index + (all_upper[i] > new_upper[new_index])
        pop_index <- pop_index + (all_upper[i] > pop_upper[pop_index])

        old_container[i] <- old_index
        new_container[i] <- new_index
        pop_container[i] <- pop_index
    }

    result <- counts[old_container]

    all_diff <- all_upper - all_lower
    pop_diff <- (pop_upper[pop_container] - population_bounds[pop_container])
    ratio <- all_diff / pop_diff
    ratio[all_diff == Inf & pop_diff == Inf] <- 1
    pop_weights <- population_weights[pop_container] * ratio
    pop_weights <- pop_weights / ave(pop_weights, old_container, FUN = sum)
    result <- counts[old_container] * pop_weights
    result[length(result)] <- sum(counts) - sum(result[-length(result)])

    out <- numeric(length(new_bounds))
    idx <- 1L
    for (i in seq_along(new_container)) {
        if (new_container[i] != idx)
            idx <- idx + 1L
        out[idx] <- out[idx] + result[i]
    }
    out[idx] <- sum(counts) - sum(out[-idx])

    interval <- sprintf("[%.f, %.f)", new_bounds, new_upper)
    interval <- factor(interval, levels = interval, ordered = TRUE)

    tibble::new_tibble(
        list(
            interval = interval,
            lower = new_bounds,
            upper = new_upper,
            count = out
        )
    )

}

reaggregate_rates_fast <- function(
        bounds,
        rates,
        new_bounds,
        ...,
        population_bounds = NULL,
        population_weights = NULL
) {

    rlang::check_dots_empty0(...)

    # lower bounds checks
    if (!all(is.finite(bounds)))
        stop("`bounds` must be a finite, numeric vector.")
    if (!length(bounds))
        stop("`bounds` must be of non-zero length.")
    if (is.unsorted(bounds, na.rm = FALSE, strictly = TRUE))
        stop("`bounds` must be in strictly ascending order")
    if (bounds[1L] < 0)
        stop("`bounds` must be non-negative.")

    # rates checks
    if (!is.numeric(rates))
        stop("`rates` must be numeric.")
    if (length(rates) != length(bounds))
        stop("`rates` must be the same length as `bounds`.")

    # new bounds checks
    if (!all(is.finite(new_bounds)))
        stop("`new_bounds` must be a finite, numeric vector.")
    if (!length(new_bounds))
        stop("`new_bounds` must be of non-zero length.")
    if (is.unsorted(new_bounds, na.rm = FALSE, strictly = TRUE))
        stop("`new_bounds` must be in strictly ascending order")
    if (new_bounds[1L] < 0)
        stop("`new_bounds` must be non-negative.")

    # population bounds checks
    if (is.null(population_bounds)) {

        if (!is.null(population_weights) && (length(population_weights) != length(new_bounds))) {
            stop("When `population_bounds` is not specified, `population_weights` must be the same length as `new_bounds`.") # nolint: line_length_linter.
        }

        if (max(bounds) < max(new_bounds)) {
            stop("Where `population_bounds` are not specified the maximum value of `new_bounds` must be less than or equal to that of `bounds`." # nolint: line_length_linter
            )
        }

        population_bounds <- new_bounds

    } else {

        if (!all(is.finite(population_bounds)))
            stop("`population_bounds` must be a finite, numeric vector.")

        if (!length(population_bounds))
            stop("`population_bounds` must be of non-zero length.")

        if (is.unsorted(population_bounds, na.rm = FALSE, strictly = TRUE))
            stop("`population_bounds` must be in strictly ascending order")

        if (population_bounds[1L] < 0)
            stop("`population_bounds` must be non-negative.")

        if (max(population_bounds) < max(new_bounds)) {
            stop("The maximum value of `new_bounds` must be less than or equal to that of `population_bounds`.") # nolint: line_length_linter
        }

    }

    # population_weights check
    if (!is.null(population_weights)) {
        if (!all(is.finite(population_weights)) || any(population_weights < 0))
            stop("`population_weights` must be numeric, non-negative and finite.")
        if (length(population_weights) != length(population_bounds))
            stop("`population_weights` must be the same length as `population_bounds`.")
        if (sum(population_weights) == 0)
            stop("At least one `population_weight` must be non-zero.")
    }

    # Ensure bounds start at zero and adjust rates accordingly
    if (bounds[1L] != 0) {
        bounds <- c(0, bounds)
        rates <- c(0, rates)
    }

    # Ensure new bounds start at zero
    if (new_bounds[1L] != 0)
        new_bounds <- c(0, new_bounds)

    # Ensure population_bounds start at zero and adjust weights accordingly
    if (population_bounds[1L] != 0) {
        population_bounds <- c(0, population_bounds)
        if (!is.null(population_weights))
            population_weights <- c(0, population_weights)
    }

    # calculate the old and new upper bounds
    old_upper <- c(bounds[-1L], Inf)
    pop_upper <- c(population_bounds[-1L], Inf)
    new_upper <- c(new_bounds[-1L], Inf)

    # calculate the combined bounds
    all_lower <- sort(unique(c(bounds, new_bounds, population_bounds)))
    all_upper <- c(all_lower[-1L], Inf)

    # TODO - explain this!!!
    if (is.null(population_weights)) {
        population_weights <- pop_upper - population_bounds
        population_weights[length(population_weights)] <- 1
    }

    # we need to keep track where the combined bits would fit in the old and
    # new bounds. This information is stored in the old_container and
    # new_container vectors respectively.
    new_container <- old_container <- pop_container <- integer(length(all_upper))
    new_index <- old_index <- pop_index <- 1L

    for (i in seq_along(old_container)) {
        old_index <- old_index + (all_upper[i] > old_upper[old_index])
        new_index <- new_index + (all_upper[i] > new_upper[new_index])
        pop_index <- pop_index + (all_upper[i] > pop_upper[pop_index])

        old_container[i] <- old_index
        new_container[i] <- new_index
        pop_container[i] <- pop_index
    }

    all_diff <- all_upper - all_lower
    pop_diff <- (pop_upper[pop_container] - population_bounds[pop_container])
    ratio <- all_diff / pop_diff
    ratio[all_diff == Inf & pop_diff == Inf] <- 1
    pop_weights <- population_weights[pop_container] * ratio

    result <- rates[old_container] * pop_weights
    out <- numeric(length(new_bounds))
    idx <- 1L
    weight <- 0
    for (i in seq_along(new_container)) {
        if (new_container[i] != idx) {
            out[idx] <- out[idx] / weight
            idx <- idx + 1L
            weight <- 0
        }
        weight <- weight + pop_weights[i]
        out[idx] <- out[idx] + result[i]
    }
    out[length(out)] <- out[length(out)] / weight

    interval <- sprintf("[%.f, %.f)", new_bounds, new_upper)
    interval <- factor(interval, levels = interval, ordered = TRUE)

    tibble::new_tibble(
        list(
            interval = interval,
            lower = new_bounds,
            upper = new_upper,
            rate = out
        )
    )

}

Try the ageutils package in your browser

Any scripts or data that you put into this service are public.

ageutils documentation built on Nov. 5, 2025, 5:33 p.m.