R/ISRate.R
In PHEindicatormethods: Common Public Health Statistics and their Confidence Intervals

Documented in calculate_ISRate

# -------------------------------------------------------------------------------------------------
#' Calculate Indirectly Standardised Rates using calculate_ISRate
#'
#' Calculates indirectly standardised rates with confidence limits using Byar's
#' (1) or exact (2) CI method.
#'
#' @param data data.frame containing the data to be standardised, pre-grouped if
#'   multiple ISRs required; unquoted string; no default
#' @param x field name from data containing the observed number of events for
#'   each standardisation category (eg ageband) within each grouping set (eg
#'   area). Alternatively, if not providing age breakdowns for observed events,
#'   field name from observed_totals containing the observed number of events
#'   within each grouping set ; unquoted string; no default
#' @param x_ref the observed number of events in the reference population for
#'   each standardisation category (eg age band); unquoted string referencing a
#'   numeric vector or field name from data depending on value of refpoptype; no
#'   default
#' @param n_ref the reference population for each standardisation category (eg
#'   age band); unquoted string referencing a numeric vector or field name from
#'   data depending on value of refpoptype; no default
#' @param refpoptype whether x_ref and n_ref have been specified as vectors or a
#'   field name from data; quoted string "field" or "vector"; default = "vector"
#' @param observed_totals data.frame containing total observed events for each
#'   group, if not provided with age-breakdowns in data. Must only contain the
#'   count field (x) plus grouping columns required to join to data using the
#'   same grouping column names; default = NULL
#'
#' @inheritParams phe_dsr
#'
#' @import dplyr
#' @importFrom stats qchisq
#' @export
#'
#' @return When type = "full", returns a tibble of observed events, expected
#'   events, indirectly standardised rate, lower confidence limit, upper
#'   confidence limit, confidence level, statistic and method for each grouping
#'   set
#'
#' @examples
#' library(dplyr)
#' df <- data.frame(indicatorid = rep(c(1234, 5678, 91011, 121314), each = 19 * 2 * 5),
#'                  year = rep(2006:2010, each = 19 * 2),
#'                  sex = rep(rep(c("Male", "Female"), each = 19), 5),
#'                  ageband = rep(c(0,5,10,15,20,25,30,35,40,45,
#'                                  50,55,60,65,70,75,80,85,90), times = 10),
#'                  obs = sample(200, 19 * 2 * 5 * 4, replace = TRUE),
#'                  pop = sample(10000:20000, 19 * 2 * 5 * 4, replace = TRUE))
#'
#' refdf <- data.frame(refcount = sample(200, 19, replace = TRUE),
#'                     refpop = sample(10000:20000, 19, replace = TRUE))
#'
#' ## calculate multiple ISRs in single execution
#' df %>%
#'     group_by(indicatorid, year, sex) %>%
#'     calculate_ISRate(obs, pop, refdf$refcount, refdf$refpop)
#'
#' ## execute without outputting metadata fields
#' df %>%
#'     group_by(indicatorid, year, sex) %>%
#'     calculate_ISRate(obs, pop, refdf$refcount, refdf$refpop, type="standard", confidence=99.8)
#'
#' ## calculate 95% and 99.8% CIs in single execution
#' df %>%
#'     group_by(indicatorid, year, sex) %>%
#'     calculate_ISRate(obs, pop, refdf$refcount, refdf$refpop, confidence = c(0.95, 0.998))
#'
#' ## Calculate ISR when observed totals aren't available with age-breakdowns
#' observed_totals <- data.frame(indicatorid = rep(c(1234, 5678, 91011, 121314), each = 10),
#'                        year = rep(rep(2006:2010, each = 2),4),
#'                        sex = rep(rep(c("Male", "Female"), each = 1),20),
#'                        observed = sample(1500:2500, 40))
#'
#' df %>%
#'     group_by(indicatorid, year, sex) %>%
#'     calculate_ISRate(observed, pop, refdf$refcount, refdf$refpop,
#'     observed_totals = observed_totals)
#'
#'
#' @section Notes: User MUST ensure that x, n, x_ref and n_ref vectors are all
#'   ordered by the same standardisation category values as records will be
#'   matched by position. \cr  \cr For numerators >= 10 Byar's method (1) is
#'   applied using the internal byars_lower and byars_upper functions.  For
#'   small numerators Byar's method is less accurate and so an exact method (2)
#'   based on the Poisson distribution is used. \cr  \cr This function directly
#'   replaced phe_isr which was fully deprecated in package version 2.0.0 due to
#'   ambiguous naming
#'
#' @references
#' (1) Breslow NE, Day NE. Statistical methods in cancer research,
#'  volume II: The design and analysis of cohort studies. Lyon: International
#'  Agency for Research on Cancer, World Health Organisation; 1987. \cr \cr
#' (2) Armitage P, Berry G. Statistical methods in medical research (4th edn).
#'   Oxford: Blackwell; 2002.
#'
#' @family PHEindicatormethods package functions
# -------------------------------------------------------------------------------------------------

calculate_ISRate <- function(data, x, n, x_ref, n_ref, refpoptype = "vector",
                    type = "full", confidence = 0.95, multiplier = 100000,
                    observed_totals = NULL) {

    # check required arguments present
    if (missing(data)|missing(x)|missing(n)|missing(x_ref)|missing(n_ref)) {
        stop("function calculate_ISRate requires at least 5 arguments: data, x, n, x_ref and n_ref")
    }


    # check same number of rows per group
    if (n_distinct(select(ungroup(count(data)),n)) != 1) {
      stop("data must contain the same number of rows for each group")
    }

    # check x is in data/observed_totals
    if (!is.null(observed_totals)) {
      if (!(deparse(substitute(x)) %in% colnames(observed_totals))) {
        stop("observed_totals is provided but x is not a field name in it")
      }
    } else {
      if (!(deparse(substitute(x)) %in% colnames(data))) {
        stop("x is not in data")
      }
    }

    # check ref pops are valid and append to data
    if (!(refpoptype %in% c("vector","field"))) {
        stop("valid values for refpoptype are vector and field")

    } else if (refpoptype == "vector") {
        if (pull(slice(select(ungroup(count(data)),n),1)) != length(x_ref)) {
            stop("x_ref length must equal number of rows in each group within data")
        } else if (pull(slice(select(ungroup(count(data)),n),1)) != length(n_ref)) {
            stop("n_ref length must equal number of rows in each group within data")
        }
        data <- mutate(data,xrefpop_calc = x_ref,
                            nrefpop_calc = n_ref)

    } else if (refpoptype == "field") {
         if (deparse(substitute(x_ref)) %in% colnames(data)) {
            if(deparse(substitute(n_ref)) %in% colnames(data)) {
                data <- mutate(data,xrefpop_calc = {{ x_ref }},
                                    nrefpop_calc = {{ n_ref }})
            } else stop("n_ref is not a field name from data")
        } else stop("x_ref is not a field name from data")
    }


    # validate arguments
    if (is.null(observed_totals)) {
      if (any(pull(data, {{ x }}) < 0, na.rm=TRUE)) {
        stop("numerators must all be greater than or equal to zero")
      }
    } else {
      if (any(pull(observed_totals, {{ x }}) < 0, na.rm=TRUE)) {
        stop("numerators must all be greater than or equal to zero")
      }
    }


    if (any(pull(data, {{ n }}) < 0, na.rm=TRUE)) {
        stop("denominators must all be greater than or equal to zero")
    } else if (!(type %in% c("value", "lower", "upper", "standard", "full"))) {
        stop("type must be one of value, lower, upper, standard or full")
    } else if (length(confidence) >2) {
        stop("a maximum of two confidence levels can be provided")
    } else if (length(confidence) == 2) {
        if (!(confidence[1] == 0.95 & confidence[2] == 0.998)) {
            stop("two confidence levels can only be produced if they are specified as 0.95 and 0.998")
        }
    } else if ((confidence < 0.9)|(confidence > 1 & confidence < 90)|(confidence > 100)) {
        stop("confidence level must be between 90 and 100 or between 0.9 and 1")
    }

    # Identify join columns if observed events provided as totals
    if (!is.null(observed_totals)) {
      observed_total_join_cols <- base::intersect(colnames(data),
                                                  colnames(observed_totals))
    }

    # calculate the isr and populate metadata fields
    if (length(confidence) == 2) {

        # if two confidence levels requested
        conf1 <- confidence[1]
        conf2 <- confidence[2]

        # calculate isr and CIs
        if (!is.null(observed_totals)) {
          ISRate <- data %>%
            mutate(exp_x = na.zero(.data$xrefpop_calc) / .data$nrefpop_calc * na.zero({{ n }})) %>%
            summarise(expected = sum(.data$exp_x),
                      ref_rate = sum(.data$xrefpop_calc, na.rm = TRUE) / sum(.data$nrefpop_calc) * multiplier,
                      .groups  = "keep") %>%
            left_join(observed_totals, by = observed_total_join_cols) %>%
            rename("observed" = {{ x }}) %>%
            select("observed", "expected", "ref_rate")
        } else {
          ISRate <- data %>%
            mutate(exp_x = na.zero(.data$xrefpop_calc)/.data$nrefpop_calc * na.zero({{ n }})) %>%
            summarise(observed = sum({{ x }}, na.rm=TRUE),
                      expected = sum(.data$exp_x),
                      ref_rate = sum(.data$xrefpop_calc, na.rm=TRUE) / sum(.data$nrefpop_calc) * multiplier,
                      .groups  = "keep")
        }

        ISRate <- ISRate %>%
          mutate(value     = .data$observed / .data$expected * .data$ref_rate,
               lower95_0cl = if_else(.data$observed<10, qchisq((1-conf1)/2,2*.data$observed)/2/.data$expected * .data$ref_rate,
                                 byars_lower(.data$observed,conf1)/.data$expected * .data$ref_rate),
               upper95_0cl = if_else(.data$observed<10, qchisq(conf1+(1-conf1)/2,2*.data$observed+2)/2/.data$expected * .data$ref_rate,
                                 byars_upper(.data$observed,conf1)/.data$expected * .data$ref_rate),
               lower99_8cl = if_else(.data$observed<10, qchisq((1-conf2)/2,2*.data$observed)/2/.data$expected * .data$ref_rate,
                                     byars_lower(.data$observed,conf2)/.data$expected * .data$ref_rate),
               upper99_8cl = if_else(.data$observed<10, qchisq(conf2+(1-conf2)/2,2*.data$observed+2)/2/.data$expected * .data$ref_rate,
                                     byars_upper(.data$observed,conf2)/.data$expected * .data$ref_rate),
               confidence  = "95%, 99.8%",
               statistic   = paste("indirectly standardised rate per",format(multiplier,scientific=F)),
               method      = if_else(.data$observed<10,"Exact","Byars"))

        # drop fields not required based on value of type argument
        if (type == "lower") {
            ISRate <- ISRate %>%
                select(!c("observed", "expected", "ref_rate", "value", "upper95_0cl", "upper99_8cl", "confidence", "statistic", "method"))
        } else if (type == "upper") {
            ISRate <- ISRate %>%
                select(!c("observed", "expected", "ref_rate", "value", "lower95_0cl", "lower99_8cl", "confidence", "statistic", "method"))
        } else if (type == "value") {
            ISRate <- ISRate %>%
                select(!c("observed", "expected", "ref_rate", "lower95_0cl", "lower99_8cl", "upper95_0cl", "upper99_8cl", "confidence", "statistic", "method"))
        } else if (type == "standard") {
            ISRate <- ISRate %>%
                select(!c("confidence", "statistic", "method"))
        }

    } else {

        # scale confidence level
        if (confidence >= 90) {
            confidence <- confidence/100
        }

        # calculate isr with a single CI
      if (!is.null(observed_totals)) {
        ISRate <- data %>%
          mutate(exp_x = na.zero(.data$xrefpop_calc)/.data$nrefpop_calc * na.zero({{ n }})) %>%
          summarise(expected = sum(.data$exp_x),
                    ref_rate = sum(.data$xrefpop_calc, na.rm=TRUE) / sum(.data$nrefpop_calc) * multiplier,
                    .groups = "keep") %>%
          left_join(observed_totals, by=observed_total_join_cols) %>%
          rename("observed" = {{ x }}) %>%
          select("observed", "expected", "ref_rate")
      } else {
        ISRate <- data %>%
          mutate(exp_x = na.zero(.data$xrefpop_calc)/.data$nrefpop_calc * na.zero({{ n }})) %>%
          summarise(observed  = sum({{ x }}, na.rm=TRUE),
                    expected  = sum(.data$exp_x),
                    ref_rate  = sum(.data$xrefpop_calc, na.rm=TRUE) / sum(.data$nrefpop_calc) * multiplier,
                    .groups   = "keep")
      }
      ISRate <- ISRate %>%
        mutate(value      = .data$observed / .data$expected * .data$ref_rate,
               lowercl    = if_else(.data$observed<10, qchisq((1-confidence)/2,2*.data$observed)/2/.data$expected * .data$ref_rate,
                                 byars_lower(.data$observed,confidence)/.data$expected * .data$ref_rate),
               uppercl    = if_else(.data$observed<10, qchisq(confidence+(1-confidence)/2,2*.data$observed+2)/2/.data$expected * .data$ref_rate,
                                 byars_upper(.data$observed,confidence)/.data$expected * .data$ref_rate),
               confidence = paste(confidence*100,"%", sep=""),
               statistic  = paste("indirectly standardised rate per",format(multiplier,scientific=F)),
               method     = if_else(.data$observed<10,"Exact","Byars"))
        # drop fields not required based on value of type argument
        if (type == "lower") {
            ISRate <- ISRate %>%
                select(!c("observed", "expected", "ref_rate", "value", "uppercl", "confidence", "statistic", "method"))
        } else if (type == "upper") {
            ISRate <- ISRate %>%
                select(!c("observed", "expected", "ref_rate", "value", "lowercl", "confidence", "statistic", "method"))
        } else if (type == "value") {
            ISRate <- ISRate %>%
                select(!c("observed", "expected", "ref_rate", "lowercl", "uppercl", "confidence", "statistic", "method"))
        } else if (type == "standard") {
            ISRate <- ISRate %>%
                select(!c("confidence", "statistic", "method"))
        }

    }

    return(ISRate)
}
Any scripts or data that you put into this service are public.
PHEindicatormethods documentation built on May 29, 2024, 12:20 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
PHEindicatormethods
Common Public Health Statistics and their Confidence Intervals

R/ISRate.R
In PHEindicatormethods: Common Public Health Statistics and their Confidence Intervals

Defines functions calculate_ISRate

Documented in calculate_ISRate

Try the PHEindicatormethods package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

PHEindicatormethods Common Public Health Statistics and their Confidence Intervals

R/ISRate.R In PHEindicatormethods: Common Public Health Statistics and their Confidence Intervals

Defines functions calculate_ISRate

Documented in calculate_ISRate

Try the PHEindicatormethods package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

PHEindicatormethods
Common Public Health Statistics and their Confidence Intervals

R/ISRate.R
In PHEindicatormethods: Common Public Health Statistics and their Confidence Intervals