R/DSR.R
In PHEindicatormethods: Common Public Health Statistics and their Confidence Intervals

Documented in phe_dsr

# -------------------------------------------------------------------------------------------------
#' Calculate Directly Standardised Rates using phe_dsr
#'
#' Calculates directly standardised rates with confidence limits using Byar's
#' method (1) with Dobson method adjustment (2).
#'
#' @param data data.frame containing the data to be standardised, pre-grouped if
#'   multiple DSRs required; unquoted string; no default
#' @param x field name from data containing the observed number of events for
#'   each standardisation category (eg ageband) within each grouping set (eg
#'   area); unquoted string; no default
#' @param n field name from data containing the populations for each
#'   standardisation category (eg ageband) within each grouping set (eg area);
#'   unquoted string; no default
#' @param stdpop the standard populations for each standardisation category (eg
#'   age band); unquoted string referencing a numeric vector or field name from
#'   data depending on value of stdpoptype; default = esp2013
#' @param stdpoptype whether the stdpop has been specified as a vector or a
#'   field name from data; quoted string "field" or "vector"; default = "vector"
#' @param type defines the data and metadata columns to be included in output;
#'   can be "value", "lower", "upper", "standard" (for all data) or "full" (for
#'   all data and metadata); quoted string; default = "full"
#' @param confidence the required level of confidence expressed as a number
#'   between 0.9 and 1 or a number between 90 and 100 or can be a vector of 0.95
#'   and 0.998, for example, to output both 95 percent and 99.8 percent percent CIs; numeric;
#'   default 0.95
#' @param multiplier the multiplier used to express the final values (eg 100,000
#'   = rate per 100,000); numeric; default 100,000
#'
#' @return When type = "full", returns a tibble of total counts, total
#'   populations, directly standardised rates, lower confidence limits, upper
#'   confidence limits, confidence level, statistic and method for each grouping
#'   set
#'
#' @importFrom rlang sym quo_name
#' @import dplyr
#' @export
#'
#' @examples
#' library(dplyr)
#' df <- data.frame(indicatorid = rep(c(1234, 5678, 91011, 121314),
#'                  each = 19 * 2 * 5),
#'                  year = rep(2006:2010, each = 19 * 2),
#'                  sex = rep(rep(c("Male", "Female"), each = 19), 5),
#'                  ageband = rep(c(0,5,10,15,20,25,30,35,40,45,
#'                                  50,55,60,65,70,75,80,85,90), times = 10),
#'                  obs = sample(200, 19 * 2 * 5 * 4, replace = TRUE),
#'                  pop = sample(10000:20000, 19 * 2 * 5 * 4, replace = TRUE))
#'
#' ## default execution
#' df %>%
#'     group_by(indicatorid, year, sex) %>%
#'     phe_dsr(obs, pop)
#'
#' ## calculate both 95% and 99.8% CIs in single execution
#' df %>%
#'     group_by(indicatorid, year, sex) %>%
#'     phe_dsr(obs, pop, confidence = c(0.95, 0.998))
#'
#' ## calculate DSRs for multiple grouping sets in single execution
#'
#' df %>%
#'     group_by(indicatorid, year, sex) %>%
#'     phe_dsr(obs, pop, type = "standard")
#'
#' @section Notes: User MUST ensure that x, n and stdpop vectors are all ordered
#'   by the same standardisation category values as records will be matched by
#'   position. \cr \cr For total counts >= 10 Byar's method (1) is applied using
#'   the internal byars_lower and byars_upper functions.
#'   When the total count is < 10 DSRs are not reliable and will therefore not
#'   be calculated.
#'
#' @references
#' (1) Breslow NE, Day NE. Statistical methods in cancer research,
#'  volume II: The design and analysis of cohort studies. Lyon: International
#'  Agency for Research on Cancer, World Health Organisation; 1987. \cr \cr
#' (2) Dobson A et al. Confidence intervals for weighted sums of Poisson parameters. Stat Med 1991;10:457-62.
#'
#'
#' @family PHEindicatormethods package functions
# -------------------------------------------------------------------------------------------------

# define the DSR function using Dobson method
phe_dsr <- function(data, x, n, stdpop = esp2013, stdpoptype = "vector",
                    type = "full", confidence = 0.95, multiplier = 100000) {

    # check required arguments present
    if (missing(data)|missing(x)|missing(n)) {
        stop("function phe_dsr requires at least 3 arguments: data, x, n")
    }

    # check same number of rows per group
    if (n_distinct(select(ungroup(count(data)),n)) != 1) {
        stop("data must contain the same number of rows for each group")
    }

    # check stdpop is valid and append to data
    if (!(stdpoptype %in% c("vector","field"))) {
        stop("valid values for stdpoptype are vector and field")

    } else if (stdpoptype == "vector") {
        if (pull(slice(select(ungroup(count(data)),n),1)) != length(stdpop)) {
            stop("stdpop length must equal number of rows in each group within data")
        }
    data <- mutate(data,stdpop_calc = stdpop)

    } else if (stdpoptype == "field") {
        if (deparse(substitute(stdpop)) %in% colnames(data)) {
            data <- mutate(data,stdpop_calc = {{ stdpop }} )
        } else stop("stdpop is not a field name from data")
    }


    # validate arguments
    if (any(pull(data, {{ x }}) < 0, na.rm=TRUE)) {
        stop("numerators must all be greater than or equal to zero")
    } else if (any(pull(data, {{ n }}) <= 0)) {
        stop("denominators must all be greater than zero")
    } else if (!(type %in% c("value", "lower", "upper", "standard", "full"))) {
        stop("type must be one of value, lower, upper, standard or full")
    } else if (length(confidence) >2) {
        stop("a maximum of two confidence levels can be provided")
    } else if (length(confidence) == 2) {
        if (!(confidence[1] == 0.95 & confidence[2] == 0.998)) {
            stop("two confidence levels can only be produced if they are specified as 0.95 and 0.998")
        }
    } else if ((confidence < 0.9)|(confidence > 1 & confidence < 90)|(confidence > 100)) {
        stop("confidence level must be between 90 and 100 or between 0.9 and 1")
    }


    # calculate DSR and CIs
    if (length(confidence) == 2) {

        # if two confidence levels requested
        conf1 <- confidence[1]
        conf2 <- confidence[2]

        # calculate DSR and CIs
        phe_dsr <- data %>%
            mutate(wt_rate = na.zero({{ x }}) *  .data$stdpop_calc / ({{ n }}),
                   sq_rate = na.zero({{ x }}) * (.data$stdpop_calc / ({{ n }}))^2, na.rm=TRUE) %>%
            summarise(total_count = sum({{ x }}, na.rm=TRUE),
                      total_pop = sum({{ n }}),
                      value = sum(.data$wt_rate) / sum(.data$stdpop_calc) * multiplier,
                      vardsr = 1/sum(.data$stdpop_calc)^2 * sum(.data$sq_rate),
                      lower95_0cl = .data$value + sqrt((.data$vardsr/sum({{ x }}, na.rm=TRUE)))*
                          (byars_lower(sum({{ x }}, na.rm=TRUE), conf1) - sum({{ x }}, na.rm=TRUE)) * multiplier,
                      upper95_0cl = .data$value + sqrt((.data$vardsr/sum({{ x }}, na.rm=TRUE)))*
                          (byars_upper(sum({{ x }}, na.rm=TRUE), conf1) - sum({{ x }}, na.rm=TRUE)) * multiplier,
                      lower99_8cl = .data$value + sqrt((.data$vardsr/sum({{ x }}, na.rm=TRUE)))*
                          (byars_lower(sum({{ x }}, na.rm=TRUE), conf2) - sum({{ x }}, na.rm=TRUE)) * multiplier,
                      upper99_8cl = .data$value + sqrt((.data$vardsr/sum({{ x }}, na.rm=TRUE)))*
                          (byars_upper(sum({{ x }}, na.rm=TRUE), conf2) - sum({{ x }}, na.rm=TRUE)) * multiplier,
                      .groups = "keep") %>%
            select(!c("vardsr")) %>%
            mutate(confidence = "95%, 99.8%",
                   statistic = paste("dsr per",format(multiplier,scientific=F)),
                   method = "Dobson")

        # remove DSR calculation for total counts < 10
        phe_dsr <- phe_dsr %>%
          mutate(across(c("value", "upper95_0cl", "lower95_0cl",
                          "upper99_8cl", "lower99_8cl"),
                        function(x) if_else(.data$total_count < 10, NA_real_, x)),
                 statistic = if_else(.data$total_count < 10,
                                     "dsr NA for total count < 10",
                                     .data$statistic))


        # drop fields not required based on value of type argument
        if (type == "lower") {
            phe_dsr <- phe_dsr %>%
                select(!c("total_count", "total_pop", "value", "upper95_0cl", "upper99_8cl",
                       "confidence", "statistic", "method"))
        } else if (type == "upper") {
            phe_dsr <- phe_dsr %>%
                select(!c("total_count", "total_pop", "value", "lower95_0cl", "lower99_8cl",
                       "confidence", "statistic", "method"))
        } else if (type == "value") {
            phe_dsr <- phe_dsr %>%
                select(!c("total_count", "total_pop", "lower95_0cl", "lower99_8cl", "upper95_0cl", "upper99_8cl",
                       "confidence", "statistic", "method"))
        } else if (type == "standard") {
            phe_dsr <- phe_dsr %>%
                select(!c("confidence", "statistic", "method"))
        }

    } else {

        # scale confidence level if single value specified
        if (confidence >= 90) {
            confidence <- confidence/100
        }


        # calculate DSR with a single CI
        phe_dsr <- data %>%
            mutate(wt_rate = na.zero({{ x }}) *  .data$stdpop_calc / ({{ n }}),
                   sq_rate = na.zero({{ x }}) * (.data$stdpop_calc / ({{ n }}))^2, na.rm=TRUE) %>%
            summarise(total_count = sum({{ x }},na.rm=TRUE),
                      total_pop = sum({{ n }}),
                      value = sum(.data$wt_rate) / sum(.data$stdpop_calc) * multiplier,
                      vardsr = 1/sum(.data$stdpop_calc)^2 * sum(.data$sq_rate),
                      lowercl = .data$value +
                        sqrt((.data$vardsr / sum({{ x }},na.rm=TRUE))) *
                        (byars_lower(sum({{ x }},na.rm=TRUE),
                                     confidence) - sum({{ x }},na.rm=TRUE)) * multiplier,
                      uppercl = .data$value + sqrt((.data$vardsr / sum({{ x }}, na.rm=TRUE))) *
                        (byars_upper(sum({{ x }},na.rm=TRUE),
                                     confidence) - sum({{ x }},na.rm=TRUE)) * multiplier,
                      .groups = "keep") %>%
            select(!c("vardsr")) %>%
            mutate(confidence = paste(confidence*100,"%",sep=""),
                   statistic = paste("dsr per",format(multiplier,scientific=F)),
                   method = "Dobson")

        # remove DSR calculation for total counts < 10
        phe_dsr <- phe_dsr %>%
          mutate(across(c("value", "uppercl", "lowercl"),
                        function(x) if_else(.data$total_count < 10, NA_real_, x)),
                 statistic = if_else(.data$total_count < 10,
                                     "dsr NA for total count < 10",
                                     .data$statistic))

        # drop fields not required based on value of type argument
        if (type == "lower") {
         phe_dsr <- phe_dsr %>%
            select(!c("total_count", "total_pop", "value", "uppercl", "confidence", "statistic", "method"))
        } else if (type == "upper") {
            phe_dsr <- phe_dsr %>%
            select(!c("total_count", "total_pop", "value", "lowercl", "confidence", "statistic", "method"))
        } else if (type == "value") {
            phe_dsr <- phe_dsr %>%
            select(!c("total_count", "total_pop", "lowercl", "uppercl", "confidence", "statistic", "method"))
        } else if (type == "standard") {
            phe_dsr <- phe_dsr %>%
            select(!c("confidence", "statistic", "method"))
        }

    }
    return(phe_dsr)

}

Any scripts or data that you put into this service are public.

PHEindicatormethods documentation built on May 29, 2024, 12:20 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

PHEindicatormethods
Common Public Health Statistics and their Confidence Intervals

R/DSR.R
In PHEindicatormethods: Common Public Health Statistics and their Confidence Intervals

Defines functions phe_dsr

Documented in phe_dsr

Try the PHEindicatormethods package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

PHEindicatormethods Common Public Health Statistics and their Confidence Intervals

R/DSR.R In PHEindicatormethods: Common Public Health Statistics and their Confidence Intervals

Defines functions phe_dsr

Documented in phe_dsr

Try the PHEindicatormethods package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

PHEindicatormethods
Common Public Health Statistics and their Confidence Intervals

R/DSR.R
In PHEindicatormethods: Common Public Health Statistics and their Confidence Intervals