# -------------------------------------------------------------------------------------------------
#' Calculate Indirectly Standardised Rates using calculate_ISRate
#'
#' Calculates indirectly standardised rates with confidence limits using Byar's
#' (1) or exact (2) CI method.
#'
#' @param data data.frame containing the data to be standardised, pre-grouped if
#' multiple ISRs required; unquoted string; no default
#' @param x field name from data containing the observed number of events for
#' each standardisation category (eg ageband) within each grouping set (eg
#' area). Alternatively, if not providing age breakdowns for observed events,
#' field name from observed_totals containing the observed number of events
#' within each grouping set ; unquoted string; no default
#' @param x_ref the observed number of events in the reference population for
#' each standardisation category (eg age band); unquoted string referencing a
#' numeric vector or field name from data depending on value of refpoptype; no
#' default
#' @param n_ref the reference population for each standardisation category (eg
#' age band); unquoted string referencing a numeric vector or field name from
#' data depending on value of refpoptype; no default
#' @param refpoptype whether x_ref and n_ref have been specified as vectors or a
#' field name from data; quoted string "field" or "vector"; default = "vector"
#' @param observed_totals data.frame containing total observed events for each
#' group, if not provided with age-breakdowns in data. Must only contain the
#' count field (x) plus grouping columns required to join to data using the
#' same grouping column names; default = NULL
#'
#' @inheritParams calculate_dsr
#'
#' @import dplyr
#' @importFrom stats qchisq
#' @export
#'
#' @return When type = "full", returns a tibble of observed events, expected
#' events, indirectly standardised rate, lower confidence limit, upper
#' confidence limit, confidence level, statistic and method for each grouping
#' set
#'
#' @examples
#' library(dplyr)
#' df <- data.frame(indicatorid = rep(c(1234, 5678, 91011, 121314), each = 19 * 2 * 5),
#' year = rep(2006:2010, each = 19 * 2),
#' sex = rep(rep(c("Male", "Female"), each = 19), 5),
#' ageband = rep(c(0,5,10,15,20,25,30,35,40,45,
#' 50,55,60,65,70,75,80,85,90), times = 10),
#' obs = sample(200, 19 * 2 * 5 * 4, replace = TRUE),
#' pop = sample(10000:20000, 19 * 2 * 5 * 4, replace = TRUE))
#'
#' refdf <- data.frame(refcount = sample(200, 19, replace = TRUE),
#' refpop = sample(10000:20000, 19, replace = TRUE))
#'
#' ## calculate multiple ISRs in single execution
#' df %>%
#' group_by(indicatorid, year, sex) %>%
#' calculate_ISRate(obs, pop, refdf$refcount, refdf$refpop)
#'
#' ## execute without outputting metadata fields
#' df %>%
#' group_by(indicatorid, year, sex) %>%
#' calculate_ISRate(obs, pop, refdf$refcount, refdf$refpop, type="standard", confidence=99.8)
#'
#' ## calculate 95% and 99.8% CIs in single execution
#' df %>%
#' group_by(indicatorid, year, sex) %>%
#' calculate_ISRate(obs, pop, refdf$refcount, refdf$refpop, confidence = c(0.95, 0.998))
#'
#' ## Calculate ISR when observed totals aren't available with age-breakdowns
#' observed_totals <- data.frame(indicatorid = rep(c(1234, 5678, 91011, 121314), each = 10),
#' year = rep(rep(2006:2010, each = 2),4),
#' sex = rep(rep(c("Male", "Female"), each = 1),20),
#' observed = sample(1500:2500, 40))
#'
#' df %>%
#' group_by(indicatorid, year, sex) %>%
#' calculate_ISRate(observed, pop, refdf$refcount, refdf$refpop,
#' observed_totals = observed_totals)
#'
#'
#' @section Notes: User MUST ensure that x, n, x_ref and n_ref vectors are all
#' ordered by the same standardisation category values as records will be
#' matched by position. \cr \cr For numerators >= 10 Byar's method (1) is
#' applied using the internal byars_lower and byars_upper functions. For
#' small numerators Byar's method is less accurate and so an exact method (2)
#' based on the Poisson distribution is used. \cr \cr This function directly
#' replaced phe_isr which was fully deprecated in package version 2.0.0 due to
#' ambiguous naming
#'
#' @references
#' (1) Breslow NE, Day NE. Statistical methods in cancer research,
#' volume II: The design and analysis of cohort studies. Lyon: International
#' Agency for Research on Cancer, World Health Organisation; 1987. \cr \cr
#' (2) Armitage P, Berry G. Statistical methods in medical research (4th edn).
#' Oxford: Blackwell; 2002.
#'
#' @family PHEindicatormethods package functions
# -------------------------------------------------------------------------------------------------
calculate_ISRate <- function(data, x, n, x_ref, n_ref, refpoptype = "vector",
type = "full", confidence = 0.95, multiplier = 100000,
observed_totals = NULL) {
# check required arguments present
if (missing(data)|missing(x)|missing(n)|missing(x_ref)|missing(n_ref)) {
stop("function calculate_ISRate requires at least 5 arguments: data, x, n, x_ref and n_ref")
}
# check same number of rows per group
if (n_distinct(select(ungroup(count(data)),n)) != 1) {
stop("data must contain the same number of rows for each group")
}
# check x is in data/observed_totals
if (!is.null(observed_totals)) {
if (!(deparse(substitute(x)) %in% colnames(observed_totals))) {
stop("observed_totals is provided but x is not a field name in it")
}
} else {
if (!(deparse(substitute(x)) %in% colnames(data))) {
stop("x is not in data")
}
}
# check ref pops are valid and append to data
if (!(refpoptype %in% c("vector","field"))) {
stop("valid values for refpoptype are vector and field")
} else if (refpoptype == "vector") {
if (pull(slice(select(ungroup(count(data)),n),1)) != length(x_ref)) {
stop("x_ref length must equal number of rows in each group within data")
} else if (pull(slice(select(ungroup(count(data)),n),1)) != length(n_ref)) {
stop("n_ref length must equal number of rows in each group within data")
}
data <- mutate(data,xrefpop_calc = x_ref,
nrefpop_calc = n_ref)
} else if (refpoptype == "field") {
if (deparse(substitute(x_ref)) %in% colnames(data)) {
if(deparse(substitute(n_ref)) %in% colnames(data)) {
data <- mutate(data,xrefpop_calc = {{ x_ref }},
nrefpop_calc = {{ n_ref }})
} else stop("n_ref is not a field name from data")
} else stop("x_ref is not a field name from data")
}
# validate arguments
if (is.null(observed_totals)) {
if (any(pull(data, {{ x }}) < 0, na.rm=TRUE)) {
stop("numerators must all be greater than or equal to zero")
}
} else {
if (any(pull(observed_totals, {{ x }}) < 0, na.rm=TRUE)) {
stop("numerators must all be greater than or equal to zero")
}
}
if (any(pull(data, {{ n }}) < 0, na.rm=TRUE)) {
stop("denominators must all be greater than or equal to zero")
} else if (!(type %in% c("value", "lower", "upper", "standard", "full"))) {
stop("type must be one of value, lower, upper, standard or full")
} else if (length(confidence) >2) {
stop("a maximum of two confidence levels can be provided")
} else if (length(confidence) == 2) {
if (!(confidence[1] == 0.95 & confidence[2] == 0.998)) {
stop("two confidence levels can only be produced if they are specified as 0.95 and 0.998")
}
} else if ((confidence < 0.9)|(confidence > 1 & confidence < 90)|(confidence > 100)) {
stop("confidence level must be between 90 and 100 or between 0.9 and 1")
}
# Identify join columns if observed events provided as totals
if (!is.null(observed_totals)) {
observed_total_join_cols <- base::intersect(colnames(data),
colnames(observed_totals))
}
# calculate the isr and populate metadata fields
if (length(confidence) == 2) {
# if two confidence levels requested
conf1 <- confidence[1]
conf2 <- confidence[2]
# calculate isr and CIs
if (!is.null(observed_totals)) {
ISRate <- data %>%
mutate(exp_x = na.zero(.data$xrefpop_calc) / .data$nrefpop_calc * na.zero({{ n }})) %>%
summarise(expected = sum(.data$exp_x),
ref_rate = sum(.data$xrefpop_calc, na.rm = TRUE) / sum(.data$nrefpop_calc) * multiplier,
.groups = "keep") %>%
left_join(observed_totals, by = observed_total_join_cols) %>%
rename("observed" = {{ x }}) %>%
select("observed", "expected", "ref_rate")
} else {
ISRate <- data %>%
mutate(exp_x = na.zero(.data$xrefpop_calc)/.data$nrefpop_calc * na.zero({{ n }})) %>%
summarise(observed = sum({{ x }}, na.rm=TRUE),
expected = sum(.data$exp_x),
ref_rate = sum(.data$xrefpop_calc, na.rm=TRUE) / sum(.data$nrefpop_calc) * multiplier,
.groups = "keep")
}
ISRate <- ISRate %>%
mutate(value = .data$observed / .data$expected * .data$ref_rate,
lower95_0cl = if_else(.data$observed<10, qchisq((1-conf1)/2,2*.data$observed)/2/.data$expected * .data$ref_rate,
byars_lower(.data$observed,conf1)/.data$expected * .data$ref_rate),
upper95_0cl = if_else(.data$observed<10, qchisq(conf1+(1-conf1)/2,2*.data$observed+2)/2/.data$expected * .data$ref_rate,
byars_upper(.data$observed,conf1)/.data$expected * .data$ref_rate),
lower99_8cl = if_else(.data$observed<10, qchisq((1-conf2)/2,2*.data$observed)/2/.data$expected * .data$ref_rate,
byars_lower(.data$observed,conf2)/.data$expected * .data$ref_rate),
upper99_8cl = if_else(.data$observed<10, qchisq(conf2+(1-conf2)/2,2*.data$observed+2)/2/.data$expected * .data$ref_rate,
byars_upper(.data$observed,conf2)/.data$expected * .data$ref_rate),
confidence = "95%, 99.8%",
statistic = paste("indirectly standardised rate per",format(multiplier,scientific=F)),
method = if_else(.data$observed<10,"Exact","Byars"))
# drop fields not required based on value of type argument
if (type == "lower") {
ISRate <- ISRate %>%
select(!c("observed", "expected", "ref_rate", "value", "upper95_0cl", "upper99_8cl", "confidence", "statistic", "method"))
} else if (type == "upper") {
ISRate <- ISRate %>%
select(!c("observed", "expected", "ref_rate", "value", "lower95_0cl", "lower99_8cl", "confidence", "statistic", "method"))
} else if (type == "value") {
ISRate <- ISRate %>%
select(!c("observed", "expected", "ref_rate", "lower95_0cl", "lower99_8cl", "upper95_0cl", "upper99_8cl", "confidence", "statistic", "method"))
} else if (type == "standard") {
ISRate <- ISRate %>%
select(!c("confidence", "statistic", "method"))
}
} else {
# scale confidence level
if (confidence >= 90) {
confidence <- confidence/100
}
# calculate isr with a single CI
if (!is.null(observed_totals)) {
ISRate <- data %>%
mutate(exp_x = na.zero(.data$xrefpop_calc)/.data$nrefpop_calc * na.zero({{ n }})) %>%
summarise(expected = sum(.data$exp_x),
ref_rate = sum(.data$xrefpop_calc, na.rm=TRUE) / sum(.data$nrefpop_calc) * multiplier,
.groups = "keep") %>%
left_join(observed_totals, by=observed_total_join_cols) %>%
rename("observed" = {{ x }}) %>%
select("observed", "expected", "ref_rate")
} else {
ISRate <- data %>%
mutate(exp_x = na.zero(.data$xrefpop_calc)/.data$nrefpop_calc * na.zero({{ n }})) %>%
summarise(observed = sum({{ x }}, na.rm=TRUE),
expected = sum(.data$exp_x),
ref_rate = sum(.data$xrefpop_calc, na.rm=TRUE) / sum(.data$nrefpop_calc) * multiplier,
.groups = "keep")
}
ISRate <- ISRate %>%
mutate(value = .data$observed / .data$expected * .data$ref_rate,
lowercl = if_else(.data$observed<10, qchisq((1-confidence)/2,2*.data$observed)/2/.data$expected * .data$ref_rate,
byars_lower(.data$observed,confidence)/.data$expected * .data$ref_rate),
uppercl = if_else(.data$observed<10, qchisq(confidence+(1-confidence)/2,2*.data$observed+2)/2/.data$expected * .data$ref_rate,
byars_upper(.data$observed,confidence)/.data$expected * .data$ref_rate),
confidence = paste(confidence*100,"%", sep=""),
statistic = paste("indirectly standardised rate per",format(multiplier,scientific=F)),
method = if_else(.data$observed<10,"Exact","Byars"))
# drop fields not required based on value of type argument
if (type == "lower") {
ISRate <- ISRate %>%
select(!c("observed", "expected", "ref_rate", "value", "uppercl", "confidence", "statistic", "method"))
} else if (type == "upper") {
ISRate <- ISRate %>%
select(!c("observed", "expected", "ref_rate", "value", "lowercl", "confidence", "statistic", "method"))
} else if (type == "value") {
ISRate <- ISRate %>%
select(!c("observed", "expected", "ref_rate", "lowercl", "uppercl", "confidence", "statistic", "method"))
} else if (type == "standard") {
ISRate <- ISRate %>%
select(!c("confidence", "statistic", "method"))
}
}
return(ISRate)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.