# ------------------------------------------------------------------------------
#' Assign Quantiles using phe_quantile
#'
#' Assigns data to quantiles based on numeric data rankings.
#'
#' @param data a data frame containing the quantitative data to be assigned to
#' quantiles. If pre-grouped, separate sets of quantiles will be assigned for
#' each grouping set; unquoted string; no default
#' @param values field name from data containing the numeric values to rank data
#' by and assign quantiles from; unquoted string; no default
#' @param nquantiles the number of quantiles to separate each grouping set into;
#' numeric; default=10L
#' @param invert whether the quantiles should be directly (FALSE) or inversely
#' (TRUE) related to the numerical value order; logical (to apply same value
#' to all grouping sets) OR unquoted string referencing field name from data
#' that stores logical values for each grouping set; default = TRUE (ie
#' highest values assigned to quantile 1)
#' @param inverttype whether the invert argument has been specified as a logical
#' value or a field name from data; quoted string "field" or "logical";
#' default = "logical"
#' @param type defines whether to include metadata columns in output to
#' reference the arguments passed; can be "standard" or "full"; quoted string;
#' default = "full"
#'
#' @import dplyr
#' @importFrom rlang sym quo_name
#' @export
#'
#' @return When type = "full", returns the original data.frame with quantile
#' (quantile value), nquantiles (number of quantiles requested), groupvars
#' (grouping sets quantiles assigned within) and invert (indicating direction
#' of quantile assignment) fields appended.
#'
#' @section Notes: See [OHID Technical Guide - Assigning Deprivation Categories](https://fingertips.phe.org.uk/profile/guidance/supporting-information/PH-methods)
#' for methodology. In particular, note that this function strictly applies
#' the algorithm defined but some manual review, and potentially adjustment,
#' is advised in some cases where multiple small areas with equal rank fall
#' across a natural quantile boundary.
#'
#' @examples
#' df <- data.frame(
#' region = as.character(rep(c("Region1","Region2","Region3","Region4"),
#' each=250)),
#' smallarea = as.character(paste0("Area",seq_along(1:1000))),
#' vals = as.numeric(sample(200, 1000, replace = TRUE)),
#' stringsAsFactors = FALSE)
#'
#' # assign small areas to deciles across whole data frame
#' phe_quantile(df, vals)
#'
#' # assign small areas to deciles within regions by pre-grouping the data frame
#' library(dplyr)
#' df_grp <- df %>% group_by(region)
#' phe_quantile(df_grp, vals)
#'
#' # assign small areas to quintiles, where highest value = highest quantile
#' phe_quantile(df, vals, nquantiles = 5L, invert=FALSE)
#'
#' @family PHEindicatormethods package functions
# -------------------------------------------------------------------------------------------------
# create phe_quantile function using PHE method
phe_quantile <- function(data,
values,
nquantiles = 10L,
invert = TRUE,
inverttype = "logical",
type = "full") {
# check required arguments present
if (missing(data) | missing(values)) {
stop(paste0("function phe_quantile requires at least 2 arguments: ",
"data and values"))
}
# check invert is valid and append to data
if (!(inverttype %in% c("logical", "field"))) {
stop("valid values for inverttype are logical and field")
} else if (inverttype == "logical") {
if (!(invert %in% c(TRUE, FALSE))) {
stop("invert expressed as a logical must equal TRUE or FALSE")
}
data <- mutate(data, invert_calc = invert)
} else if (inverttype == "field") {
if (deparse(substitute(invert)) %in% colnames(data)) {
data <- mutate(data, invert_calc = {{ invert }})
} else stop("invert is not a field name from data")
}
# error handling for valid data types and values
if (!(is.numeric(pull(data, {{ values }})))) {
stop("values argument must be a numeric field from data")
}
#check all invert values are identical within groups
if (!n_groups(data) == nrow(unique(select(data, "invert_calc")))) {
stop(paste0("invert field values must take the same logical value for ",
"each data grouping set"))
}
# assign quantiles - unless number of small areas within a group is less
# than number of quantiles requested. Ignore areas with no value present
phe_quantile <- data %>%
mutate(num_small_areas = sum(!is.na({{ values }})),
rank = case_when(
.data$invert_calc == TRUE ~ rank(- {{ values }},
ties.method = "min",
na.last = "keep"),
.default = rank({{ values }},
ties.method = "min",
na.last = "keep")
),
quantile = if_else(.data$num_small_areas < nquantiles,
NA_real_,
floor((nquantiles + 1) - ceiling(((.data$num_small_areas + 1) - rank) /
(.data$num_small_areas / nquantiles)
)
)
),
quantile = if_else(.data$quantile == 0, 1, .data$quantile)
) %>%
select(!c("num_small_areas", "rank")) %>%
mutate(nquantiles= nquantiles,
groupvars = paste0(group_vars(data), collapse = ", "),
qinverted = if_else(.data$invert_calc == TRUE,
"lowest quantile represents highest values",
"lowest quantile represents lowest values"))
# warn if any groups had too few snall areas with values to assign quantiles
if (nrow(filter(phe_quantile, all(is.na(.data$quantile)))) > 0
) {
warning(paste0("One or more groups had too few small areas with values to ",
"allow quantiles to be assigned"))
}
# remove columns if not required based on value of type argument
if (type == "standard") {
phe_quantile <- phe_quantile %>%
select(!c("nquantiles", "groupvars", "qinverted", "invert_calc"))
} else {
phe_quantile <- phe_quantile %>%
select(!c("invert_calc"))
}
return(phe_quantile)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.