#' Get likelihood category for a p-value
#'
#' Given a probabiliy p it returns the term used to describe the category this
#' probability belongs to. It ensures the levels of the output are ordered
#' appropietly.
#'
#' This function uses the data contained in the \link{statsnz_likelihood_scale}
#' and \link{ipcc_likelihood_scale} tables to determine the category of a given
#' probability (usually a p-value). It uses
#' \code{\link{order_likelihood_levels}} to ensure that levels of the output are
#' ordered correctly.
#'
#' @param p the probability (or percentage) used to caclulate the category
#' @param scale whether to base the categories using the Statistics New Zealand
#' likelihood scale ("statsnz") or the IPCC scale ("ipcc")
#' @param term_type when \code{scale = "statsnz"} it allows to modify the terms
#' depending on what the probability \code{p} indicates. \code{term_type =
#' "worsening-improving"} indicates that a large \code{p} corresponds to a
#' worsening trend; this is the default. \code{term_type =
#' "improving-worsening"} indicates that a large \code{p} corresponds to an
#' improving trend. \code{term_type = "increasing-decreasing"} indicates that
#' the output should range from "Very likely increasing" to "Very likely
#' decreasing". \code{term_type = "likely-unlikely"} indicates that the output
#' should range from "Very likely" to "Very unlikely".
#' @param p_is whether \code{p} is a probability (from 0 to 1) or a percentage
#' (from 0 to 100)
#'
#' @return a character
#' @family likelihood functions
#' @export
#'
#' @examples
#' p <- seq(0, 1, length.out = 11)
#'
#' # In most water quality metrics an increasing trend (large p) corresponds to
#' # a worsening trend
#' get_likelihood_category(p, term_type = "worsening-improving") %>%
#' order_likelihood_levels()
#'
#' # In climate metrics we usually prefer an increasing-decreasing scale
#' get_likelihood_category(p, term_type = "increasing-decreasing") %>%
#' order_likelihood_levels()
#'
#' # Also works when p is a percentages
#' get_likelihood_category(p*100, p_is = "percentage") %>%
#' order_likelihood_levels()
#'
#' # We can also get terms used by ipcc if desired
#' get_likelihood_category(p, scale = "ipcc") %>%
#' order_likelihood_levels()
get_likelihood_category <- function(p,
scale = c("statsnz", "ipcc"),
term_type = c("worsening-improving",
"improving-worsening",
"increasing-decreasing",
"decreasing-increasing",
"likely-unlikely"),
p_is = c("probability", "percentage")) {
scale <- match.arg(scale)
term_type <- match.arg(term_type)
p_is <- match.arg(p_is)
# determine the desired scale
scale_frame <- switch(scale,
statsnz = er.helpers::statsnz_likelihood_scale,
ipcc = er.helpers::ipcc_likelihood_scale
)
terms <- scale_frame$term
if (scale == "statsnz") {
terms <- get_likelihood_terms(terms, term_type)
}
if (p_is == "percentage") {
p <- p / 100
}
if (all(is.na(p))){
warning("All values in p are NA")
return(rep(NA, length(p)))
}
if (any(is.na(p)))
warning("NA values found in p")
range_p <- range(p, na.rm = TRUE)
if (!(min(range_p) >= 0 & max(range_p) <= 1))
stop("p should be between 0 and 1 if is a probabiliy or between 0 and 100 if it's a percentage")
terms_index <- scale_frame %>%
split(scale_frame$term) %>%
purrr::map_dfr(function(x) {
in_interval(p,
x$left_break, x$right_break,
x$left_open, x$right_open)}) %>%
as.matrix() %>%
apply(1, which) %>%
# Recover if p is NA
purrr::modify_if(function(x) length(x) == 0, function(p) NA) %>%
unlist()
as.character(terms[terms_index])
}
#' Wether a value is within a specified interval
#'
#' @param x the value to test
#' @param lower lower limit
#' @param upper upper limit
#' @param left_open whether the lower limit is an open interval
#' @param right_open whether the upper limit is an open interval
#'
#' @return logical
#'
in_interval <- function(x, lower, upper, left_open, right_open){
left_compare <- ifelse(left_open, yes = `>`, no = `>=`)
right_compare <- ifelse(right_open, yes = `<`, no = `<=`)
left_compare(x, lower) & right_compare(x, upper)
}
#' Translate likely-unlikely to a specified term scale
#'
#' @param terms terms to translate
#' @inheritParams get_likelihood_category
#'
#' @return a factor
#'
get_likelihood_terms <- function(terms,
term_type = c("worsening-improving",
"improving-worsening",
"increasing-decreasing",
"likely-unlikely")){
if (term_type == "likely-unlikely") {
return(terms)
} else if (term_type == "increasing-decreasing") {
translation <- likelihood_terms$statsnz %>%
magrittr::set_names(rev(likelihood_terms$statsnz_increasing))
} else if (term_type == "decreasing-increasing") {
translation <- likelihood_terms$statsnz %>%
magrittr::set_names(likelihood_terms$statsnz_increasing)
} else if (term_type == "improving-worsening") {
translation <- likelihood_terms$statsnz %>%
magrittr::set_names(likelihood_terms$statsnz_improving)
} else if (term_type == "worsening-improving") {
translation <- likelihood_terms$statsnz %>%
magrittr::set_names(rev(likelihood_terms$statsnz_improving))
}
purrr::lift_dv(forcats::fct_recode)(.f = terms, translation) %>%
order_likelihood_levels()
}
#' Orders the levels of a likelihood category factor
#'
#' @param x the unordered factor. Levels must be one of those found in
#' er.helpers:::likelihood_terms
#'
#' @return a factor with ordered levels
#' @family likelihood functions
#' @export
#'
#' @examples
#' \dontrun{
#' library(ggplot2)
#' library(tibble)
#' trends <- tribble(
#' ~ trend, ~n,
#' "Indeterminate", 2,
#' "Very likely worsening", 3,
#' "Likely improving", 2)
#' # unordered plot
#' qplot(trend, n, fill = trend, data = trends, geom = "col")
#'
#' ordered_trends <- dplyr::mutate(trends,
#' trend = order_likelihood_levels(trend))
#' # nice ordered plot
#' qplot(trend, n, fill = trend, data = ordered_trends, geom = "col")
#' }
order_likelihood_levels <- function(x) {
# If it's not a factor make it one
if (!is.factor(x))
x <- factor(x)
already_ordered <- likelihood_terms %>%
purrr::map_lgl(~ identical(levels(x), .)) %>%
any()
if (already_ordered)
return(x)
# terms that only appear in the ipcc terms
ipcc_characteristic_terms <- likelihood_terms$ipcc[c(1, 2, 5, 8, 9)]
if (any(ipcc_characteristic_terms %in% x)) {
factor(x, levels = likelihood_terms$ipcc)
} else {
statsnz_likelihood_terms <- likelihood_terms[-1]
new_levels_index <- statsnz_likelihood_terms %>%
# remove the Indeterminate level for the comparison as it appears everywhere
purrr::map_lgl(~ any(.[-3] %in% x)) %>%
which()
factor(x, levels = statsnz_likelihood_terms[[new_levels_index]])
}
}
#' Simplify likelihood levels
#'
#' Collapse levels on a likelihood factor into smaller categories. For example
#' "Likely improving" and "Very likely improving" are collapsed into a single
#' level "Likely or very likely improving".
#'
#' @param x The vector with the likelihoods. Must be a factor or a character
#' string
#'
#' @return A factor with simple categories
#' @family likelihood functions
#' @export
#'
#' @examples
#' p <- seq(0, 1, length.out = 11)
#'
#' get_likelihood_category(p) %>%
#' order_likelihood_levels() %>%
#' simplify_likelihood_levels()
#'
simplify_likelihood_levels <- function(x){
y <- order_likelihood_levels(x)
levels_x <- levels(y)
# terms that only appear in the ipcc terms
ipcc_characteristic_terms <- likelihood_terms$ipcc[c(1, 2, 5, 8, 9)]
if (any(ipcc_characteristic_terms %in% levels_x)) {
# factor(x, levels = likelihood_terms$ipcc)
message("Simplification for ipcc likelihood scale has not yet been implemented")
y
} else {
statsnz_likelihood_terms <- likelihood_terms[-1]
statsnz_simple_likelihood_terms <- simple_likelihood_terms[-1]
new_levels_index <- statsnz_likelihood_terms %>%
# remove the Indeterminate level for the comparison as it appears everywhere
purrr::map_lgl(~ any(.[-3] %in% x)) %>%
which()
purrr::lift_dl(forcats::fct_collapse)(.f = y, statsnz_simple_likelihood_terms[[new_levels_index]])
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.