R/spine_chart_utils.R

Defines functions round2 spine_rescaler create_datatable spine_data_check spine_preprocess

Documented in create_datatable round2 spine_data_check spine_preprocess spine_rescaler

#' Preprocess  data for spine chart
#'
#' Returns a data frame with the latest time period of data for each indicator
#' name.
#' @details This processing only takes place on the indicator field and the time
#'   period field provided. If the data contains multiple sexes or age groups
#'   for an indicator, make sure the indicator field reflects this.
#' @inheritParams area_profiles
#' @param indicator unquoted field name for indicators. This should be what is
#'   presented as the label for the final spine chart, hence should be unique
#'   for each vertabra. Be careful the indicator doesn't have sub-categories
#'   based on other fields, such as sex (male, female, persons) or age group
#' @param timeperiod_sortable unquoted field name containing the time period
#'   that is numeric and sortable, such that higher values are a later time
#'   period
#' @import dplyr
#' @return A processed data frame for latest time periods of given indicators
#' @export
spine_preprocess <- function(data, indicator, timeperiod_sortable) {
        # timeperiod_sortable <- enquo(timeperiod_sortable)
        # indicator <- enquo(indicator)
        data <- data %>%
                group_by({{ indicator }}) %>%
                filter({{ timeperiod_sortable }} ==
                               max({{ timeperiod_sortable }})) %>%
                ungroup()
}

#' Check function for multiple values for an area in an indicator for spine chart
#' @inheritParams area_profiles
spine_data_check <- function(data, indicator, area_code) {
        data <- data %>%
                count({{ indicator }}, {{ area_code }}) %>%
                filter(n > 1)
        if (nrow(data) > 0) {
                area <- data %>%
                        slice(1) %>%
                        pull({{ area_code }})
                indicatorname <- data %>%
                        slice(1) %>%
                        pull({{ indicator }}) %>%
                        as.character()
                message <- paste("Some areas have multiple values for an indicator. An example is",
                                 area,
                                 "for the indicator",
                                 indicatorname)
        } else {
                message <- NA
        }
        return(message)

}

#' Data table supporting information
#'
#' Returns a data frame containing the data that sits next to the spine chart
#' @inheritParams area_profiles
#' @param dps number of decimal places to use in the data table
#' @param header_width x dimension of chart to be used for normalising the arrow
#'   length when horizonal
#' @import dplyr
#' @importFrom tidyr pivot_wider
#' @importFrom rlang  .data as_name enquo `:=`
#' @importFrom scales comma
#' @return A data frame containing the information that sits alongside the spine
#'   chart
create_datatable <- function(data, indicator,
                             area_code, timeperiod,
                             trend,
                             count, value,
                             local_area_code,
                             median_line_area_code,
                             comparator_area_code,
                             dps = 1,
                             header_width,
                             horizontal_arrow_multiplier) {
        if (is.na(comparator_area_code)) {
                area_codes <- c(local_area_code, median_line_area_code)
        } else {
                area_codes <- c(local_area_code, median_line_area_code, comparator_area_code)
        }
        data_temp <- data %>%
                filter({{ area_code }} %in% area_codes) %>%
                mutate({{ area_code }} :=
                               case_when({{ area_code }} == local_area_code ~ "Area_value",
                                         {{ area_code }} == median_line_area_code ~ "Median_value",
                                         {{ area_code }} == comparator_area_code ~ "Comparator_value",
                                         TRUE ~ "Error"),
                       {{ value }} := as.character({{ value }}),
                       dps_2b_removed = {{ dps }},
                       {{ value }} := suppressWarnings(case_when(
                               is.na(as.numeric({{ value }})) ~ {{ value }},
                               TRUE ~ ifelse(
                                       !is.na(.data$dps_2b_removed), paste0("\'",
                                                                            format(comma(round2(as.numeric({{ value }}), dps),
                                                                                         accuracy = 1 / (10 ^ dps)),
                                                                                   nsmall = 1),
                                                                            "\'"),
                                       paste0("\'",
                                              formatC(as.numeric({{ value }}),
                                                      format = "f",
                                                      big.mark = ",",
                                                      drop0trailing = TRUE),
                                              "\'")
                                       )))) %>%
                select({{ indicator }}, {{ area_code }}, {{ timeperiod }}, {{ value }}) %>%
                tidyr::pivot_wider(names_from = {{ area_code }},
                                   values_from = {{ value }})
        data_count <- data %>%
                filter({{ area_code }} == local_area_code) %>%
                select({{ indicator }}, {{ count }}) %>%
                mutate({{ count }} :=
                               suppressWarnings(ifelse(is.na(as.integer({{ count }})),
                                                       {{ count }},
                                                       paste0("'",
                                                              scales::comma(round2(as.numeric({{ count }}), 0),
                                                                            accuracy = 1),
                                                              "'"))))
        if (is.character(rlang::eval_tidy(enquo(trend), data)) |
            is.factor(rlang::eval_tidy(enquo(trend), data))) {

                data_trend <- data %>%
                        filter(({{ area_code }}) == local_area_code) %>%
                        select({{ indicator }}, {{ trend }}) %>%
                        mutate(direction = case_when(
                                grepl("decreasing", tolower({{ trend }})) ~ pi,
                                grepl("increasing", tolower({{ trend }})) ~ 0,
                                grepl("no significant change", tolower({{ trend }})) ~ pi / 2,
                                TRUE ~ NA_real_),
                               trend_sig = case_when(
                                       grepl("better", tolower({{ trend }})) ~ "Better",
                                       grepl("worse", tolower({{ trend }})) ~ "Worse",
                                       grepl("no significant change", tolower({{ trend }})) ~ "Similar",
                                       grepl("increasing", tolower({{ trend }})) ~ "Higher",
                                       grepl("decreasing", tolower({{ trend }})) ~ "Lower",
                                       TRUE ~ "Not compared"),
                               radius = case_when(
                                       grepl("no significant change", tolower({{ trend }})) ~ 0.1 * header_width * horizontal_arrow_multiplier / (n() * 1.5),
                                       grepl("increasing|decreasing", tolower({{ trend }})) ~ 0.1,
                                       TRUE ~ NA_real_
                                       )) %>%
                        select(-{{ trend }})
        } else {
                data_trend <- data %>%
                        select({{ indicator }}) %>%
                        unique() %>%
                        mutate(direction = NA,
                               trend_sig = "",
                               radius = NA)

        }
        data_temp <- merge(data_temp, data_count,
                           by = rlang::as_name(rlang::enquo(indicator)),
                           all.x = TRUE) %>%
                merge(data_trend,
                      by = rlang::as_name(rlang::enquo(indicator)),
                      all.x = TRUE) %>%
                select({{ indicator }},
                       "direction",
                       "trend_sig",
                       {{ timeperiod }},
                       {{ count }},
                       everything())


        return(data_temp)
}


#' Rescale spine data
#'
#' Rescales data so it can be plotted on a spine chart
#' @return A list containing "bars" and "points" which contains data that can be
#'   passed to the phe_spine_chart function
#' @inheritParams area_profiles
#' @inheritParams create_datatable
#' @import dplyr
#' @importFrom tibble column_to_rownames rownames_to_column
#' @importFrom purrr map map_df pmap
#' @importFrom stringr str_trim str_locate str_extract
#' @importFrom tidyr pivot_longer
#' @importFrom stats quantile
#' @importFrom scales comma
#' @importFrom rlang as_name enquo .data `:=`
spine_rescaler <- function(data,
                           area_code,
                           indicator,
                           significance,
                           polarity,
                           area_type,
                           value,
                           timeperiod,
                           local_area_code,
                           median_line_area_code,
                           comparator_area_code = NA,
                           percent_display,
                           dps = 1) {
        # make sure value field is numeric and doesn't contain annotations (based on Health Profiles annotations)
        data <- data %>%
                mutate({{ value }} :=
                               suppressWarnings(as.character({{ value }})),
                       {{ value }} :=
                               ifelse(grepl("^[0-9]", {{ value }}),
                                      str_extract({{ value }}, "^\\d+\\.*\\d*"),
                                      NA),
                       {{ value }} :=
                               suppressWarnings(as.numeric({{ value }})))

        areatype <- filter(data, {{ area_code }} == local_area_code) %>%
                pull({{ area_type }}) %>%
                unique
        remove_data <- data %>%
                filter({{ area_type }} == areatype) %>%
                group_by({{ indicator }}) %>%
                summarise(percent_na = sum(is.na({{ value }})) / n()) %>%
                filter(.data$percent_na >= percent_display) %>%
                pull({{ indicator }})

        # convert indicators to remove to na
        if (length(remove_data) > 0) {
                data <- data %>%
                        mutate({{ value }} :=
                                       ifelse({{ indicator }} %in% remove_data,
                                              NA,
                                              {{ value }}))
        }

        create_point_data <- function(data, areacode){
                if (areacode %in% c(median_line_area_code, comparator_area_code)){
                        data <- data %>%
                                filter({{ area_code }} == areacode) %>%
                                select({{ indicator }}, {{ value }}) %>%
                                rename(regionalvalue = {{ value }})
                } else {
                        data <- data %>%
                                filter({{ area_code }} == areacode) %>%
                                select({{ indicator }}, {{ significance }},
                                       {{ polarity }}, {{ value }}) %>%
                                rename(areavalue = {{ value }},
                                       Significance = {{ significance }})
                }
                data <- data.frame(data)
        }
        if (!is.na(comparator_area_code))
                parentdata <- create_point_data(data, comparator_area_code)
        areadata <- create_point_data(data, local_area_code)
        mean <- create_point_data(data, median_line_area_code)
        data <- filter(data, {{ area_type }} == areatype)

        quantiles <- data %>%
                split(pull(data, {{ indicator }})) %>%
                purrr::map(rlang::as_name(rlang::enquo(value))) %>%
                map_df(quantile, na.rm = TRUE, .id = rlang::as_name(rlang::enquo(indicator))) %>%
                data.frame()

        names(quantiles)[-1] <- c(paste0("Q", 100 * seq(0, 1, by = 0.25)))
        quantiles[, "Q50"] <- NULL
        quantiles <- quantiles %>%
                merge(mean,
                      by = rlang::as_name(rlang::enquo(indicator)),
                      all.x = TRUE) %>%
                rename(mean = "regionalvalue")

        scaled_spine_inputs <- function(IndicatorName, Q0, Q25, mean, Q75, Q100, Significance, Polarity, areavalue, regionalvalue) {
                Polarity <- stringr::str_trim(Polarity)
                quantiles <- structure(as.numeric(c(Q0, Q25, mean, Q75, Q100)),
                                       names = c("0%", "25%", "mean", "75%", "100%"))
                areavalue <- as.numeric(areavalue)
                if (!is.na(regionalvalue))
                        regionalvalue <- as.numeric(regionalvalue)
                if (grepl("Low is good",Polarity)) {
                        quantiles <- rev(quantiles)
                }
                scale_min <- ifelse(quantiles["mean"] - quantiles["0%"] >
                                            quantiles["100%"] - quantiles["mean"],
                                    quantiles["0%"],
                                    quantiles["mean"] - (quantiles["100%"] - quantiles["mean"]))
                scale_max <- ifelse(scale_min == quantiles["0%"],
                                    quantiles["mean"] + (quantiles["mean"] - quantiles["0%"]),
                                    quantiles["100%"])

                rescale <- function(val){
                        rescale <- (val - scale_min) / (scale_max - scale_min)
                        return(rescale)
                }
                quantiles <- rescale(quantiles[names(quantiles) != "mean"])
                if (!is.na(regionalvalue)) {
                        pointdata <- rescale(c(areavalue,regionalvalue))
                        names(pointdata) <- c("area","region")
                } else {
                        pointdata <- rescale(areavalue)
                        names(pointdata) <- "area"
                }

                if (grepl("Low is good",Polarity)) {
                        quantiles <- 1 - quantiles
                        quantiles <- diff(c(0,quantiles))
                        pointdata <- 1 - pointdata
                } else {
                        quantiles <- diff(c(0,quantiles))
                }

                graphpoints <- c("Worst","Q25","Q75","Best")
                scaled_spine_inputs <- list(bars = tibble({{ indicator }} := IndicatorName,
                                                          quantiles = quantiles,
                                                          GraphPoint = factor(graphpoints, levels = rev(graphpoints))),
                                            points = tibble({{ indicator }} := IndicatorName,
                                                            significance = Significance,
                                                            area = pointdata[1],
                                                            region = pointdata[2]))
        }

        dfgraph <- merge(quantiles, areadata,
                         by = rlang::as_name(rlang::enquo(indicator)),
                         all.x = TRUE)
        if (!is.na(comparator_area_code)) {
                dfgraph <- dfgraph %>%
                        merge(parentdata,
                              by = rlang::as_name(rlang::enquo(indicator)),
                              all.x =TRUE)
        } else {
                dfgraph <- dfgraph %>%
                        mutate(regionalvalue = NA)
        }

        dfgraph <- dfgraph %>%
                rename(IndicatorName = {{ indicator }},
                       Polarity = {{ polarity }}) %>%
                lapply(purrr::map, .f = as.character) %>%
                pmap(scaled_spine_inputs)
        dfgraphfinal <- list(bars = suppressWarnings(map_df(dfgraph, "bars")),
                             points = suppressWarnings(map_df(dfgraph, "points")))
        dfpolarity <- areadata %>%
                select({{ indicator }}, {{ polarity }})
        dfannotate <- quantiles %>%
                merge(dfpolarity,
                      by = rlang::as_name(rlang::enquo(indicator)),
                      all.x =TRUE) %>%
                mutate(reverse = ifelse(grepl("Low is good", {{ polarity }}),
                                        TRUE,
                                        FALSE),
                       Worst = ifelse(.data$reverse == TRUE, .data$Q100, .data$Q0),
                       Best = ifelse(.data$reverse == TRUE, .data$Q0, .data$Q100)) %>%
                select({{ indicator }}, "Best", "Worst") %>%
                tidyr::pivot_longer(names_to = "GraphPoint",
                                    values_to = "label",
                                    cols = "Best":"Worst") %>%
                mutate(y = ifelse(.data$GraphPoint == "Best", 1.05, -0.05),
                       dps_2b_removed = dps,
                       label = ifelse(is.na(.data$label),
                                      NA,
                                      ifelse(!is.na(.data$dps_2b_removed),
                                             format(comma(round2(as.numeric(.data$label), dps),
                                                          accuracy = 1 / (10 ^ dps)), nsmall = 1),
                                             formatC(as.numeric(.data$label),
                                                     format = "f",
                                                     big.mark = ",",
                                                     drop0trailing = TRUE))),
                       GraphPoint = factor(.data$GraphPoint, levels = c("Best","Q75","Q25","Worst"))) %>%
                select(!c("dps_2b_removed"))

        timeperiod <- data %>%
                select({{ indicator }}, {{ timeperiod }}) %>%
                ungroup %>%
                unique %>%
                mutate({{ indicator }} := as.character({{ indicator }}))

        areadata <- areadata %>%
                select({{ indicator }}, "areavalue")
        mean <- mean %>%
                rename(England = "regionalvalue")

        if (!is.na(comparator_area_code))
                mean <- merge(parentdata, mean,
                              by = rlang::as_name(rlang::enquo(indicator)),
                              all =TRUE)
        dfannotatepoints <- merge(mean, areadata,
                                  by = rlang::as_name(rlang::enquo(indicator)),
                                  all =TRUE) %>%
                merge(timeperiod,
                      by = rlang::as_name(rlang::enquo(indicator)),
                      all = TRUE) %>%
                mutate(England = as.character(.data$England),
                       England = case_when(
                        is.na(as.numeric(.data$England)) ~ .data$England,
                        TRUE ~ format(round2(as.numeric(.data$England), dps), nsmall = 1)),
                       areavalue = as.character(.data$areavalue),
                       areavalue = case_when(
                               is.na(as.numeric(.data$areavalue)) ~ .data$areavalue,
                               TRUE ~ format(round2(as.numeric(.data$areavalue), dps), nsmall = 1)))
        if (!is.na(comparator_area_code))
                dfannotatepoints <- dfannotatepoints %>%
                mutate(regionalvalue = as.character(.data$regionalvalue),
                       regionalvalue = case_when(
                        is.na(as.numeric(.data$regionalvalue)) ~ .data$regionalvalue,
                        TRUE ~ format(round2(as.numeric(.data$regionalvalue), dps), nsmall = 1)))

        dfgraphfinal$bars <- merge(dfgraphfinal$bars,
                                       dfannotate,
                                       by = c(rlang::as_name(rlang::enquo(indicator)), "GraphPoint"),
                                       all.x =TRUE)
        dfgraphfinal$points <- merge(dfgraphfinal$points,
                                         dfannotatepoints,
                                         by = rlang::as_name(rlang::enquo(indicator)),
                                         all.x =TRUE) %>%
                rename({{ significance }} := significance)
        if (is.na(comparator_area_code)) dfgraphfinal$points$region <- NULL
        return(dfgraphfinal)
}

#' Proper rounding of values
#' @param val numeric value to round
#' @param dps numeric, number of decimal places
#' @details function taken from this link (\url{https://stackoverflow.com/questions/12688717/round-up-from-5})
round2 <- function(val, dps) {
        posneg = sign(val)
        z = abs(val)*10^dps
        z = z + 0.5
        z = trunc(z)
        z = z/10^dps
        z*posneg
}
PublicHealthEngland/fingertipscharts documentation built on Sept. 21, 2023, 10:31 p.m.