#' Prepare data for a waterfall plot
#'
#' Lumps the levels of `lump1` with `fct_lump()`, weighting every row by the
#' absolute value of `lump2`, so that the `number` heaviest levels are kept
#' and the remaining ones are collapsed into the level `"Other"`. Each
#' variable in `summary_vars` is then summed per lumped level and the
#' running totals needed to draw the bars of a waterfall plot are added.
#'
#' The kept levels are sorted by their summed `lump2` in descending order;
#' the `"Other"` row, if present, is always placed last.
#'
#' @param data a tbl.
#' @param lump1 unquoted column of `data` whose levels are lumped.
#' @param lump2 unquoted numeric column of `data`. Its absolute value is the
#'   lumping weight (missing values count as weight 0) and its per-level sum
#'   orders the rows and feeds the running total. It has to be listed in
#'   `summary_vars` under its own name, otherwise the column is no longer
#'   available after summarising.
#' @param number number of levels of `lump1` to keep. Default is 5. Every
#'   level that `fct_lump()` keeps is returned, so ties at the cut-off can
#'   yield more than `number` levels.
#' @param summary_vars list of quoted columns of `data`, e.g.
#'   `rlang::quos(value)`. Each one is summed with `na.rm = TRUE`.
#' @return a tbl with one row per kept level plus, if anything was lumped,
#'   one `"Other"` row. It holds the lumped `lump1` as character, the summed
#'   `summary_vars`, and the columns `id` (row position), `end` (cumulative
#'   sum of `lump2`) and `start` (the previous `end`, 0 for the first row).
#'
GroupSummaryWaterfall <- function(data, lump1, lump2, number = 5, summary_vars) {
  # Quoting step: the caller refers to the columns of `data` directly.
  lump1 <- enquo(lump1)
  lump_name <- quo_name(lump1)
  lump2 <- enquo(lump2)

  # Give every summary variable its default name first, then wrap it in
  # sum() so that each output column keeps the name of its input column.
  summary_vars <- rlang::quos_auto_name(summary_vars)
  summary_vars <- purrr::map(summary_vars, function(var) {
    expr(sum(!!var, na.rm = TRUE))
  })

  # Lump and summarise once. fct_lump() rejects negative or missing weights,
  # hence the absolute value and the replacement of NA by 0.
  other_level <- "Other"
  lumped <- data %>%
    group_by(
      !!lump_name := fct_lump(
        !!lump1,
        n = number,
        w = replace(abs(!!lump2), is.na(!!lump2), 0),
        other_level = other_level
      )
    ) %>%
    summarise(!!!summary_vars)

  # Pick the remainder row by its level name, not by its row position: the
  # position is only number + 1 when exactly `number` levels were kept.
  # %in% never yields NA, so a missing level is treated as a kept level.
  is_other <- as.character(lumped[[lump_name]]) %in% other_level
  kept <- lumped[!is_other, , drop = FALSE] %>%
    arrange(desc(!!lump2))
  remainder <- lumped[is_other, , drop = FALSE]

  # Kept levels first (largest sum on top), the remainder last; then add the
  # bar coordinates of the waterfall plot.
  dplyr::bind_rows(kept, remainder) %>%
    mutate(
      !!lump_name := as.character(.data[[lump_name]]),
      id = seq_along(.data[[lump_name]]),
      end = cumsum(!!lump2),
      start = c(0, head(end, -1))
    )
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.