R/GroupSummaryWaterfall.R

Defines functions GroupSummaryWaterfall

#' Prepare data for a waterfall plot
#'
#' Lumps all but the `number` highest-weighted levels of `lump1` into a single
#' "Other" level, sums every variable in `summary_vars` per level and adds the
#' running totals needed to draw each level as one bar of a waterfall plot.
#'
#' Levels are ranked by `fct_lump()`, weighted by the absolute value of
#' `lump2`. The kept levels are ordered by their summed `lump2` (descending)
#' and the "Other" row, if lumping produced one, is placed last.
#'
#' @param data a tbl.
#' @param lump1 unquoted name of the column whose levels are lumped and then
#'   grouped on.
#' @param lump2 unquoted name of the numeric column used as lumping weight.
#'   The summarised table is ordered and accumulated on a column of the same
#'   name, so `summary_vars` must contain this column under its default name.
#' @param number number of levels to keep before the remaining ones are
#'   lumped together. Default is 5.
#' @param summary_vars list of quoted variables (e.g. built with `quos()`);
#'   each one is summed per level with `na.rm = TRUE`.
#' @return a tbl with one row per level: the level (as character), the summed
#'   `summary_vars`, and the columns `id` (row position), `end` (cumulative
#'   sum of `lump2`) and `start` (the previous row's `end`, 0 for the first).
#'

GroupSummaryWaterfall <- function(data, lump1, lump2, number = 5, summary_vars) {
  # Quoting step: the user may refer to data frame columns directly.
  lump1 <- enquo(lump1)
  lump_name <- quo_name(lump1)
  lump2 <- enquo(lump2)

  # Give unnamed entries their default names, then wrap every entry in a
  # sum() _after_ naming so the output columns keep the plain variable names.
  summary_vars <- rlang::quos_auto_name(summary_vars)
  summary_vars <- purrr::map(summary_vars, function(var) {
    expr(sum(!!var, na.rm = TRUE))
  })

  # Lump and summarise once. The weights are taken as absolute values so that
  # large negative contributions rank as high as large positive ones and the
  # same lumping is used for the kept levels and for "Other".
  lumped <- data %>%
    group_by(!!lump_name := fct_lump(!!lump1, n = number, w = abs(!!lump2))) %>%
    summarise(!!!summary_vars) %>%
    mutate(!!lump_name := as.character(!!lump1))

  # Identify the lumped row by its label ("Other" is fct_lump()'s default
  # `other_level`) rather than by position: fct_lump() may keep more than
  # `number` levels on ties, or create no "Other" level at all.
  is_other <- lumped[[lump_name]] %in% "Other"
  top <- arrange(lumped[!is_other, ], desc(!!lump2))
  other <- lumped[is_other, ]

  # Kept levels first (largest value first), "Other" last; then add the
  # running totals that describe each bar of the waterfall.
  rbind(top, other) %>%
    mutate(
      id = seq_along(!!lump1),
      end = cumsum(!!lump2),
      # Previous row's end; written so that a zero-row table stays valid.
      start = head(c(0, end), -1)
    )
}
irisweyermenkhoff/toyota-idv-functions documentation built on March 4, 2020, 9:57 a.m.