# File: R/extract_distribution.R

# Defines function: extract_distribution

#' Count observations per score class for one or more attributes
#'
#' For every attribute named in \code{.vec_names}, the rows of \code{.df_in}
#' are grouped by the distinct values of that attribute (e.g. the score
#' classes of the car age) and the number of observations per value is
#' counted. Each count is additionally expressed as a share of the total
#' number of rows in \code{.df_in}. The per-attribute results are stacked
#' into one long data frame.
#'
#' This function only computes the table; it does not draw anything itself.
#' See \code{\link{plot_barplot}} for plotting the distribution.
#'
#' @param .df_in A data frame containing the attribute columns.
#' @param .vec_names A character vector with the names of the attribute
#'   columns in \code{.df_in} to summarise.
#' @return A data frame with one row per attribute/value combination and the
#'   columns
#'   \describe{
#'     \item{attribute}{name of the attribute column}
#'     \item{group}{distinct value (score class) of that attribute}
#'     \item{n}{number of observations with that value}
#'     \item{share_pct}{\code{n} divided by the total number of rows of
#'       \code{.df_in}. Despite the column name this is a fraction between
#'       0 and 1, not a percentage.}
#'   }
#'   If \code{.vec_names} is empty, a zero-row data frame with these four
#'   columns is returned.
#'
#' @author C. Sahin
#' @note Version 0, Creation 22.10.2019
#'
#' @seealso \code{\link{plot_barplot}}
#'
#' @examples
#' df  <- res_data
#' vec <- c("age_score", "car_age_score")
#' extract_distribution(df, vec)
#' @export
extract_distribution <- function(.df_in, .vec_names) {
  stopifnot(is.data.frame(.df_in))

  # A `for` loop silently iterates a factor as character; make that explicit
  # so `lapply()` below sees plain column names.
  .vec_names <- as.character(.vec_names)

  # Fail early with a readable message instead of a tidy-eval error later on.
  unknown <- setdiff(.vec_names, colnames(.df_in))
  if (length(unknown) > 0) {
    stop(
      "Attribute(s) not found in `.df_in`: ",
      paste(unknown, collapse = ", "),
      call. = FALSE
    )
  }

  out_cols <- c("attribute", "group", "n", "share_pct")

  # Zero-row skeleton; this is what gets returned when nothing is requested.
  skeleton <- data.frame(matrix(NA, nrow = 0, ncol = length(out_cols)))
  colnames(skeleton) <- out_cols

  # Shares are always relative to the full input, not to the group total.
  total_numbers <- nrow(.df_in)

  # One summary table per attribute.
  per_attribute <- lapply(.vec_names, function(val) {
    counts <- dplyr::summarise(
      dplyr::group_by(.df_in, !!rlang::sym(val)),
      n = dplyr::n()
    )
    data.frame(
      attribute = rep(val, nrow(counts)),
      group = counts[[1]],
      n = counts[["n"]],
      share_pct = counts[["n"]] / total_numbers,
      stringsAsFactors = FALSE
    )
  })

  # Bind once instead of growing the result inside the loop. `rbind()` is
  # kept (rather than a stricter row-binder) so that attributes whose value
  # columns have different types are still coerced into a common `group`
  # column, as before.
  do.call(rbind, c(list(skeleton), per_attribute))
}
# Source: irisweyermenkhoff/toyota-idv-functions, documentation built on March 4, 2020, 9:57 a.m.