#' Distribution of observations across the score groups of attributes
#'
#' For every attribute column named in \code{.vec_names}, count how many rows
#' of \code{.df_in} fall into each distinct value (score group) of that column
#' and divide the count by the total number of rows of \code{.df_in}.
#'
#' The results for all attributes are stacked into one long data frame. This
#' function only computes the table; it does not draw anything itself (see
#' \code{\link{plot_barplot}} for plotting).
#'
#' Missing values in an attribute column form their own group, listed last.
#' Groups are sorted ascending; character groups are sorted in C-locale order
#' so the result does not depend on the machine's locale.
#'
#' @param .df_in A data frame holding one column per attribute.
#' @param .vec_names A character vector with the names of the attribute
#'   columns of \code{.df_in} to summarise.
#' @return A data frame with one row per attribute/group combination and the
#'   columns \code{attribute} (column name), \code{group} (distinct value of
#'   that column), \code{n} (number of rows in the group) and
#'   \code{share_pct} (\code{n} divided by \code{nrow(.df_in)}; note that this
#'   is a fraction between 0 and 1, it is not multiplied by 100). If the
#'   attribute columns have different types, \code{group} is coerced to a
#'   common type by \code{rbind}. With no attributes, or a zero-row
#'   \code{.df_in}, an empty data frame with these four columns is returned.
#'
#' @author C. Sahin
#' @note Version 0, Creation 22.10.2019
#'
#' @examples
#' df <- data.frame(
#'   age_score = c(1, 1, 2, 3),
#'   car_age_score = c(2, 2, 2, 1)
#' )
#' extract_distribution(df, c("age_score", "car_age_score"))
#' @export
extract_distribution <- function(.df_in, .vec_names) {
  stopifnot(is.data.frame(.df_in))
  .vec_names <- as.character(.vec_names)

  # Fail early with a clear message instead of an obscure downstream error.
  missing_cols <- setdiff(.vec_names, names(.df_in))
  if (length(missing_cols) > 0L) {
    stop(
      "Column(s) not found in `.df_in`: ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  total_rows <- nrow(.df_in)

  # Empty frame that fixes the result shape when there is nothing to report.
  # rbind() drops zero-row arguments as soon as a non-empty piece exists, so
  # it never influences the column types of a non-empty result.
  template <- data.frame(matrix(NA, nrow = 0, ncol = 4))
  colnames(template) <- c("attribute", "group", "n", "share_pct")

  # One summary frame per attribute; built independently, bound once below.
  pieces <- lapply(.vec_names, function(val) {
    values <- .df_in[[val]]
    groups <- unique(values)
    # NA group last; radix gives a locale-independent order for strings.
    groups <- groups[order(groups, na.last = TRUE, method = "radix")]
    # match() pairs NA with NA, so missing values are counted as a group.
    counts <- tabulate(match(values, groups), nbins = length(groups))
    data.frame(
      attribute = rep(val, length(groups)),
      group = groups,
      n = counts,
      share_pct = counts / total_rows,
      stringsAsFactors = FALSE
    )
  })

  do.call(rbind, c(list(template), pieces))
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.