#' Summarize each numeric variable in a dataset
#'
#' @description
#' Computes counts, moments and percentiles for every numeric column of
#' `df`. The statistics are similar to those produced by the distributional
#' statistics calculator from the dynamic asset webpage's distributional
#' codes: \url{https://fanwangecon.github.io/CodeDynaAsset/}
#'
#' Non-numeric columns are ignored. Underscores in variable names are
#' replaced by dots in the output (`my_var` is reported as `my.var`), and
#' variables are reported in alphabetical order.
#'
#' Statistics reported, in order: `n` (number of rows), `unique` (number of
#' distinct values, a missing value counts as one), `NAobs` (number of
#' missing values), `ZEROobs` (number of non-missing values equal to zero),
#' `mean`, `sd`, `cv` (sd / mean), `min`, `p01`, `p05`, `p10`, `p25`, `p50`,
#' `p75`, `p90`, `p95`, `p99`, `max`. All statistics other than `n`,
#' `unique` and `NAobs` are computed after dropping missing values.
#'
#' @param df dataframe input dataframe of interest; must contain at least
#'   one numeric column
#' @param bl_statsasrows boolean if TRUE (default) return one row per
#'   statistic and one column per variable, with the statistic name in the
#'   first column `stats`. In this layout every column is character, because
#'   the table is transposed through a character matrix. If FALSE return one
#'   row per variable (first column `var`) and one numeric column per
#'   statistic.
#' @param col2varname boolean if TRUE move the first column (`stats` or
#'   `var`) into the row names and return a base data.frame instead of a
#'   tibble
#' @return a dataframe with summary statistics.
#' @author Fan Wang, \url{http://fanwangecon.github.io}
#' @references
#' \url{https://fanwangecon.github.io/REconTools/reference/ff_summ_percentiles.html}
#' \url{https://github.com/FanWangEcon/REconTools/blob/master/R/ff_summ_percentiles.R}
#' @export
#' @import dplyr tidyr tibble
#' @examples
#' ff_summ_percentiles(iris)
#' ff_summ_percentiles(iris, FALSE)
#' ff_summ_percentiles(iris, bl_statsasrows = FALSE, col2varname = TRUE)
ff_summ_percentiles <- function(df = iris,
                                bl_statsasrows = TRUE,
                                col2varname = FALSE) {
  # Work on a plain data.frame so grouping and tibble classes do not
  # influence the column-wise computations below.
  df <- as.data.frame(df)

  # The statistics are only defined for numeric columns.
  is_num <- vapply(df, is.numeric, logical(1))
  if (!any(is_num)) {
    stop("`df` must contain at least one numeric column.", call. = FALSE)
  }
  df <- df[, is_num, drop = FALSE]

  # Underscores in variable names are reported as dots. The computation
  # below does not need this, but callers may rely on the renamed output.
  names(df) <- gsub("_", ".", names(df), fixed = TRUE)

  probs <- c(
    p01 = 0.01, p05 = 0.05, p10 = 0.10, p25 = 0.25, p50 = 0.50,
    p75 = 0.75, p90 = 0.90, p95 = 0.95, p99 = 0.99
  )

  # Statistics for one column, as a named numeric vector in output order.
  summarise_one <- function(x) {
    avg <- mean(x, na.rm = TRUE)
    std <- stats::sd(x, na.rm = TRUE)
    pct <- stats::quantile(x, probs = probs, na.rm = TRUE, names = FALSE)
    names(pct) <- names(probs)
    c(
      n = length(x),
      unique = length(unique(x)),
      NAobs = sum(is.na(x)),
      # na.rm is required here: without it a single missing value turns
      # the zero count into NA.
      ZEROobs = sum(x == 0, na.rm = TRUE),
      mean = avg,
      sd = std,
      cv = std / avg,
      min = min(x, na.rm = TRUE),
      pct,
      max = max(x, na.rm = TRUE)
    )
  }

  # Variables x statistics matrix; row names are the variable names and
  # column names the statistic names. Building it directly avoids parsing
  # generated "<var>_<stat>" column names.
  mt_stats <- do.call(rbind, lapply(df, summarise_one))
  mt_stats <- mt_stats[order(rownames(mt_stats)), , drop = FALSE]

  df_wide <- data.frame(
    var = rownames(mt_stats), mt_stats,
    row.names = NULL, check.names = FALSE, stringsAsFactors = FALSE
  )
  tb_summ_stats <- tibble::as_tibble(df_wide)

  # Show stats as rows and variables as columns
  if (bl_statsasrows) {
    # Converting the mixed character/numeric table to a matrix formats the
    # numbers as strings, which is why every column of the transposed
    # result is character. The `var` column must stay in the conversion so
    # the values are formatted the same way as before.
    mt_fmt <- as.matrix(df_wide)
    mt_fmt <- t(mt_fmt[, -1, drop = FALSE])
    df_long <- data.frame(
      stats = rownames(mt_fmt), mt_fmt,
      row.names = NULL, check.names = FALSE, stringsAsFactors = FALSE
    )
    names(df_long) <- c("stats", df_wide$var)
    tb_summ_stats <- tibble::as_tibble(df_long)
  }

  # first column to row names, not encouraged in tibble. Done after the
  # optional transpose so that it works for both layouts.
  if (col2varname) {
    tb_summ_stats <- tibble::column_to_rownames(
      tb_summ_stats,
      var = names(tb_summ_stats)[1]
    )
  }

  tb_summ_stats
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.