#' @title Summary Statistics
#' @description Easy summary statistics. Hmisc didn't do it for me.
#'   Prints one section per requested column: character columns get a
#'   frequency table (optionally with grouped sums/averages of other columns),
#'   integer/numeric columns get sum, mean, standard deviation and the number
#'   of missing values, and Date columns get the span between the earliest and
#'   latest date. Columns of any other class are skipped without output.
#' @param df data.table from which summary statistics will come
#' @param minFreq either a proportion (a value below 1, taken as a share of
#'   \code{nrow(df)}) or an integer count. Minimum number of rows a value of a
#'   character column needs in order to be listed.
#' @param vars A vector of column names to show summary stats for. Defaults to
#'   every column of \code{df}.
#' @param sumInChar Tack on a column that you would like to see the sum grouped by the character variables
#' @param avgInChar Tack on a column that you would like to see the average grouped by the character variables
#' @param decimals How many decimals to show
#' @return \code{NULL}, invisibly. The function is called for its side effect
#'   of printing the summaries to the console.
#' @importFrom utils stack
#' @importFrom data.table data.table setnames :=
#' @export
summaryStats <- function(df, minFreq = 0.01, vars = NULL, sumInChar = NULL,
                         avgInChar = NULL, decimals = 2) {
  if (is.null(vars)) vars <- names(df)
  if (!all(vars %in% names(df))) stop("Some vars not in df names")
  # A minFreq below 1 is a proportion of the rows; turn it into a row count.
  if (minFreq < 1) minFreq <- round(nrow(df) * minFreq)

  # Prints the separator and the "<column> - <class>" title of a section.
  printHeader <- function(name, cls) {
    cat("\n\n------------------------------------------------------------\n")
    cat(name, " - ", cls, "\n\n")
  }

  for (var in vars) {
    # `[[` looks the column up by name only; `df[, get(var)]` would resolve
    # `var` inside the table first and break on a column literally named "var".
    dc <- df[[var]]
    cl <- class(dc)

    # inherits() copes with columns carrying more than one class, where
    # `if (class(x) %in% ...)` fails on a length > 1 condition.
    if (inherits(dc, "character")) {
      ret <- data.table(stack(table(dc, useNA = "ifany")))
      names(ret) <- c("Count", var)
      ret <- ret[, c(2, 1)]
      if (!is.null(sumInChar)) {
        sums <- df[, lapply(.SD, function(x) round(sum(x, na.rm = TRUE), decimals)),
                   .SDcols = sumInChar, by = var]
        ret <- merge(ret, sums, by = var)
      }
      if (!is.null(avgInChar)) {
        avgs <- df[, lapply(.SD, function(x) round(mean(x, na.rm = TRUE), decimals)),
                   .SDcols = avgInChar, by = var]
        ret <- merge(ret, avgs, by = var)
      }
      ret <- ret[Count >= minFreq, ][order(-Count)]
      # Number of distinct values that fell below the minFreq cut-off.
      ns <- length(unique(dc)) - nrow(ret)

      printHeader(var, cl)
      if (nrow(ret) == 0) {
        cat("Nothing to show here")
      } else {
        print(ret)
        if (ns > 0) cat("\nValues Not Shown: ", ns)
      }
    } else if (inherits(dc, c("integer", "numeric"))) { # Numeric Variables
      printHeader(var, cl)
      cat("Sum of Col: ", round(sum(dc, na.rm = TRUE), decimals), "\n")
      cat("Average Value: ", round(mean(dc, na.rm = TRUE), decimals), "\n")
      # sd(), not var(): the label promises a standard deviation.
      cat("Standard Deviation: ", round(stats::sd(dc, na.rm = TRUE), decimals), "\n")
      cat("Missing Values: ", sum(is.na(dc)), "\n")
    } else if (inherits(dc, "Date")) {
      printHeader(var, cl)
      # Ignore missing dates so a single NA does not blank out the range.
      span <- if (all(is.na(dc))) {
        "NA days"
      } else {
        format(max(dc, na.rm = TRUE) - min(dc, na.rm = TRUE))
      }
      cat("Date Range:", span)
    }
  }
  invisible(NULL)
}
# `Count` is referenced as a bare column name inside summaryStats()'s
# data.table expressions; declaring it here keeps R CMD check from reporting
# it as an undefined global variable.
utils::globalVariables("Count")
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.