#' Table for ggplot2 Charting
#'
#' @description Creates a summarized data frame out of a raw dataframe ready for ggplot2 charting.
#'
#' @param df A raw data frame that includes a subgroup variable, a variable to be tested, and optionally a weight variable.
#' @param x A factor or numeric variable name to be charted, in quotes.
#' @param subgroup A factor variable name that subgroups cases, in quotes.
#' @param weight Optional. A post-stratification weight variable name, in quotes.
#' @param nlabels A logical argument. Default is FALSE. If TRUE, the subgroup levels will be changed to include the unweighted sample sizes ("subgroup (n=xx)").
#' @param newline A logical argument. Default is FALSE. If TRUE, the unweighted sample sizes will be shown on a separate line.
#'
#' @return A summary data frame ready for ggplot2 charting that provides each group's sample sizes and their proportions for each level of the variable (if the variable is a factor) or their means (if the variable is a numeric).
#'
#' @export
#'
#' @examples
#' #Prepping the data first
#' gss_data1 <- dplyr::filter(gss_data, year == "2016",
#' conlegis %in% c("A GREAT DEAL", "ONLY SOME", "HARDLY ANY"))
#'
#' #Factor variable with weights
#' tbl_chart(gss_data1, "conlegis", "region", "wtssall")
#'
#' #Factor variable without weights
#' tbl_chart(gss_data1, "conlegis", "region")
#'
#' #Numeric variable with weights
#' tbl_chart(gss_data1, "coninc", "region", "wtssall")
#'
#' #Numeric variable without weights
#' tbl_chart(gss_data1, "coninc", "region")
tbl_chart <- function(df, x, subgroup, weight = NULL, nlabels = FALSE, newline = FALSE) {
#Sanity checks
if (!base::is.data.frame(df)) {
stop("First argument must be a data frame. The object provided is of class: ", base::class(df)[1])
}
if (!base::is.factor(df[[x]]) & !base::is.numeric(df[[x]])) {
stop("Testing variable ", x, " must be a factor or a numeric vector. The object provided is of class: ", base::class(df[[x]])[1])
}
if (base::sum(base::names(df) %in% x) == 0) {
stop("Testing variable ", x, " not found in data frame ", df, ".")
}
if (base::sum(base::names(df) %in% subgroup) == 0) {
stop("Subgroup variable ", subgroup, " not found in data frame ", df, ".")
}
if (!base::is.factor(df[[subgroup]])) {
stop("Subgroup variable ", subgroup, " must be a factor vector. The object provided is of class: ", base::class(df[[subgroup]])[1])
}
if (base::is.null(weight)) {
df[["weight"]] <- base::rep(1, base::nrow(df))
vars <- base::c(x, subgroup, "weight")
weight <- "weight"
} else {
if (base::sum(base::names(df) %in% weight) == 0) {
stop("Weight variable ", weight, " not found in data frame.")
}
vars <- base::c(x, subgroup, weight)
}
if (!base::is.numeric(df[[weight]])) {
stop("Weight variable ", weight, " must be a numeric vector. The object provided is of class: ", base::class(df[[weight]])[1])
}
df_test <- df[vars]
df_test <- df_test[stats::complete.cases(df_test), ]
if (!is.logical(nlabels)) {
stop("Argument ", nlabels, " must be either TRUE or FALSE.")
}
df_test[[subgroup]] <- base::droplevels(df_test[[subgroup]])
if (base::is.factor(df_test[[x]])) {
base::levels(df_test[[subgroup]]) <- base::gsub(x = base::levels(df_test[[subgroup]]), pattern = " ", replacement = "_") #Avoid whitespace and dots that can be an issue with reshape.
df_dummy <- df_test[x]
df_dummy_matrix <- stats::model.matrix(~ . - 1, data = df_dummy) #Applying dummy variables
df_dummy_matrix <- base::cbind(df_test[[weight]], df_dummy_matrix)
df_dummy_matrix[, -1] <- df_dummy_matrix[, 1] * df_dummy_matrix[, -1] #Applying weights
df_dummy_matrix <- df_dummy_matrix[, -1]
df_test <- base::cbind(df_test, df_dummy_matrix)
df_test_summ <- stats::aggregate(. ~ df_test[[subgroup]], df_test, base::sum)
df_test_summ <- df_test_summ[!base::names(df_test_summ) %in% base::c(x, subgroup)]
base::names(df_test_summ)[1] <- subgroup
df_test_summ_n <- stats::aggregate(df_test[[x]] ~ df_test[[subgroup]], df_test, base::length)
base::names(df_test_summ_n)[2] <- "Sample_Size"
for (i in 3:base::ncol(df_test_summ)) {
df_test_summ[base::ncol(df_test_summ) + 1] <- df_test_summ[i] / df_test_summ[[weight]] #Apply proportions for each level
base::names(df_test_summ)[base::ncol(df_test_summ)] <- base::paste0(base::names(df_test_summ[i]), "_prop")
}
df_test_summ_subgroup <- df_test_summ[, subgroup]
df_test_summ_prop <- df_test_summ[, grep("_prop", base::names(df_test_summ))]
df_test_summ <- base::cbind(df_test_summ_subgroup, df_test_summ_prop, df_test_summ_n[2])
names(df_test_summ) <- base::c(subgroup, base::levels(df_test[[x]]), "Sample_Size")
result <- stats::reshape(df_test_summ, varying = base::levels(df_test[[x]]), v.names = "prop", timevar = x, direction = "long", times = base::levels(df_test[[x]]))
result <- result[, -base::which(base::names(result) == "id")]
result[[x]] <- base::factor(result[[x]], levels = base::levels(df_test[[x]]))
base::levels(result[[subgroup]]) <- base::gsub(x = base::levels(result[[subgroup]]), pattern = "_", replacement = " ")
} else {
df_test_summ <- Hmisc::summarize(base::cbind(df_test[[x]], df_test[[weight]]), Hmisc::llist(df_test[[subgroup]]), function(y) Hmisc::wtd.mean(y[, 1], y[, 2]), stat.name = "wtd.mean")
base::names(df_test_summ)[1] <- subgroup
df_test_summ_n <- stats::aggregate(df_test[[x]] ~ df_test[[subgroup]], df_test, base::length)
base::names(df_test_summ_n)[2] <- "Sample_Size"
result <- base::cbind(df_test_summ, df_test_summ_n[2])
}
base::rownames(result) <- NULL
if (nlabels == TRUE) {
LEV <- base::levels(result[[subgroup]])
if (newline == TRUE) {
result[[subgroup]] <- base::paste0(result[[subgroup]], "\n(n=", base::format(result$Sample_Size, big.mark = ",", scientific = FALSE, trim = TRUE), ")")
} else {
result[[subgroup]] <- base::paste0(result[[subgroup]], " (n=", base::format(result$Sample_Size, big.mark = ",", scientific = FALSE, trim = TRUE), ")")
}
New_element <- base::unique(result[[subgroup]])
index <- base::pmatch(LEV, New_element)
result[[subgroup]] <- base::factor(result[[subgroup]], levels = New_element[index])
}
result <- result[, base::c((1:base::ncol(result))[-base::which(base::names(result) == "Sample_Size")], base::which(base::names(result) == "Sample_Size"))]
return(result)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.