R/tbl_chart.R

Defines functions tbl_chart

Documented in tbl_chart

#' Table for ggplot2 Charting
#'
#' @description Creates a summarized data frame out of a raw dataframe ready for ggplot2 charting.
#'
#' @param df A raw data frame that includes a subgroup variable, a variable to be tested, and optionally a weight variable.
#' @param x A factor or numeric variable name to be charted, in quotes.
#' @param subgroup A factor variable name that subgroups cases, in quotes.
#' @param weight Optional. A post-stratification weight variable name, in quotes.
#' @param nlabels A logical argument. Default is FALSE. If TRUE, the subgroup levels will be changed to include the unweighted sample sizes ("subgroup (n=xx)").
#' @param newline A logical argument. Default is FALSE. If TRUE, the unweighted sample sizes will be shown on a separate line.
#'
#' @return A summary data frame ready for ggplot2 charting that provides each group's sample sizes and their proportions for each level of the variable (if the variable is a factor) or their means (if the variable is a numeric).
#'
#' @export
#'
#' @examples
#' #Prepping the data first
#' gss_data1 <- dplyr::filter(gss_data, year == "2016",
#'                                       conlegis %in% c("A GREAT DEAL", "ONLY SOME", "HARDLY ANY"))
#'
#' #Factor variable with weights
#' tbl_chart(gss_data1, "conlegis", "region", "wtssall")
#'
#' #Factor variable without weights
#' tbl_chart(gss_data1, "conlegis", "region")
#'
#' #Numeric variable with weights
#' tbl_chart(gss_data1, "coninc", "region", "wtssall")
#'
#' #Numeric variable without weights
#' tbl_chart(gss_data1, "coninc", "region")

tbl_chart <- function(df, x, subgroup, weight = NULL, nlabels = FALSE, newline = FALSE) {

	#Sanity checks

	if (!base::is.data.frame(df)) {
		stop("First argument must be a data frame. The object provided is of class: ", base::class(df)[1])
	}

	if (!base::is.factor(df[[x]]) & !base::is.numeric(df[[x]])) {
		stop("Testing variable ", x, " must be a factor or a numeric vector. The object provided is of class: ", base::class(df[[x]])[1])
	}

	if (base::sum(base::names(df) %in% x) == 0) {
		stop("Testing variable ", x, " not found in data frame ", df, ".")
	}

	if (base::sum(base::names(df) %in% subgroup) == 0) {
		stop("Subgroup variable ", subgroup, " not found in data frame ", df, ".")
	}

	if (!base::is.factor(df[[subgroup]])) {
		stop("Subgroup variable ", subgroup, " must be a factor vector. The object provided is of class: ", base::class(df[[subgroup]])[1])
	}

	if (base::is.null(weight)) {
		df[["weight"]] <- base::rep(1, base::nrow(df))
		vars <- base::c(x, subgroup, "weight")
		weight <- "weight"
	} else {

		if (base::sum(base::names(df) %in% weight) == 0) {
			stop("Weight variable ", weight, " not found in data frame.")
		}

		vars <- base::c(x, subgroup, weight)
	}

	if (!base::is.numeric(df[[weight]])) {
		stop("Weight variable ", weight, " must be a numeric vector. The object provided is of class: ", base::class(df[[weight]])[1])
	}

	df_test <- df[vars]
	df_test <- df_test[stats::complete.cases(df_test), ]

	if (!is.logical(nlabels)) {
		stop("Argument ", nlabels, " must be either TRUE or FALSE.")
	}

	df_test[[subgroup]] <- base::droplevels(df_test[[subgroup]])

	if (base::is.factor(df_test[[x]])) {

		base::levels(df_test[[subgroup]]) <- base::gsub(x = base::levels(df_test[[subgroup]]), pattern = " ", replacement = "_") #Avoid whitespace and dots that can be an issue with reshape.

		df_dummy <- df_test[x]
		df_dummy_matrix <- stats::model.matrix(~ . - 1, data = df_dummy)  #Applying dummy variables
		df_dummy_matrix <- base::cbind(df_test[[weight]], df_dummy_matrix)
		df_dummy_matrix[, -1] <- df_dummy_matrix[, 1] * df_dummy_matrix[, -1] #Applying weights
		df_dummy_matrix <- df_dummy_matrix[, -1]
		df_test <- base::cbind(df_test, df_dummy_matrix)
		df_test_summ <- stats::aggregate(. ~ df_test[[subgroup]], df_test, base::sum)
		df_test_summ <- df_test_summ[!base::names(df_test_summ) %in% base::c(x, subgroup)]
		base::names(df_test_summ)[1] <- subgroup
		df_test_summ_n <- stats::aggregate(df_test[[x]] ~ df_test[[subgroup]], df_test, base::length)
		base::names(df_test_summ_n)[2] <- "Sample_Size"

		for (i in 3:base::ncol(df_test_summ)) {
			df_test_summ[base::ncol(df_test_summ) + 1] <- df_test_summ[i] / df_test_summ[[weight]] #Apply proportions for each level
			base::names(df_test_summ)[base::ncol(df_test_summ)] <- base::paste0(base::names(df_test_summ[i]), "_prop")
		}

		df_test_summ_subgroup <- df_test_summ[, subgroup]
		df_test_summ_prop <- df_test_summ[, grep("_prop", base::names(df_test_summ))]
		df_test_summ <- base::cbind(df_test_summ_subgroup, df_test_summ_prop, df_test_summ_n[2])
		names(df_test_summ) <- base::c(subgroup, base::levels(df_test[[x]]), "Sample_Size")

		result <- stats::reshape(df_test_summ, varying = base::levels(df_test[[x]]), v.names = "prop", timevar = x, direction = "long", times = base::levels(df_test[[x]]))
		result <- result[, -base::which(base::names(result) == "id")]
		result[[x]] <- base::factor(result[[x]], levels = base::levels(df_test[[x]]))
		base::levels(result[[subgroup]]) <- base::gsub(x = base::levels(result[[subgroup]]), pattern = "_", replacement = " ")
	} else {
		df_test_summ <- Hmisc::summarize(base::cbind(df_test[[x]], df_test[[weight]]), Hmisc::llist(df_test[[subgroup]]), function(y) Hmisc::wtd.mean(y[, 1], y[, 2]), stat.name = "wtd.mean")
		base::names(df_test_summ)[1] <- subgroup
		df_test_summ_n <- stats::aggregate(df_test[[x]] ~ df_test[[subgroup]], df_test, base::length)
		base::names(df_test_summ_n)[2] <- "Sample_Size"
		result <- base::cbind(df_test_summ, df_test_summ_n[2])
	}

	base::rownames(result) <- NULL

	if (nlabels == TRUE) {
		LEV <- base::levels(result[[subgroup]])

		if (newline == TRUE) {
			result[[subgroup]] <- base::paste0(result[[subgroup]], "\n(n=", base::format(result$Sample_Size, big.mark = ",", scientific = FALSE, trim = TRUE), ")")
		} else {
			result[[subgroup]] <- base::paste0(result[[subgroup]], " (n=", base::format(result$Sample_Size, big.mark = ",", scientific = FALSE, trim = TRUE), ")")
		}

		New_element <- base::unique(result[[subgroup]])
		index <- base::pmatch(LEV, New_element)

		result[[subgroup]] <- base::factor(result[[subgroup]], levels = New_element[index])
	}

	result <- result[, base::c((1:base::ncol(result))[-base::which(base::names(result) == "Sample_Size")], base::which(base::names(result) == "Sample_Size"))]

	return(result)
}
philstraforelli/ggsigmark documentation built on May 20, 2019, 1:59 p.m.