R/tbl_sig.R

Defines functions tbl_sig

Documented in tbl_sig

#' Create a data frame for geom_sigmark charting
#'
#' @description Create a data frame for charting from the \link[ggsigmark]{freq_t_test} and \link[ggsigmark]{freq_prop_test} outputs
#' @param tbl_df A df object created from either the \link[ggsigmark]{freq_t_test} or the \link[ggsigmark]{freq_prop_test} functions.
#' @param subgroup A factor variable name that subgroups cases, in quotes.
#' @param space_label A numeric value indicating how much to offset the icons from a \link[ggplot2]{geom_text} label. This doesn't need a value if `compare` is set to "w2w" or "curr". It is recommended to be 0.3 for proportions, but trial and error may be necessary. It definitely is for means.
#' @param space_between A numeric value indicating how much to offset the icons from each other. A value is needed if `compare` is set to "total", but can otherwise be ignored. It is recommended to be 0.1 for proportions, but trial and error may be necessary. It definitely is for means.
#' @param compare A string value indicating how the result of significant differences will be displayed. This is expanded in the Details section.
#'
#' @details
#' \itemize{
#'   \item `all`: The default setting. All significant differences will be displayed. This is to be used when comparing across subgroups.
#'   \item `total`: This setting is when you wish to compare a particular subgroup to the rest of the sample.
#'   \item `w2w`: This setting is for tracking waves in which we are only showing those significant differences from one wave to the next.
#'   \item `curr`: This setting is for tracking waves in which we are only showing those past waves that are significantly different from the current wave.
#'}
#'
#' @export
#'
#' @examples
#' ### With proportions:
#' gss_data1 <- dplyr::filter(gss_data, year == "2016",
#'                                       conlegis %in% c("A GREAT DEAL", "ONLY SOME", "HARDLY ANY"))
#'
#' my_results <- freq_prop_test(gss_data1, "conlegis", "region", weight = "wtssall")
#' tbl_sig(my_results, "region", space_label = 0.1, space_between = 0.2)
#'
#' ### With means:
#' gss_data1 <- dplyr::filter(gss_data, year == "2016", coninc > 0)
#'
#' my_results <- freq_t_test(gss_data1, "coninc", "region", weight = "wtssnr")
#' tbl_sig(my_results, "region", space_label = 1, space_between = 4)

tbl_sig <- function(tbl_df, subgroup, space_label = NULL, space_between = NULL, compare = "all") {

	if (compare == "all" & (base::is.null(space_label) | base::is.null(space_between))) {
		stop("Please ensure a value is provided to both space_label and space_between.")
	}

	##new in v0.0.2
	if (compare == "total" & base::is.null(space_label)) {
		stop("Please ensure a value is provided to space_label.")
	}

	if (compare == "total" & base::sum(base::grepl("Rest of Sample", tbl_df$group1), base::grepl("Rest of Sample", tbl_df$group2)) == 0) {
		stop("Are you sure you want to use 'total'? It doesn't appear that a level was specified to test to the rest of the sample.")
	}

	if (base::sum(base::names(tbl_df) == "level") > 0) { #For proportions
		tbl_sig <- tbl_df[base::which(tbl_df$significant == TRUE), ]
		##new in v0.0.2
		if (compare == "total") {
			tbl_sig[["group"]] <- base::ifelse(!base::grepl("Rest of Sample", tbl_sig$group1), tbl_sig$group1, tbl_sig$group2)
			tbl_sig[["sign"]] <- base::ifelse(base::grepl("Rest of Sample", tbl_sig$group1), tbl_sig$group1, tbl_sig$group2)
			tbl_sig[["group_prop"]] <- base::ifelse(!base::grepl("Rest of Sample", tbl_sig$group1), tbl_sig$prop_group1, tbl_sig$prop_group2)
			tbl_sig[["sign_prop"]] <- base::ifelse(base::grepl("Rest of Sample", tbl_sig$group1), tbl_sig$prop_group1, tbl_sig$prop_group2)
			tbl_sig[["group_Sample_Size"]] <- base::ifelse(!base::grepl("Rest of Sample", tbl_sig$group1), tbl_sig$Sample_Size_group1, tbl_sig$Sample_Size_group2)
			tbl_sig[["sign_Sample_Size"]] <- base::ifelse(base::grepl("Rest of Sample", tbl_sig$group1), tbl_sig$Sample_Size_group1, tbl_sig$Sample_Size_group2)
		} else {
			##new in v0.0.2
			tbl_sig[["group"]] <- base::ifelse(tbl_sig$prop_group1 > tbl_sig$prop_group2, tbl_sig$group1, tbl_sig$group2)
			tbl_sig[["sign"]] <- base::ifelse(tbl_sig$prop_group1 < tbl_sig$prop_group2, tbl_sig$group1, tbl_sig$group2)
			tbl_sig[["group_prop"]] <- base::ifelse(tbl_sig$prop_group1 > tbl_sig$prop_group2, tbl_sig$prop_group1, tbl_sig$prop_group2)
			tbl_sig[["sign_prop"]] <- base::ifelse(tbl_sig$prop_group1 < tbl_sig$prop_group2, tbl_sig$prop_group1, tbl_sig$prop_group2)
			tbl_sig[["group_Sample_Size"]] <- base::ifelse(tbl_sig$prop_group1 > tbl_sig$prop_group2, tbl_sig$Sample_Size_group1, tbl_sig$Sample_Size_group2)
			tbl_sig[["sign_Sample_Size"]] <- base::ifelse(tbl_sig$prop_group1 < tbl_sig$prop_group2, tbl_sig$Sample_Size_group1, tbl_sig$Sample_Size_group2)
			##new in v0.0.2
		}
		##new in v0.0.2
		tbl_sig[["group"]] <- base::factor(tbl_sig[["group"]], levels = 1:base::nlevels(tbl_df[["group1"]]), labels = base::levels(tbl_df[["group1"]]))
		tbl_sig[["sign"]] <- base::factor(tbl_sig[["sign"]], levels = 1:base::nlevels(tbl_df[["group1"]]), labels = base::levels(tbl_df[["group1"]]))
		tbl_sig <- tbl_sig[base::names(tbl_sig) %in% c("group", "sign", "level", "group_prop", "sign_prop", "group_Sample_Size", "sign_Sample_Size")]
		tbl_sig <- tbl_sig[base::order(tbl_sig$group, tbl_sig$sign, tbl_sig$level), ]
	} else if (base::sum(base::names(tbl_df) == "level") == 0) { #For numeric variables
		tbl_sig <- tbl_df[base::which(tbl_df$significant == TRUE), ]
		##new in v0.0.2
		if (compare == "total") {
			tbl_sig[["group"]] <- base::ifelse(!base::grepl("Rest of Sample", tbl_sig$group1), tbl_sig$group1, tbl_sig$group2)
			tbl_sig[["sign"]] <- base::ifelse(base::grepl("Rest of Sample", tbl_sig$group1), tbl_sig$group1, tbl_sig$group2)
			tbl_sig[["group_wtd.mean"]] <- base::ifelse(!base::grepl("Rest of Sample", tbl_sig$group1), tbl_sig$wtd.mean_group1, tbl_sig$wtd.mean_group2)
			tbl_sig[["sign_wtd.mean"]] <- base::ifelse(base::grepl("Rest of Sample", tbl_sig$group1), tbl_sig$wtd.mean_group1, tbl_sig$wtd.mean_group2)
			tbl_sig[["group_Sample_Size"]] <- base::ifelse(!base::grepl("Rest of Sample", tbl_sig$group1), tbl_sig$Sample_Size_group1, tbl_sig$Sample_Size_group2)
			tbl_sig[["sign_Sample_Size"]] <- base::ifelse(base::grepl("Rest of Sample", tbl_sig$group1), tbl_sig$Sample_Size_group1, tbl_sig$Sample_Size_group2)
			} else {
			##new in v0.0.2
			tbl_sig[["group"]] <- base::ifelse(tbl_sig$wtd.mean_group1 > tbl_sig$wtd.mean_group2, tbl_sig$group1, tbl_sig$group2)
			tbl_sig[["sign"]] <- base::ifelse(tbl_sig$wtd.mean_group1 < tbl_sig$wtd.mean_group2, tbl_sig$group1, tbl_sig$group2)
			tbl_sig[["group_wtd.mean"]] <- base::ifelse(tbl_sig$wtd.mean_group1 > tbl_sig$wtd.mean_group2, tbl_sig$wtd.mean_group1, tbl_sig$wtd.mean_group2)
			tbl_sig[["sign_wtd.mean"]] <- base::ifelse(tbl_sig$wtd.mean_group1 < tbl_sig$wtd.mean_group2, tbl_sig$wtd.mean_group1, tbl_sig$wtd.mean_group2)
			tbl_sig[["group_Sample_Size"]] <- base::ifelse(tbl_sig$wtd.mean_group1 > tbl_sig$wtd.mean_group2, tbl_sig$Sample_Size_group1, tbl_sig$Sample_Size_group2)
			tbl_sig[["sign_Sample_Size"]] <- base::ifelse(tbl_sig$wtd.mean_group1 < tbl_sig$wtd.mean_group2, tbl_sig$Sample_Size_group1, tbl_sig$Sample_Size_group2)
			##new in v0.0.2
		}
		##new in v0.0.2
		tbl_sig[["group"]] <- base::factor(tbl_sig[["group"]], levels = 1:base::nlevels(tbl_df[["group1"]]), labels = base::levels(tbl_df[["group1"]]))
		tbl_sig[["sign"]] <- base::factor(tbl_sig[["sign"]], levels = 1:base::nlevels(tbl_df[["group1"]]), labels = base::levels(tbl_df[["group1"]]))
		tbl_sig <- tbl_sig[base::names(tbl_sig) %in% c("group", "sign", "group_wtd.mean", "sign_wtd.mean", "group_Sample_Size", "sign_Sample_Size")]
		tbl_sig <- tbl_sig[base::order(tbl_sig$group, tbl_sig$sign), ]
	}

	if (compare == "all") {
		if (base::sum(base::names(tbl_df) == "level") > 0) {
			tbl_sig$pos <- tbl_sig$group_prop + space_label + ((stats::ave(base::seq_len(base::nrow(tbl_sig)), tbl_sig[c("group", "level")], FUN = base::seq_along) - 1) * space_between)
		} else if (base::sum(base::names(tbl_df) == "level") == 0) {
			tbl_sig$pos <- tbl_sig$group_wtd.mean + space_label + ((stats::ave(base::seq_len(base::nrow(tbl_sig)), tbl_sig["group"], FUN = base::seq_along) - 1) * space_between)
		}
	}

	##new in v0.0.2
	if (compare == "total") {
		if (base::sum(base::names(tbl_df) == "level") > 0) {
			tbl_sig$pos <- tbl_sig$group_prop + space_label
			tbl_sig$Direction <- base::ifelse(tbl_sig$group_prop > tbl_sig$sign_prop, "Higher", "Lower")
			tbl_sig$Direction <- base::factor(tbl_sig$Direction, levels = c("Higher", "Lower"))
		} else if (base::sum(base::names(tbl_df) == "level") == 0) {
			tbl_sig$pos <- tbl_sig$group_wtd.mean + space_label
			tbl_sig$Direction <- base::ifelse(tbl_sig$group_wtd.mean > tbl_sig$sign_wtd.mean, "Higher", "Lower")
			tbl_sig$Direction <- base::factor(tbl_sig$Direction, levels = c("Higher", "Lower"))
		}
	}
	##new in v0.0.2

	if (compare == "w2w") {
		tbl_sig <- tbl_sig[base::which(base::abs(base::as.numeric(tbl_sig$group) - base::as.numeric(tbl_sig$sign)) == 1), ]
		tbl_sig$Direction <- base::ifelse(base::as.numeric(tbl_sig$group) > base::as.numeric(tbl_sig$sign), "Increase", "Decrease")
		tbl_sig$Direction <- base::factor(tbl_sig$Direction, levels = c("Increase", "Decrease"))
		tbl_sig$wave_pos <- base::ifelse(base::as.numeric(tbl_sig$group) > base::as.numeric(tbl_sig$sign), tbl_sig$group, tbl_sig$sign)
		tbl_sig$wave_pos <- base::factor(tbl_sig$wave_pos, levels = 1:base::nlevels(tbl_sig[["group"]]), labels = base::levels(tbl_sig[["group"]]))
		if (base::sum(base::names(tbl_df) == "level") > 0) {
			tbl_sig$result_pos <- base::ifelse(base::as.numeric(tbl_sig$group) > base::as.numeric(tbl_sig$sign), tbl_sig$group_prop, tbl_sig$sign_prop)
		} else if (base::sum(base::names(tbl_df) == "level") == 0) {
			tbl_sig$result_pos <- base::ifelse(base::as.numeric(tbl_sig$group) > base::as.numeric(tbl_sig$sign), tbl_sig$group_wtd.mean, tbl_sig$sign_wtd.mean)
		}
	}

	if (compare == "curr") {
		tbl_sig <- tbl_sig[base::which(base::as.numeric(tbl_sig$group) == base::nlevels(tbl_sig$group) | base::as.numeric(tbl_sig$sign) == base::nlevels(tbl_sig$sign)), ]
		tbl_sig$Direction <- base::ifelse(base::as.numeric(tbl_sig$group) > base::as.numeric(tbl_sig$sign), "Increase", "Decrease")
		tbl_sig$Direction <- base::factor(tbl_sig$Direction, levels = c("Increase", "Decrease"))
		tbl_sig$wave_pos <- base::ifelse(base::as.numeric(tbl_sig$group) > base::as.numeric(tbl_sig$sign), tbl_sig$sign, tbl_sig$group)
		tbl_sig$wave_pos <- base::factor(tbl_sig$wave_pos, levels = 1:base::nlevels(tbl_sig[["group"]]), labels = base::levels(tbl_sig[["group"]]))
		if (base::sum(base::names(tbl_df) == "level") > 0) {
			tbl_sig$result_pos <- base::ifelse(base::as.numeric(tbl_sig$group) < base::as.numeric(tbl_sig$sign), tbl_sig$group_prop, tbl_sig$sign_prop)
		} else if (base::sum(base::names(tbl_df) == "level") == 0) {
			tbl_sig$result_pos <- base::ifelse(base::as.numeric(tbl_sig$group) < base::as.numeric(tbl_sig$sign), tbl_sig$group_wtd.mean, tbl_sig$sign_wtd.mean)
		}
	}

	base::colnames(tbl_sig)[base::which(base::colnames(tbl_sig) == "group")] <- subgroup
	base::rownames(tbl_sig) <- NULL

	return(tbl_sig)
}
philstraforelli/ggsigmark documentation built on May 20, 2019, 1:59 p.m.