Nothing
#' Frequency Tables with Percentage and Odds Ratios
#'
#' A method for making 1- and 2-way frequency tables with percentages and odds
#' ratios.
#'
#' @param normaldata A data frame or data frame extension (e.g. a tibble).
#' @param var1 A character string naming the first variable to get frequencies.
#' @param var2 An optional character naming the second variable to get
#' frequencies. If `NULL` (standard) a 1-way frequency table of only `var1` is
#' created, and if `var2` is specified a 2-way table is returned.
#' @param by_vars An optional character vector naming variables in `normal_data`
#' to stratify the calculations and output by. That is, ALL calculations will
#' be made within the combinations of variables in the vector, hence it's
#' possible to get N and % for many groups in one go.
#' @param include_NA A logical. If `FALSE` (standard) missing variables (`NA`'s)
#' will be removed from `var1` and `var2`. Any missing values in `by_vars`
#' will not be removed. If `TRUE` all missing values will be included in
#' calculations and the output.
#' @param values_to_remove An optional character vector. When specified all
#' values from `var1` and `var2` found in `values_to_remove` will be removed
#' from the calculations and output.
#' @param weightvar An optional character naming a column in `normaldata` with
#' numeric weights for each observation. If `NULL` (standard) all observations
#' have weight 1.
#' @param textvar An optional character. When specified `textvar` is added to
#' the resulting table as a comment. When `NULL` (standard) no such text
#' addition is made.
#' @param number_decimals A numeric indicating the number of decimals to show on
#' percentages and weighted frequencies in the combined frequency and percent
#' variables.
#' @param output A character indicating the output type wanted:
#' * `"all"` - will give ALL output from tables. In many cases unnecessary and
#' hard to get an overview of. This is set as the standard.
#' * `"numeric"` - will give frequencies and percents as numeric variables
#' only, thus the number_decimals option is not in effect. This option might
#' be useful when making figures/graphs.
#' * `"col"` - will only give unweighted number of observations and weighted
#' column percent (if weights are used, otherwise unweighted)
#' * `"colw"` - will only give weighted number of observations and weighted
#' column percent (if weights are used, otherwise unweighted)
#' * `"row"`- will only give unweighted number of observations and weighted
#' row percent (if weights are used, otherwise unweighted). Only works in
#' two-way tables (`var2` is specified)
#' * `"roww"` - will only give weighted number of oberservations and weighted
#' column percent (if weights are used, otherwise unweighted). Only works in
#' two-way tables (`var2` is specified)
#' * `"total"` - will only give unweighted number of observations and
#' weighted percent of the total (if weights are used, otherwise unweighted).
#' Only works in two-way tables (`var2` is specified)
#' * `"totalw"` - will only give weighted number of observations and
#' weighted percent of the total (if weights are used, otherwise unweighted).
#' Only works in two-way tables (`var2` is specified)
#' * Any other text will give the default ("all")
#' @param chisquare A logical. `FALSE` (standard) will not calculate p-value for
#' the chi-square test for two-way tables (`var2` is specified). If `TRUE`,
#' the table will include the chi-square p-value as well as the chi-square
#' statistic and the corresponding degrees of freedom. It will be included in
#' the output whichever output option have been specified. No chi-square test
#' is performed or included in one-way tables (`var2` is unspecified)
#'
#' @return A frequency table as a data frame object.
#'
#' @author ASO
#'
#' @seealso [freq_function_repeated()] to to get frequencies for multiple
#' variables in one go.
#'
#' @examples
#' data("starwars", package = "dplyr")
#'
#' test_table1 <- freq_function(
#' starwars,
#' var1 = "homeworld"
#' )
#'
#' test_table2 <- freq_function(
#' starwars,
#' var1 = "sex",
#' var2 = "eye_color",
#' output = "total"
#' )
#'
#' test_table3 <- freq_function(
#' starwars,
#' var1 = "hair_color",
#' var2 = "skin_color",
#' by_vars = "gender",
#' output = "col",
#' number_decimals = 5
#' )
#'
#' @export
freq_function <- function(
normaldata,
var1,
var2 = NULL,
by_vars = NULL,
include_NA = FALSE,
values_to_remove = NULL,
weightvar = NULL,
textvar = NULL,
number_decimals = 2,
output = c("all", "numeric", "col", "colw", "row", "roww", "total", "totalw"),
chisquare = FALSE
) {
### Begin input checks
if (missing(normaldata)) {
stop("'normaldata' must be a data frame.")
}
if (!inherits(normaldata, "data.frame")) {
stop("'normaldata' must be a data frame.")
}
if (missing(var1) || !(is.character(var1) && length(var1) == 1)) {
stop(
"'var1' must be a length 1 character vector naming ",
"the first variable to get frequencies."
)
}
if (!(is.null(var2) || (is.character(var2) && length(var2) == 1))) {
stop(
"'var2' must be NULL or optionally a length 1 character ",
"vector naming the second\nvariable to get frequencies."
)
}
if (!is.null(by_vars)) {
if (!inherits(by_vars, "character")) {
stop(
"by_vars must be NULL or optionally a character vector ",
"naming variables in normal_data\n to stratify the ",
"calculations and output by."
)
} else if (!all(by_vars %in% names(normaldata))) {
stop(
glue::glue(
"by_vars must name variables in normaldata.\n",
"The following are not names of normaldata columns:\n",
"{paste(by_vars[!(by_vars %in% names(starwars))], collapse = ', ')}"
)
)
}
}
if (!(isTRUE(include_NA) || isFALSE(include_NA))) {
stop("'include_NA' must be a boolean.")
}
if (!is.null(values_to_remove)) {
if (!inherits(values_to_remove, "character")) {
stop(
"'values_to_remove' must be NULL or optionally a character vector."
)
}
}
if (!is.null(weightvar)) {
# The final weightvar check returns a warning if some weights in the column
# specified by weightvar are negative or NA. This is done because these
# observations are removed before creating the frequency tables - so it
# will not result in an error - but the removal of these observations are
# likely unexpected to the user. As a help, affected rows are printed.
# To improve the readability of the warning, concecutive rows are grouped
# together.
if (!inherits(weightvar, "character")) {
stop(
"weightvar must be NULL or optionally a character of length 1 ",
"naming a column\nin `normaldata` with numeric weights ",
"for each observation."
)
} else if (!(length(weightvar == 1) && weightvar %in% names(normaldata))) {
stop(
"weightvar must name a single column in 'normaldata' with numeric",
"weights\nfor each observation."
)
} else if (!is.numeric(normaldata[[weightvar]])) {
stop(
"The column named in weightvar must contain numeric weights",
"for each observation."
)
}
if (any(normaldata[[weightvar]] <= 0 | is.na(normaldata[[weightvar]]))) {
warnindex <- which(
normaldata[[weightvar]] <= 0 |
is.na(normaldata[[weightvar]])
)
init <- TRUE
warntext <- ""
for (i in seq_len(length(warnindex) - 1)) {
if (init) {
warntext <- paste0(warntext, warnindex[i])
if (warnindex[i + 1] == warnindex[i] + 1) {
init <- FALSE
} else {
warntext <- paste0(warntext, ", ")
}
} else if (warnindex[i + 1] != warnindex[i] + 1) {
warntext <- paste0(warntext, ":", warnindex[i], ", ")
init <- TRUE
}
}
paste0(warntext, warnindex[length(warnindex)])
warning(glue::glue(
"Non-positive weights or NAs detected in row(s)\n",
"{warntext}\n",
"Rows with non-positive weights or NAs are removed."
))
}
}
if (!(is.null(textvar) || inherits(textvar, "character"))) {
stop("When 'textvar' is specified it must be a character.")
}
if (!inherits(number_decimals, "numeric")) {
stop("'number_decimals' must be a non-negative integer.")
}
if (number_decimals < 0) {
stop("'number_decimals' must be a non-negative integer.")
}
output <- match.arg(output)
if (!(isTRUE(chisquare) || isFALSE(chisquare))) {
stop("'chisquare' must be a boolean.")
}
# End input checks
# Select only mentioned variables from called data (not using specifications)
func_table <- normaldata |>
dplyr::select(
dplyr::all_of(var1),
dplyr::all_of(var2),
dplyr::all_of(weightvar),
dplyr::all_of(by_vars)
)
# Encoding for weight variable - unless a precalculated weight should be used,
# the weight will be 1 for all observations
if (!is.null(weightvar)) {
func_table <- func_table |>
dplyr::mutate("weight_used" = .data[[weightvar]]) |>
dplyr::filter(.data$weight_used > 0)
}
else {
func_table <- dplyr::mutate(func_table, weight_used = as.numeric(1))
}
# Keeping/removing missing (=NA) in the data depending on user specification
if (include_NA == FALSE & is.null(var2)) {
func_table <- dplyr::filter(func_table, !is.na(.data[[var1]]))
} else if (include_NA == FALSE & !is.null(var2)) {
func_table <-
dplyr::filter(
func_table,
!is.na(.data[[var1]]) & !is.na(.data[[var2]])
)
}
# Remove specified values (if any) from the values used in tables before
# calculations (by-variables NOT influenced)
if (is.null(var2)) {
func_table <- func_table |>
dplyr::filter(!.data[[var1]] %in% values_to_remove)
} else {
func_table <- func_table |>
dplyr::filter(
!.data[[var1]] %in% values_to_remove &
!.data[[var2]] %in% values_to_remove
)
}
# Change all factor variables in data to character variables
# (basically it will be by-variables who are changed)
func_table <- func_table |>
dplyr::mutate(dplyr::across(dplyr::where(is.factor), as.character))
# Making summations and calculations (n, % etc.)
if (is.null(by_vars) && is.null(var2)) {
# Getting unweighted and weighted n for each group
func_table <- func_table |>
dplyr::summarise(
n = dplyr::n(),
n_weighted = sum(.data$weight_used, na.rm = TRUE),
.by = dplyr::all_of(var1)
) |>
dplyr::mutate(
Func_var = var1
) |>
dplyr::select("Func_var", dplyr::all_of(var1), "n", "n_weighted") |>
# add summation within variables
(\(x) {
y <- x |>
dplyr::summarise(
n_total = sum(.data$n),
n_weighted_total = sum(.data$n_weighted, na.rm = TRUE)
) |>
dplyr::mutate(
Func_var = var1,
"{var1}" := "Total"
) |>
dplyr::select(
"Func_var",
dplyr::all_of(var1),
"n" = "n_total",
"n_weighted" = "n_weighted_total"
)
multi_join(
dplyr::bind_rows(x, y),
y,
.by = "Func_var"
)
})() |>
dplyr::mutate(
Column_pct = ((.data$n_weighted.1 / .data$n_weighted.2) * 100),
Freq_col_pct = paste0(
.data$n.1,
" (",
sprintf(paste0("%.",number_decimals,"f"), .data$Column_pct),
"%)"
),
Freqw_col_pct = paste0(
sprintf(paste0("%.",number_decimals,"f"), .data$n_weighted.1),
" (",
sprintf(paste0("%.",number_decimals,"f"), .data$Column_pct),
"%)"
),
"{var1}" := .data[[glue::glue("{var1}.1")]],
N = .data$n.1,
N_weighted = .data$n_weighted.1,
) |>
dplyr::select(
dplyr::all_of(var1),
"N",
"N_weighted",
"Column_pct",
"Freq_col_pct",
"Freqw_col_pct"
)
if (output == "numeric") {
func_table <- dplyr::select(
func_table,
-dplyr::starts_with("Freq")
)
} else if (output == "col") {
func_table <- dplyr::select(
func_table,
-"N",
-"N_weighted",
-"Column_pct",
-"Freqw_col_pct"
)
} else if (output == "colw") {
func_table <- dplyr::select(
func_table,
-"N",
-"N_weighted",
-"Column_pct",
-"Freq_col_pct"
)
}
# Adding text-variable IF text have been added to textvar
# at function call + return final table
if (!is.null(textvar)) {
# Adding text-variable
func_table <- func_table |>
dplyr::mutate(Description = textvar) |>
dplyr::relocate("Description")
}
return(func_table)
} else if (is.null(var2)) {
# Getting unweighted and weighted n for each group
func_table <- func_table |>
dplyr::summarise(
n = dplyr::n(),
n_weighted = sum(.data$weight_used, na.rm = TRUE),
.by = c(dplyr::all_of(by_vars), dplyr::all_of(var1))
) |>
dplyr::mutate(
Func_var = var1
) |>
dplyr::select(
dplyr::all_of(by_vars),
"Func_var",
dplyr::all_of(var1),
"n",
"n_weighted"
) |>
# Summation within variables
(\(x) {
y <- x |>
dplyr::summarise(
n_total = sum(.data$n),
n_weighted_total = sum(.data$n_weighted, na.rm = TRUE),
.by = dplyr::all_of(by_vars)
) |>
dplyr::mutate(
Func_var = var1,
"{var1}" := "Total",
n = .data$n_total,
n_weighted = .data$n_weighted_total
) |>
dplyr::select(
dplyr::all_of(by_vars),
"Func_var",
dplyr::all_of(var1),
"n",
"n_weighted"
)
multi_join(
dplyr::bind_rows(x, y),
y,
.by = c(by_vars, "Func_var")
)
})() |>
dplyr::mutate(
Column_pct = ((.data$n_weighted.1 / .data$n_weighted.2) * 100),
Freq_col_pct = paste0(
.data$n.1,
" (",
sprintf(paste0("%.",number_decimals,"f"), .data$Column_pct),
"%)"
),
Freqw_col_pct = paste0(
sprintf(paste0("%.",number_decimals,"f"), .data$n_weighted.1),
" (",
sprintf(paste0("%.",number_decimals,"f"), .data$Column_pct),
"%)"
),
"{var1}" := .data[[glue::glue("{var1}.1")]],
N = .data$n.1,
N_weighted = .data$n_weighted.1
) |>
dplyr::select(
dplyr::all_of(by_vars),
dplyr::all_of(var1),
"N",
"N_weighted",
"Column_pct",
"Freq_col_pct",
"Freqw_col_pct"
) |>
dplyr::arrange(
dplyr::across(c(dplyr::all_of(by_vars), dplyr::all_of(var1)))
)
if (output == "numeric") {
func_table <- dplyr::select(
func_table,
-dplyr::starts_with("Freq")
)
} else if (output == "col") {
func_table <- dplyr::select(
func_table,
-"N",
-"N_weighted",
-"Column_pct",
-"Freqw_col_pct"
)
} else if (output == "colw") {
func_table <- dplyr::select(
func_table,
-"N",
-"N_weighted",
-"Column_pct",
-"Freq_col_pct"
)
}
# Adding text if textvar is used + return final table
if (!is.null(textvar)) {
func_table <- func_table |>
dplyr::mutate(Description = textvar) |>
dplyr::relocate("Description")
}
return(func_table)
}
else if (is.null(by_vars)) {
if (chisquare == TRUE) {
# Degrees of freedom, chi-square test
chi_degree1 <- length(unique(func_table[[var1]]))
chi_degree2 <- length(unique(func_table[[var2]]))
chi_degree_total <- (chi_degree1 - 1) * (chi_degree2 - 1)
chi_degree_freedom <- as.data.frame(chi_degree_total)
}
# Getting unweighted and weighted n for each combination
func_table <- func_table |>
dplyr::summarise(
n = dplyr::n(),
n_weighted = sum(.data$weight_used, na.rm = TRUE),
.by = c(dplyr::all_of(var1), dplyr::all_of(var2))
) |>
dplyr::mutate(
Func_var1 = var1,
Func_var2 = var2
) |>
dplyr::select(
"Func_var1",
"Func_var2",
dplyr::all_of(var1),
dplyr::all_of(var2),
"n",
"n_weighted"
) |>
(\(x) {
multi_join(
# Summation within variable1
x |>
dplyr::summarise(
n = sum(.data$n),
n_weighted = sum(.data$n_weighted, na.rm = TRUE),
.by = dplyr::all_of(var1)
) |>
dplyr::mutate(
Func_var1 = var1,
"{var2}" := "Total"
) |>
dplyr::select(
"Func_var1",
dplyr::all_of(var1),
dplyr::all_of(var2),
"n",
"n_weighted"
),
x,
.by = c("Func_var1", var1)
) |>
dplyr::select(
"Func_var1",
"Func_var2",
dplyr::all_of(var1),
"{var2}" := glue::glue("{var2}.2"),
"{var1}_level_total" := "n.1",
"{var1}_level_total_weighted" := "n_weighted.1",
"n" = "n.2",
"n_weighted" = "n_weighted.2"
) |>
multi_join(
# Summation totals
x |>
dplyr::summarise(
n = sum(.data$n),
n_weighted = sum(.data$n_weighted, na.rm = TRUE)
) |>
dplyr::mutate(
Func_var1 = var1,
Func_var2 = var2,
Var_tot = "Total"
) |>
dplyr::select(
"Func_var1",
"Func_var2",
"{var1}" := "Var_tot",
"{var2}" := "Var_tot",
"n",
"n_weighted"
),
.by = c("Func_var1","Func_var2")
) |>
dplyr::select(
"Func_var1",
"Func_var2",
"{var1}" := glue::glue("{var1}.1"),
"{var2}" := glue::glue("{var2}.1"),
"Total_n" = "n.2",
"Total_n_weighted" = "n_weighted.2",
glue::glue("{var1}_level_total"),
glue::glue("{var1}_level_total_weighted"),
"n" = "n.1",
"n_weighted" = "n_weighted.1"
) |>
multi_join(
# Summation within variable2
x |>
dplyr::summarise(
n = sum(.data$n),
n_weighted = sum(.data$n_weighted, na.rm = TRUE),
.by = dplyr::all_of(var2)
) |>
dplyr::mutate(
Func_var2 = var2,
"{var1}" := "Total"
) |>
dplyr::select(
"Func_var2",
dplyr::all_of(var1),
dplyr::all_of(var2),
"n",
"n_weighted"
),
.by = c("Func_var2", var2)
) |>
dplyr::select(
"Func_var1",
"Func_var2",
"{var1}" := glue::glue("{var1}.1"),
dplyr::all_of(var2),
"Total_n",
"Total_n_weighted",
glue::glue("{var1}_level_total"),
glue::glue("{var1}_level_total_weighted"),
"{var2}_level_total" := "n.2",
"{var2}_level_total_weighted" := "n_weighted.2",
"n" = "n.1",
"n_weighted" = "n_weighted.1"
)
})()
if (chisquare == TRUE) {
# Calculating expected numbers for each cell
chi_table <- func_table |>
dplyr::mutate(
expected =
(.data[[glue::glue("{var1}_level_total_weighted")]] /
.data$Total_n_weighted) *
.data[[glue::glue("{var2}_level_total_weighted")]],
observed = .data$n_weighted,
cell_chi = (((.data$observed - .data$expected)^2) / .data$expected)
) |>
dplyr::summarise(cell_chi_total = sum(.data$cell_chi, na.rm = TRUE))
}
func_table <-
list(
# Percent calculations for each Var1 and Var2 combination
var1_var2_comb = func_table |>
dplyr::mutate(
Total_pct = (
(.data$n_weighted / .data$Total_n_weighted) * 100
),
Row_pct = (
(.data$n_weighted /
.data[[glue::glue("{var1}_level_total_weighted")]]) * 100
),
Column_pct = (
(.data$n_weighted /
.data[[glue::glue("{var2}_level_total_weighted")]]) * 100
)
) |>
dplyr::select(
"Func_var1",
"Func_var2",
dplyr::all_of(var1),
dplyr::all_of(var2),
"N" = "n",
"Weighted_N" = "n_weighted",
"Total_pct",
"Row_pct",
"Column_pct"
),
# Percent calculations for each Var1 row totals (sum over var2)
var1_comb = func_table |>
dplyr::mutate(
"{var2}" := "Total",
n = .data[[glue::glue("{var1}_level_total")]],
Total_pct = (
(.data[[glue::glue("{var1}_level_total_weighted")]] /
.data$Total_n_weighted) * 100
),
Column_pct = (
(.data[[glue::glue("{var1}_level_total_weighted")]] /
.data$Total_n_weighted) * 100
),
Row_pct = (
(.data[[glue::glue("{var1}_level_total_weighted")]] /
.data[[glue::glue("{var1}_level_total_weighted")]]) * 100
)
) |>
dplyr::select(
"Func_var1",
"Func_var2",
dplyr::all_of(var1),
dplyr::all_of(var2),
"N" = "n",
"Weighted_N" = glue::glue("{var1}_level_total_weighted"),
"Total_pct",
"Row_pct",
"Column_pct"
) |>
dplyr::distinct(),
# Percent calculations for each Var2 row totals (sum over var1)
var2_comb = func_table |>
dplyr::mutate(
"{var1}" := "Total",
n = .data[[glue::glue("{var2}_level_total")]],
Total_pct =
(.data[[glue::glue("{var2}_level_total_weighted")]] /
.data$Total_n_weighted) * 100,
Column_pct =
(.data[[glue::glue("{var2}_level_total_weighted")]] /
.data[[glue::glue("{var2}_level_total_weighted")]]) * 100,
Row_pct =
(.data[[glue::glue("{var2}_level_total_weighted")]] /
.data$Total_n_weighted) * 100
) |>
dplyr::select(
"Func_var1",
"Func_var2",
dplyr::all_of(var1),
dplyr::all_of(var2),
"N" = "n",
"Weighted_N" = glue::glue("{var2}_level_total_weighted"),
"Total_pct",
"Row_pct",
"Column_pct"
) |>
dplyr::distinct(),
# Percent calculations for total (complete total)
tot_comb = func_table |>
dplyr::mutate(
tot_var = "Total",
n = .data$Total_n,
Total_pct = ((.data$Total_n_weighted / .data$Total_n_weighted) * 100),
Column_pct = ((.data$Total_n_weighted / .data$Total_n_weighted) * 100),
Row_pct = ((.data$Total_n_weighted / .data$Total_n_weighted) * 100)
) |>
dplyr::select(
"Func_var1",
"Func_var2",
"{var1}" := "tot_var",
"{var2}" := "tot_var",
"N" = "n",
"Weighted_N" = "Total_n_weighted",
"Total_pct",
"Row_pct",
"Column_pct"
) |>
dplyr::distinct()
) |>
(\(x) {
dplyr::bind_rows(x[[1]], x[[2]], x[[3]], x[[4]])
})() |>
dplyr::mutate(
Freq_col_pct = paste0(
.data$N,
" (",
sprintf(paste0("%.",number_decimals,"f"), .data$Column_pct),
"%)"
),
Freqw_col_pct = paste0(
sprintf(paste0("%.",number_decimals,"f"), .data$Weighted_N),
" (",
sprintf(paste0("%.",number_decimals,"f"), .data$Column_pct),
"%)"
),
Freq_row_pct = paste0(
.data$N,
" (",
sprintf(paste0("%.",number_decimals,"f"), .data$Row_pct),
"%)"
),
Freqw_row_pct = paste0(
sprintf(paste0("%.",number_decimals,"f"), .data$Weighted_N),
" (",
sprintf(paste0("%.",number_decimals,"f"), .data$Row_pct),
"%)"
),
Freq_total_pct = paste0(
.data$N,
" (",
sprintf(paste0("%.",number_decimals,"f"), .data$Total_pct),
"%)"
),
Freqw_total_pct = paste0(
sprintf(paste0("%.",number_decimals,"f"), .data$Weighted_N),
" (",
sprintf(paste0("%.",number_decimals,"f"), .data$Total_pct),
"%)"
)
) |>
dplyr::select(
"Func_var2",
dplyr::all_of(var1),
dplyr::all_of(var2),
"N",
"Weighted_N",
"Row_pct",
"Column_pct",
"Total_pct",
"Freq_total_pct",
"Freqw_total_pct",
"Freq_row_pct",
"Freqw_row_pct",
"Freq_col_pct",
"Freqw_col_pct"
)
# Change rotation on table, so the rows and columns are easier to follow
func_table <- dplyr::select(
func_table,
-"Freq_total_pct",
-"Freqw_total_pct",
-"Freq_row_pct",
-"Freqw_row_pct",
-"Freq_col_pct",
-"Freqw_col_pct"
) |>
tidyr::pivot_wider(
names_from = c("Func_var2", dplyr::all_of(var2)),
values_from = c(
"N",
"Weighted_N",
"Row_pct",
"Column_pct",
"Total_pct"
),
values_fill = 0
) |>
dplyr::full_join(
dplyr::select(
func_table,
-"N",
-"Weighted_N",
-"Row_pct",
-"Column_pct",
-"Total_pct"
) |>
tidyr::pivot_wider(
names_from = c("Func_var2", dplyr::all_of(var2)),
values_from = c(
"Freq_total_pct",
"Freqw_total_pct",
"Freq_row_pct",
"Freqw_row_pct",
"Freq_col_pct",
"Freqw_col_pct"
),
values_fill = "0 (0%)"
),
by = var1
)
if (chisquare == TRUE) {
# Degrees of freedom, chi-square test
chi_p_prp <- dplyr::bind_cols(chi_table, chi_degree_freedom)
chi_p <- chi_p_prp |>
dplyr::rowwise() |>
dplyr::mutate(
p_value = pchisq(
.data$cell_chi_total,
df = .data$chi_degree_total,
lower.tail = FALSE
)
) |>
dplyr::ungroup() |>
dplyr::mutate("{var1}" := "Total")
func_table <- dplyr::full_join(func_table, chi_p, by = var1)
}
if (output == "numeric") {
func_table <- dplyr::select(
func_table,
-dplyr::starts_with("Freq")
)
} else if (output == "col") {
func_table <- dplyr::select(
func_table,
-dplyr::starts_with("N_"),
-dplyr::starts_with("Weighted_N_"),
-dplyr::starts_with("Row_pct_"),
-dplyr::starts_with("Column_pct_"),
-dplyr::starts_with("Total_pct_"),
-dplyr::starts_with("Freq_total_"),
-dplyr::starts_with("Freqw_total_"),
-dplyr::starts_with("Freq_row_"),
-dplyr::starts_with("Freqw_row_"),
-dplyr::starts_with("Freqw_col_")
)
} else if (output == "colw") {
func_table <- dplyr::select(
func_table,
-dplyr::starts_with("N_"),
-dplyr::starts_with("Weighted_N_"),
-dplyr::starts_with("Row_pct_"),
-dplyr::starts_with("Column_pct_"),
-dplyr::starts_with("Total_pct_"),
-dplyr::starts_with("Freq_total_"),
-dplyr::starts_with("Freqw_total_"),
-dplyr::starts_with("Freq_row_"),
-dplyr::starts_with("Freqw_row_"),
-dplyr::starts_with("Freq_col_")
)
} else if (output == "row") {
func_table <- dplyr::select(
func_table,
-dplyr::starts_with("N_"),
-dplyr::starts_with("Weighted_N_"),
-dplyr::starts_with("Row_pct_"),
-dplyr::starts_with("Column_pct_"),
-dplyr::starts_with("Total_pct_"),
-dplyr::starts_with("Freq_total_"),
-dplyr::starts_with("Freqw_total_"),
-dplyr::starts_with("Freqw_row_"),
-dplyr::starts_with("Freq_col_"),
-dplyr::starts_with("Freqw_col_")
)
} else if (output == "roww") {
func_table <- dplyr::select(
func_table,
-dplyr::starts_with("N_"),
-dplyr::starts_with("Weighted_N_"),
-dplyr::starts_with("Row_pct_"),
-dplyr::starts_with("Column_pct_"),
-dplyr::starts_with("Total_pct_"),
-dplyr::starts_with("Freq_total_"),
-dplyr::starts_with("Freqw_total_"),
-dplyr::starts_with("Freq_row_"),
-dplyr::starts_with("Freq_col_"),
-dplyr::starts_with("Freqw_col_")
)
} else if (output == "total") {
func_table <- dplyr::select(
func_table,
-dplyr::starts_with("N_"),
-dplyr::starts_with("Weighted_N_"),
-dplyr::starts_with("Row_pct_"),
-dplyr::starts_with("Column_pct_"),
-dplyr::starts_with("Total_pct_"),
-dplyr::starts_with("Freqw_total_"),
-dplyr::starts_with("Freq_row_"),
-dplyr::starts_with("Freqw_row_"),
-dplyr::starts_with("Freq_col_"),
-dplyr::starts_with("Freqw_col_")
)
} else if (output == "totalw") {
func_table <- dplyr::select(
func_table,
-dplyr::starts_with("N_"),
-dplyr::starts_with("Weighted_N_"),
-dplyr::starts_with("Row_pct_"),
-dplyr::starts_with("Column_pct_"),
-dplyr::starts_with("Total_pct_"),
-dplyr::starts_with("Freq_total_"),
-dplyr::starts_with("Freq_row_"),
-dplyr::starts_with("Freqw_row_"),
-dplyr::starts_with("Freq_col_"),
-dplyr::starts_with("Freqw_col_")
)
}
if (!is.null(textvar)) {
# Adding text-variable
func_table <- func_table |>
dplyr::mutate(Description = textvar) |>
dplyr::relocate("Description")
}
return(func_table)
} else {
if (chisquare == TRUE) {
# Degrees of freedom, chi-square test
chi_degree1 <- func_table |>
dplyr::summarise(
chi_degree1 = dplyr::n_distinct(.data[[var1]]),
.by = dplyr::all_of(by_vars)
)
chi_degree2 <- func_table |>
dplyr::summarise(
chi_degree2 = dplyr::n_distinct(.data[[var2]]),
.by = dplyr::all_of(by_vars)
)
chi_degree_freedom <- dplyr::full_join(
chi_degree1,
chi_degree2,
by = by_vars
) |>
dplyr::mutate(
chi_degree_total = ((.data$chi_degree1 - 1) * (.data$chi_degree2 - 1))
) |>
dplyr::select(-"chi_degree1", -"chi_degree2")
}
# Getting unweighted and weighted n for each combination
func_table <- func_table |>
dplyr::summarise(
n = dplyr::n(),
n_weighted = sum(.data$weight_used, na.rm = TRUE),
.by = c(dplyr::all_of(by_vars), dplyr::all_of(var1), dplyr::all_of(var2))
) |>
dplyr::mutate(
Func_var1 = var1,
Func_var2 = var2
) |>
dplyr::select(
dplyr::all_of(by_vars),
"Func_var1",
"Func_var2",
dplyr::all_of(var1),
dplyr::all_of(var2),
"n",
"n_weighted"
) |>
(\(x) {
multi_join(
# Summation within variable1
x |>
dplyr::summarise(
n = sum(.data$n),
n_weighted = sum(.data$n_weighted, na.rm = TRUE),
.by = c(dplyr::all_of(by_vars), dplyr::all_of(var1))
) |>
dplyr::mutate(
Func_var1 = var1,
"{var2}" := "Total"
) |>
dplyr::select(
dplyr::all_of(by_vars),
"Func_var1",
dplyr::all_of(var1),
dplyr::all_of(var2),
"n",
"n_weighted"
),
x,
.by = c(by_vars, "Func_var1", var1)
) |>
dplyr::select(
dplyr::all_of(by_vars),
"Func_var1",
"Func_var2",
dplyr::all_of(var1),
"{var2}" := glue::glue("{var2}.2"),
"{var1}_level_total" := "n.1",
"{var1}_level_total_weighted" := "n_weighted.1",
"n" = "n.2",
"n_weighted" = "n_weighted.2"
) |>
multi_join(
# Summation totals
x |>
dplyr::summarise(
n = sum(.data$n),
n_weighted = sum(.data$n_weighted, na.rm = TRUE),
.by = dplyr::all_of(by_vars)
) |>
dplyr::mutate(
Func_var1 = var1,
Func_var2 = var2,
tot_var = "Total"
) |>
dplyr::select(
dplyr::all_of(by_vars),
"Func_var1",
"Func_var2",
"{var1}" := "tot_var",
"{var2}" := "tot_var",
"n",
"n_weighted"
),
.by = c(by_vars, "Func_var1","Func_var2")
) |>
dplyr::select(
dplyr::all_of(by_vars),
"Func_var1",
"Func_var2",
"{var1}" := glue::glue("{var1}.1"),
"{var2}" := glue::glue("{var2}.1"),
"Total_n" = "n.2",
"Total_n_weighted" = "n_weighted.2",
glue::glue("{var1}_level_total"),
glue::glue("{var1}_level_total_weighted"),
"n" = "n.1",
"n_weighted" = "n_weighted.1"
) |>
multi_join(
# Summation within variable2
x |>
dplyr::summarise(
n = sum(.data$n),
n_weighted = sum(.data$n_weighted, na.rm = TRUE),
.by = c(dplyr::all_of(by_vars), dplyr::all_of(var2))
) |>
dplyr::mutate(
Func_var2 = var2,
"{var1}" := "Total"
) |>
dplyr::select(
dplyr::all_of(by_vars),
"Func_var2",
dplyr::all_of(var1),
dplyr::all_of(var2),
"n",
"n_weighted"
),
.by = c(by_vars, "Func_var2", var2)
) |>
dplyr::select(
dplyr::all_of(by_vars),
"Func_var1",
"Func_var2",
"{var1}" := glue::glue("{var1}.1"),
dplyr::all_of(var2),
"Total_n",
"Total_n_weighted",
glue::glue("{var1}_level_total"),
glue::glue("{var1}_level_total_weighted"),
"{var2}_level_total" := "n.2",
"{var2}_level_total_weighted" := "n_weighted.2",
"n" = "n.1",
"n_weighted" = "n_weighted.1"
)
})()
if (chisquare == TRUE) {
# Calculating expected numbers for each cell
chi_table <- func_table |>
dplyr::mutate(
expected = (
(.data[[glue::glue("{var1}_level_total_weighted")]] / .data$Total_n_weighted) *
.data[[glue::glue("{var2}_level_total_weighted")]]
),
observed = .data$n_weighted,
cell_chi = (((.data$observed - .data$expected)^2) / .data$expected)
) |>
dplyr::summarise(
cell_chi_total = sum(.data$cell_chi, na.rm = TRUE),
.by = dplyr::all_of(by_vars)
)
}
func_table <- list(
# Percent calculations for each Var1 and Var2 combination
var1_var2_comb = func_table |>
dplyr::mutate(
Total_pct =
(.data$n_weighted / .data$Total_n_weighted) * 100,
Row_pct =
(.data$n_weighted /
.data[[glue::glue("{var1}_level_total_weighted")]]) * 100,
Column_pct =
(.data$n_weighted /
.data[[glue::glue("{var2}_level_total_weighted")]]) * 100
) |>
dplyr::select(
dplyr::all_of(by_vars),
"Func_var1",
"Func_var2",
dplyr::all_of(var1),
dplyr::all_of(var2),
"N" = "n",
"Weighted_N" = "n_weighted",
"Total_pct",
"Row_pct",
"Column_pct"
),
# Percent calculations for each Var1 row totals (sum over var2)
var1_comb = func_table |>
dplyr::mutate(
"{var2}" := "Total",
n = .data[[glue::glue("{var1}_level_total")]],
Total_pct =
(.data[[glue::glue("{var1}_level_total_weighted")]] /
.data$Total_n_weighted) * 100,
Column_pct =
(.data[[glue::glue("{var1}_level_total_weighted")]] /
.data$Total_n_weighted) * 100,
Row_pct =
(.data[[glue::glue("{var1}_level_total_weighted")]] /
.data[[glue::glue("{var1}_level_total_weighted")]]) * 100
) |>
dplyr::select(
dplyr::all_of(by_vars),
"Func_var1",
"Func_var2",
dplyr::all_of(var1),
dplyr::all_of(var2),
"N" = "n",
"Weighted_N" = glue::glue("{var1}_level_total_weighted"),
"Total_pct",
"Row_pct",
"Column_pct"
) |>
dplyr::distinct(),
# Percent calculations for each Var2 row totals (sum over var1)
var2_comb = func_table |>
dplyr::mutate(
"{var1}" := "Total",
n = .data[[glue::glue("{var2}_level_total")]],
Total_pct =
(.data[[glue::glue("{var2}_level_total_weighted")]] /
.data$Total_n_weighted) * 100,
Column_pct =
(.data[[glue::glue("{var2}_level_total_weighted")]] /
.data[[glue::glue("{var2}_level_total_weighted")]]) * 100,
Row_pct =
(.data[[glue::glue("{var2}_level_total_weighted")]] /
.data$Total_n_weighted) * 100
) |>
dplyr::select(
dplyr::all_of(by_vars),
"Func_var1",
"Func_var2",
dplyr::all_of(var1),
dplyr::all_of(var2),
"N" = "n",
"Weighted_N" = glue::glue("{var2}_level_total_weighted"),
"Total_pct",
"Row_pct",
"Column_pct"
) |>
dplyr::distinct(),
# Percent calculations for total (complete total)
tot_comb = func_table |>
dplyr::mutate(
tot_var = "Total",
n = .data$Total_n,
Total_pct = ((.data$Total_n_weighted / .data$Total_n_weighted) * 100),
Column_pct = ((.data$Total_n_weighted / .data$Total_n_weighted) * 100),
Row_pct = ((.data$Total_n_weighted / .data$Total_n_weighted) * 100)
) |>
dplyr::select(
dplyr::all_of(by_vars),
"Func_var1",
"Func_var2",
"{var1}" := "tot_var",
"{var2}" := "tot_var",
"N" = "n",
"Weighted_N" = "Total_n_weighted",
"Total_pct",
"Row_pct",
"Column_pct"
) |>
dplyr::distinct()
) |>
(\(x) {
dplyr::bind_rows(x[[1]], x[[2]], x[[3]], x[[4]])
})() |>
dplyr::mutate(
Freq_col_pct = paste0(
.data$N,
" (",
sprintf(paste0("%.",number_decimals,"f"), .data$Column_pct),
"%)"
),
Freqw_col_pct = paste0(
sprintf(paste0("%.",number_decimals,"f"), .data$Weighted_N),
" (",
sprintf(paste0("%.",number_decimals,"f"), .data$Column_pct),
"%)"
),
Freq_row_pct = paste0(
.data$N,
" (",sprintf(paste0("%.",number_decimals,"f"), .data$Row_pct),
"%)"
),
Freqw_row_pct = paste0(
sprintf(paste0("%.",number_decimals,"f"), .data$Weighted_N),
" (",
sprintf(paste0("%.",number_decimals,"f"), .data$Row_pct),
"%)"
),
Freq_total_pct = paste0(
.data$N,
" (",
sprintf(paste0("%.",number_decimals,"f"), .data$Total_pct),
"%)"
),
Freqw_total_pct = paste0(
sprintf(paste0("%.",number_decimals,"f"), .data$Weighted_N),
" (",
sprintf(paste0("%.",number_decimals,"f"), .data$Total_pct),
"%)"
)
) |>
dplyr::select(
dplyr::all_of(by_vars),
"Func_var2",
dplyr::all_of(var1),
dplyr::all_of(var2),
"N",
"Weighted_N",
"Row_pct",
"Column_pct",
"Total_pct",
"Freq_total_pct",
"Freqw_total_pct",
"Freq_row_pct",
"Freqw_row_pct",
"Freq_col_pct",
"Freqw_col_pct"
)
# Change rotation on table, so the rows and columns are easier to follow
func_table <- dplyr::select(
func_table,
-"Freq_total_pct",
-"Freqw_total_pct",
-"Freq_row_pct",
-"Freqw_row_pct",
-"Freq_col_pct",
-"Freqw_col_pct"
) |>
tidyr::pivot_wider(
names_from = c("Func_var2", dplyr::all_of(var2)),
values_from = c(
"N",
"Weighted_N",
"Row_pct",
"Column_pct",
"Total_pct"
),
values_fill = 0
) |>
dplyr::full_join(
dplyr::select(
func_table,
-"N",
-"Weighted_N",
-"Row_pct",
-"Column_pct",
-"Total_pct"
) |>
tidyr::pivot_wider(
names_from = c("Func_var2", dplyr::all_of(var2)),
values_from = c(
"Freq_total_pct",
"Freqw_total_pct",
"Freq_row_pct",
"Freqw_row_pct",
"Freq_col_pct",
"Freqw_col_pct"
),
values_fill = "0 (0%)"
),
by = c(by_vars , var1)
) |>
dplyr::arrange(dplyr::across(dplyr::all_of(by_vars)))
if (chisquare == TRUE) {
# Degrees of freedom, chi-square test
chi_p <- dplyr::full_join(
chi_table,
chi_degree_freedom,
by = by_vars
) |>
dplyr::rowwise() |>
dplyr::mutate(
p_value = pchisq(
.data$cell_chi_total,
df = .data$chi_degree_total,
lower.tail = FALSE
)
) |>
dplyr::ungroup() |>
dplyr::mutate("{var1}" := "Total")
func_table <- dplyr::full_join(
func_table,
chi_p,
by = c(by_vars, var1)
)
}
if (output == "numeric") {
func_table <- dplyr::select(
func_table,
-dplyr::starts_with("Freq")
)
} else if (output == "col") {
func_table <- dplyr::select(
func_table,
-dplyr::starts_with("N_"),
-dplyr::starts_with("Weighted_N_"),
-dplyr::starts_with("Row_pct_"),
-dplyr::starts_with("Column_pct_"),
-dplyr::starts_with("Total_pct_"),
-dplyr::starts_with("Freq_total_"),
-dplyr::starts_with("Freqw_total_"),
-dplyr::starts_with("Freq_row_"),
-dplyr::starts_with("Freqw_row_"),
-dplyr::starts_with("Freqw_col_")
)
} else if (output == "colw") {
func_table <- dplyr::select(
func_table,
-dplyr::starts_with("N_"),
-dplyr::starts_with("Weighted_N_"),
-dplyr::starts_with("Row_pct_"),
-dplyr::starts_with("Column_pct_"),
-dplyr::starts_with("Total_pct_"),
-dplyr::starts_with("Freq_total_"),
-dplyr::starts_with("Freqw_total_"),
-dplyr::starts_with("Freq_row_"),
-dplyr::starts_with("Freqw_row_"),
-dplyr::starts_with("Freq_col_")
)
} else if (output == "row") {
func_table <- dplyr::select(
func_table,
-dplyr::starts_with("N_"),
-dplyr::starts_with("Weighted_N_"),
-dplyr::starts_with("Row_pct_"),
-dplyr::starts_with("Column_pct_"),
-dplyr::starts_with("Total_pct_"),
-dplyr::starts_with("Freq_total_"),
-dplyr::starts_with("Freqw_total_"),
-dplyr::starts_with("Freqw_row_"),
-dplyr::starts_with("Freq_col_"),
-dplyr::starts_with("Freqw_col_")
)
} else if (output == "roww") {
func_table <- dplyr::select(
func_table,
-dplyr::starts_with("N_"),
-dplyr::starts_with("Weighted_N_"),
-dplyr::starts_with("Row_pct_"),
-dplyr::starts_with("Column_pct_"),
-dplyr::starts_with("Total_pct_"),
-dplyr::starts_with("Freq_total_"),
-dplyr::starts_with("Freqw_total_"),
-dplyr::starts_with("Freq_row_"),
-dplyr::starts_with("Freq_col_"),
-dplyr::starts_with("Freqw_col_")
)
} else if (output == "total") {
func_table <- dplyr::select(
func_table,
-dplyr::starts_with("N_"),
-dplyr::starts_with("Weighted_N_"),
-dplyr::starts_with("Row_pct_"),
-dplyr::starts_with("Column_pct_"),
-dplyr::starts_with("Total_pct_"),
-dplyr::starts_with("Freqw_total_"),
-dplyr::starts_with("Freq_row_"),
-dplyr::starts_with("Freqw_row_"),
-dplyr::starts_with("Freq_col_"),
-dplyr::starts_with("Freqw_col_")
)
} else if (output == "totalw") {
func_table <- dplyr::select(
func_table,
-dplyr::starts_with("N_"),
-dplyr::starts_with("Weighted_N_"),
-dplyr::starts_with("Row_pct_"),
-dplyr::starts_with("Column_pct_"),
-dplyr::starts_with("Total_pct_"),
-dplyr::starts_with("Freq_total_"),
-dplyr::starts_with("Freq_row_"),
-dplyr::starts_with("Freqw_row_"),
-dplyr::starts_with("Freq_col_"),
-dplyr::starts_with("Freqw_col_")
)
}
if (!is.null(textvar)) {
# Adding text-variable
func_table <- func_table |>
dplyr::mutate(Description = textvar) |>
dplyr::relocate("Description")
}
return(func_table)
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.