Nothing
# helper to compute crosstables --------------
.crosstable <- function(x,
by,
weights = NULL,
remove_na = FALSE,
proportions = NULL,
obj_name = NULL,
group_variable = NULL) {
if (!is.null(proportions)) {
proportions <- match.arg(proportions, c("row", "column", "full"))
}
# frequency table
if (is.null(weights)) {
# we have a `.default` and a `.data.frame` method for `data_tabulate()`.
# since this is the default, `x` can be an object which cannot be used
# with `table()`, that's why we add `tryCatch()` here. Below we give an
# informative error message for non-supported objects.
if (remove_na) {
x_table <- tryCatch(table(x, by), error = function(e) NULL)
} else {
x_table <- tryCatch(table(addNA(x), addNA(by)), error = function(e) NULL)
}
} else if (remove_na) {
# weighted frequency table, excluding NA
x_table <- tryCatch(
stats::xtabs(
weights ~ x + by,
data = data.frame(weights = weights, x = x, by = by),
na.action = stats::na.omit,
addNA = FALSE
),
error = function(e) NULL
)
} else {
# weighted frequency table, including NA
x_table <- tryCatch(
stats::xtabs(
weights ~ x + by,
data = data.frame(weights = weights, x = addNA(x), by = addNA(by)),
na.action = stats::na.pass,
addNA = TRUE
),
error = function(e) NULL
)
}
if (is.null(x_table)) {
insight::format_warning(paste0("Can't compute cross tables for objects of class `", class(x)[1], "`."))
return(NULL)
}
out <- as.data.frame(stats::ftable(x_table))
colnames(out) <- c("Value", "by", "N")
total_n <- sum(out$N, na.rm = TRUE)
# we want to round N for weighted frequencies
if (!is.null(weights)) {
out$N <- round(out$N)
total_n <- round(total_n)
}
out <- data_to_wide(out, values_from = "N", names_from = "by")
# use variable name as column name
if (!is.null(obj_name)) {
colnames(out)[1] <- obj_name
}
# for grouped data frames, add info about grouping variables
if (!is.null(group_variable)) {
var_info <- toString(lapply(colnames(group_variable), function(i) {
sprintf("%s (%s)", i, group_variable[[i]])
}))
out <- cbind(out[1], data.frame(Group = var_info, stringsAsFactors = FALSE), out[-1])
}
attr(out, "total_n") <- total_n
attr(out, "weights") <- weights
attr(out, "proportions") <- proportions
attr(out, "varname") <- obj_name
class(out) <- c("datawizard_crosstab", "data.frame")
out
}
# methods ---------------------
#' @export
format.datawizard_crosstab <- function(x, format = "text", digits = 1, big_mark = NULL, ...) {
# convert to character manually, else, for large numbers,
# format_table() returns scientific notation
x <- as.data.frame(x)
# remove group variable
x$Group <- NULL
# compute total N for rows and colummns
total_n <- attributes(x)$total_n
total_column <- rowSums(x[, -1], na.rm = TRUE)
total_row <- c(colSums(x[, -1], na.rm = TRUE), total_n)
# proportions?
props <- attributes(x)$proportions
if (!is.null(props)) {
# we copy x to tmp, because when we create strings with "sprintf()", the
# variable is coerced to character, and in subsequent iterations of the loop,
# mathemathical operations are not possible anymore
tmp <- x
if (identical(props, "row")) {
for (i in seq_len(nrow(x))) {
row_sum <- sum(x[i, -1], na.rm = TRUE)
if (row_sum == 0) {
row_sum_string <- "(0%)"
} else {
row_sum_string <- sprintf("(%.*f%%)", digits, 100 * x[i, -1] / row_sum)
}
tmp[i, -1] <- paste(format(x[i, -1]), format(row_sum_string, justify = "right"))
}
} else if (identical(props, "column")) {
for (i in seq_len(ncol(x))[-1]) {
col_sum <- sum(x[, i], na.rm = TRUE)
if (col_sum == 0) {
col_sum_string <- "(0%)"
} else {
col_sum_string <- sprintf("(%.*f%%)", digits, 100 * x[, i] / col_sum)
}
tmp[, i] <- paste(format(x[, i]), format(col_sum_string, justify = "right"))
}
} else if (identical(props, "full")) {
for (i in seq_len(ncol(x))[-1]) {
tmp[, i] <- paste(
format(x[, i]),
format(sprintf("(%.*f%%)", digits, 100 * x[, i] / total_n), justify = "right")
)
}
}
# copy back final result
x <- tmp
}
x[] <- lapply(x, as.character)
# format data frame
ftab <- insight::format_table(x, ...)
# replace empty cells with NA
ftab[] <- lapply(ftab, function(i) {
i[i == ""] <- ifelse(identical(format, "text"), "<NA>", "(NA)") # nolint
i
})
# Remove ".00" from numbers
ftab$Total <- gsub("\\.00$", "", as.character(total_column))
# for text format, insert "empty row" before last total row
if (identical(format, "text") || identical(format, "markdown")) {
empty_row <- as.data.frame(t(data.frame(
rep("", ncol(ftab)),
c("Total", as.character(total_row)),
stringsAsFactors = FALSE
)))
} else {
empty_row <- as.data.frame(t(data.frame(
c("Total", as.character(total_row)),
stringsAsFactors = FALSE
)))
}
colnames(empty_row) <- colnames(ftab)
ftab <- rbind(ftab, empty_row)
ftab[nrow(ftab), ] <- gsub("\\.00$", "", ftab[nrow(ftab), ])
# insert big marks?
ftab$Total <- .add_commas_in_numbers(ftab$Total, big_mark)
ftab[nrow(ftab), ] <- .add_commas_in_numbers(ftab[nrow(ftab), ], big_mark)
# also format NA column name
colnames(ftab)[colnames(ftab) == "NA"] <- ifelse(identical(format, "text"), "<NA>", "(NA)")
ftab
}
#' @export
print.datawizard_crosstab <- function(x, big_mark = NULL, ...) {
# grouped data? if yes, add information on grouping factor
if (is.null(x[["Group"]])) {
caption <- NULL
} else {
caption <- paste0("Grouped by ", x[["Group"]][1])
x$Group <- NULL
}
# print table
cat(insight::export_table(
format(x, big_mark = big_mark, ...),
cross = "+",
missing = "<NA>",
caption = caption,
empty_line = "-"
))
invisible(x)
}
#' @export
print_md.datawizard_crosstab <- function(x, big_mark = NULL, ...) {
# grouped data? if yes, add information on grouping factor
if (is.null(x[["Group"]])) {
caption <- NULL
} else {
caption <- paste0("Grouped by ", x[["Group"]][1])
x$Group <- NULL
}
# print table
insight::export_table(
format(x, format = "markdown", big_mark = big_mark, ...),
cross = "+",
missing = "<NA>",
caption = caption,
empty_line = "-",
format = "markdown"
)
}
#' @export
print_html.datawizard_crosstab <- function(x, big_mark = NULL, ...) {
# grouped data? if yes, add information on grouping factor
if (!is.null(x[["Group"]])) {
x$groups <- paste0("Grouped by ", x[["Group"]][1])
x$Group <- NULL
}
# print table
insight::export_table(
format(x, big_mark = big_mark, format = "html", ...),
missing = "(NA)",
format = "html",
by = "groups"
)
}
#' @export
print.datawizard_crosstabs <- function(x, big_mark = NULL, ...) {
for (i in seq_along(x)) {
print(x[[i]], big_mark = big_mark, ...)
cat("\n")
}
invisible(x)
}
#' @export
print_html.datawizard_crosstabs <- function(x, big_mark = NULL, ...) {
if (length(x) == 1) {
print_html(x[[1]], big_mark = big_mark, ...)
} else {
x <- lapply(x, function(i) {
# grouped data? if yes, add information on grouping factor
if (!is.null(i[["Group"]])) {
i$groups <- paste0("Grouped by ", i[["Group"]][1])
i$Group <- NULL
}
format(i, format = "html", big_mark = big_mark, ...)
})
out <- do.call(rbind, x)
# print table
insight::export_table(
out,
missing = "(NA)",
format = "html",
by = "groups"
)
}
}
# helper ---------------------
.validate_by <- function(by, x) {
if (!is.null(by)) {
if (is.character(by)) {
# If "by" is a character string, must be of length 1
if (length(by) > 1) {
insight::format_error(
"If `by` is a string indicating a variable name, `by` must be of length 1.",
"You may use `data_group()` to group by multiple variables, then call `data_tabulate()`."
)
}
# if "by" is a character, "x" must be a data frame
if (!is.data.frame(x)) {
insight::format_error("If `by` is a string indicating a variable name, `x` must be a data frame.")
}
# is "by" a column in "x"?
if (!by %in% colnames(x)) {
insight::format_error(sprintf(
"The variable specified in `by` was not found in `x`. %s",
.misspelled_string(names(x), by, "Possibly misspelled?")
))
}
by <- x[[by]]
}
# is "by" of same length as "x"?
if (is.data.frame(x) && length(by) != nrow(x)) {
insight::format_error("Length of `by` must be equal to number of rows in `x`.") # nolint
}
if (!is.data.frame(x) && length(by) != length(x)) {
insight::format_error("Length of `by` must be equal to length of `x`.") # nolint
}
if (!is.factor(by)) {
# coerce "by" to factor, including labels
by <- to_factor(by, labels_to_levels = TRUE, verbose = FALSE)
}
}
by
}
.validate_table_weights <- function(weights, x, weights_expression = NULL) {
# exception: for vectors, if weighting variable not found, "weights" is NULL.
# to check this, we further need to check whether a weights expression was
# provided, e.g. "weights = iris$not_found" - all this is only relevant when
# weights is NULL
if (is.null(weights)) {
# possibly misspelled weights-variables for default-method ----------------
# -------------------------------------------------------------------------
# do we have any value for weights_expression?
if (!is.null(weights_expression) &&
# due to deparse() and substitute, NULL becomes "NULL" - we need to check for this
!identical(weights_expression, "NULL") &&
# we should only run into this problem, when a variable from a data frame
# is used in the data_tabulate() method for vectors - thus, we need to check
# whether the weights_expression contains a "$" - `iris$not_found` is "NULL"
# we need this check, because the default-method of data_tabulate() is called
# from the data.frame method, where `weights = weights`, and then,
# deparse(substitute(weights)) is "weights" (not "NULL" or "iris$not_found"),
# leading to an error when actually all is OK (if "weights" is NULL)
# Example:
#> efc$weights <- abs(rnorm(n = nrow(efc), mean = 1, sd = 0.5))
# Here, efc$wweight is NULL
#> data_tabulate(efc$c172code, weights = efc$wweight)
# Here, wweight errors anyway, because object "wweight" is not found
#> data_tabulate(efc$c172code, weights = wweight)
grepl("$", weights_expression, fixed = TRUE)) {
insight::format_error("The variable specified in `weights` was not found. Possibly misspelled?")
}
} else {
# possibly misspecified weights-variables for data.frame-method -----------
# -------------------------------------------------------------------------
if (is.character(weights)) {
# If "weights" is a character string, must be of length 1
if (length(weights) > 1) {
insight::format_error(
"If `weights` is a string indicating a variable name, `weights` must be of length 1."
)
}
# if "weights" is a character, "x" must be a data frame
if (!is.data.frame(x)) {
insight::format_error("If `weights` is a string indicating a variable name, `x` must be a data frame.") # nolint
}
# is "by" a column in "x"?
if (!weights %in% colnames(x)) {
insight::format_error(sprintf(
"The variable specified in `weights` was not found in `x`. %s",
.misspelled_string(names(x), weights, "Possibly misspelled?")
))
}
weights <- x[[weights]]
}
# is "by" of same length as "x"?
if (is.data.frame(x) && length(weights) != nrow(x)) {
insight::format_error("Length of `weights` must be equal to number of rows in `x`.") # nolint
}
if (!is.data.frame(x) && length(weights) != length(x)) {
insight::format_error("Length of `weights` must be equal to length of `x`.") # nolint
}
}
weights
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.