#' Append a totals row and/or column to a data.frame
#'
#' This function defaults to excluding the first column of the input data.frame,
#' assuming that it contains a descriptive variable, but this can be overridden
#' by specifying the columns to be totaled in the `...` argument. Non-numeric
#' columns are converted to character class and have a user-specified fill character
#' inserted in the totals row.
#'
#' @param dat An input `data.frame` with at least one numeric column. If given a
#' list of data.frames, this function will apply itself to each `data.frame`
#' in the list (designed for 3-way `tabyl` lists).
#' @param where One of "row", "col", or `c("row", "col")`
#' @param fill If there are non-numeric columns, what should fill the bottom row
#' of those columns? If a string, relevant columns will be coerced to character.
#' If `NA` then column types are preserved.
#' @param na.rm Should missing values (including `NaN`) be omitted from the calculations?
#' @param name Name of the totals row and/or column. If both are created, and
#' `name` is a single string, that name is applied to both. If both are created
#' and `name` is a vector of length 2, the first element of the vector will be
#' used as the row name (in column 1), and the second element will be used as the
#' totals column name. Defaults to "Total".
#' @param ... Columns to total. This takes a tidyselect specification. By default,
#' all numeric columns (besides the initial column, if numeric) are included in
#' the totals, but this allows you to manually specify which columns should be
#' included, for use on a data.frame that does not result from a call to `tabyl`.
#' @return A `data.frame` augmented with a totals row, column, or both.
#' The `data.frame` is now also of class `tabyl` and stores information about
#' the attached totals and underlying data in the tabyl attributes.
#' @export
#' @examples
#' mtcars %>%
#' tabyl(am, cyl) %>%
#' adorn_totals()
adorn_totals <- function(dat, where = "row", fill = "-", na.rm = TRUE, name = "Total", ...) {
if ("both" %in% where) {
where <- c("row", "col")
}
# if input is a list, call purrr::map to recursively apply this function to each data.frame
if (is.list(dat) && !is.data.frame(dat)) {
purrr::map(dat, adorn_totals, where, fill, na.rm, name)
} else {
if (!is.data.frame(dat)) {
stop("adorn_totals() must be called on a data.frame or list of data.frames")
}
numeric_cols <- which(vapply(dat, is.numeric, logical(1)))
non_numeric_cols <- setdiff(1:ncol(dat), numeric_cols)
if (rlang::dots_n(...) == 0) {
# by default 1st column is not totaled so remove it from numeric_cols and add to non_numeric_cols
numeric_cols <- setdiff(numeric_cols, 1)
non_numeric_cols <- unique(c(1, non_numeric_cols))
cols_to_total <- numeric_cols
} else {
expr <- rlang::expr(c(...))
cols_to_total <- tidyselect::eval_select(expr, data = dat)
if (any(cols_to_total %in% non_numeric_cols)) {
cols_to_total <- setdiff(cols_to_total, non_numeric_cols)
}
}
if (length(cols_to_total) == 0) {
stop("at least one targeted column must be of class numeric. Control target variables with the ... argument. adorn_totals should be called before other adorn_ functions.")
}
if (sum(where %in% c("row", "col")) != length(where)) {
stop("\"where\" must be one of \"row\", \"col\", or c(\"row\", \"col\")")
}
if (length(name) == 1) name <- rep(name, 2)
# grouped_df causes problems, #97
if (inherits(dat, "grouped_df")) {
dat <- dplyr::ungroup(dat)
}
dat <- as_tabyl(dat) # even a tabyl needs to be recast as a tabyl to reset the core in case it's been sorted
# set totals attribute
if (sum(where %in% attr(dat, "totals")) > 0) { # if either of the values of "where" are already in totals attribute
stop("trying to re-add a totals dimension that is already been added")
} else if (length(attr(dat, "totals")) == 1) {
# if totals row OR col has already been adorned, append new axis to the current attribute
attr(dat, "totals") <- c(attr(dat, "totals"), where)
} else {
attr(dat, "totals") <- where
}
if ("row" %in% where) {
# capture factor levels if relevant, #494
factor_input <- is.factor(dat[[1]])
if (factor_input) {
col1_backup <- dat[[1]][1]
}
# creates the totals row to be appended
col_sum <- function(a_col, na_rm = na.rm) {
# can't do this with if_else because it doesn't like the sum() of a character vector,
# even if that clause is not reached
if (is.numeric(a_col)) {
sum(a_col, na.rm = na_rm)
} else {
if (!is.character(fill)) { # if fill isn't a character string, use NA consistent with data types
switch(typeof(a_col),
"character" = NA_character_,
"integer" = NA_integer_,
"double" = if (inherits(a_col, "Date") || inherits(a_col, "POSIXt")) {
as.Date(NA_real_, origin = "1970-01-01")
} else {
NA_real_
},
"complex" = NA_complex_,
NA
)
} else {
fill # otherwise just use the string provided
}
}
}
if (is.character(fill)) { # if fill is a string, keep original implementation
col_totals <- purrr::map_df(dat, col_sum)
not_totaled_cols <- setdiff(1:length(col_totals), cols_to_total)
col_totals[not_totaled_cols] <- fill # reset numeric columns that weren't to be totaled
dat[not_totaled_cols] <- lapply(dat[not_totaled_cols], as.character) # type compatibility for bind_rows
} else {
cols_idx <- seq_along(dat) # get col indexes
names(cols_idx) <- names(dat) # name them using dat names
col_totals <- purrr::map_df(cols_idx, function(i) {
if (is.numeric(dat[[i]]) && !i %in% cols_to_total) { # check if numeric and not to be totaled
switch(typeof(dat[[i]]), # and set to NA
"integer" = NA_integer_,
"double" = NA_real_,
NA
)
} else { # otherwise run col_sum on the rest
col_sum(dat[[i]])
}
})
if (!is.character(dat[[1]]) && !1 %in% cols_to_total) {
# convert first col to character so that name can be appended
dat[[1]] <- as.character(dat[[1]])
col_totals[[1]] <- as.character(col_totals[[1]])
}
}
if (!1 %in% cols_to_total) { # give users the option to total the first column?? Up to them I guess
col_totals[1, 1] <- name[1] # replace first column value with name argument
} else {
message("Because the first column was specified to be totaled, it does not contain the label 'Total' (or user-specified name) in the totals row")
}
dat[(nrow(dat) + 1), ] <- col_totals[1, ] # insert totals_col as last row in dat
if (factor_input) { # restore factor/ordered info, #494
dat[[1]] <- factor(dat[[1]],
levels = c(setdiff(levels(col1_backup), name[1]), name[1]), # don't add if level is present
ordered = is.ordered(col1_backup)
)
}
}
if ("col" %in% where) {
# Add totals col
row_totals <- dat %>%
dplyr::select(dplyr::all_of(cols_to_total) & dplyr::where(is.numeric)) %>%
dplyr::transmute(Total = rowSums(., na.rm = na.rm))
dat[[name[2]]] <- row_totals$Total
}
dat
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.