#' Add column totals to \code{summary_factorlist()} output
#'
#' @param df.in \code{summary_factorlist()} output.
#' @param .data Data frame used to create \code{summary_factorlist()}.
#' @param dependent Character. Name of dependent variable.
#' @param na_include_dependent Logical. When TRUE, missing data in the dependent
#' variable is included in totals.
#' @param percent Logical. Include percentage.
#' @param digits Integer length 2. Number of digits for (1) percentage, (2) weighted count.
#' @param label Character. Label for total row.
#' @param prefix Character. Prefix for column totals, e.g "N=".
#' @param weights Character vector of length 1: name of column to use for weights.
#'
#' @return Data frame.
#' @export
#'
#' @examples
#' explanatory = c("age.factor", "sex.factor", "obstruct.factor", "perfor.factor")
#' dependent = 'mort_5yr'
#' colon_s %>%
#' summary_factorlist(dependent, explanatory) %>%
#' ff_column_totals(colon_s, dependent)
#'
#' # Ensure works with missing data in dependent
#' colon_s = colon_s %>%
#' dplyr::mutate(
#' mort_5yr = forcats::fct_na_value_to_level(mort_5yr, level = "(Missing)")
#' )
#' colon_s %>%
#' summary_factorlist(dependent, explanatory) %>%
#' ff_column_totals(colon_s, dependent)
ff_column_totals <- function(df.in, .data, dependent, na_include_dependent = FALSE,
percent = TRUE, digits = c(1, 0), label = NULL, prefix = "", weights = NULL){
if(!any(names(df.in) == "label")) stop("finalfit function must include: add_dependent_label = FALSE")
if(na_include_dependent){
.data = .data %>%
dplyr::mutate_if(names(.) %in% unlist(dependent) &
sapply(., is.factor),
forcats::fct_na_value_to_level, level = "(Missing)"
)
} else {
.data = .data %>%
tidyr::drop_na(dependent)
}
# Create column totals
totals = .data %>%
{ if(is.null(weights)){
dplyr::count(., !! dplyr::sym(dependent), .drop = FALSE)
} else {
dplyr::count(., !! dplyr::sym(dependent), .drop = FALSE, wt = !! dplyr::sym(weights))
}} %>%
dplyr::mutate(
grand_total = sum(n, na.rm = TRUE),
percent = 100 * n / grand_total
)
grand_total = totals %>% dplyr::pull(grand_total) %>% unique() %>% round_tidy(digits[[2]])
if(percent){
totals = totals %>%
dplyr::mutate(
n = paste0(prefix, format_n_percent(n, percent, digits[[1]], digits[[2]]))
)
} else {
totals = totals %>%
dplyr::mutate(
n = paste0(prefix, round_tidy(n, digits[[2]]))
)
}
if(is.null(label) & percent) label = "Total N (%)"
if(is.null(label) & !percent) label = "Total N"
# Pivot and add
totals = totals %>%
dplyr::select(-c(grand_total, percent)) %>%
tidyr::pivot_wider(names_from = dependent, values_from = n) %>%
as.data.frame() %>%
dplyr::mutate(label = label,
levels= "") %>%
dplyr::select(label, levels, dplyr::everything())
df.out = dplyr::bind_rows(totals, df.in)
df.out[1, is.na(df.out[1, ])] = "" # For neatness change NA to "" in top row
# Make total
if(any(names(df.out) == "Total")){
df.out[1, "Total"] = paste0(prefix, grand_total)
}
if(any(names(df.out) == "All")){
df.out[1, "All"] = paste0(prefix, grand_total)
}
return(df.out)
}
#' @rdname ff_column_totals
#' @export
finalfit_column_totals = ff_column_totals
#' Add row totals to \code{summary_factorlist()} output
#'
#' This adds a total and missing count to variables. This is useful for
#' continuous variables. Compare this to \code{summary_factorlist(total_col =
#' TRUE)} which includes a count for each dummy variable as a factor and mean
#' (sd) or median (iqr) for continuous variables.
#'
#' @param df.in \code{summary_factorlist()} output.
#' @param .data Data frame used to create \code{summary_factorlist()}.
#' @param dependent Character. Name of dependent variable.
#' @param explanatory Character vector of any length: name(s) of explanatory
#' variables.
#' @param missing_column Logical. Include a column of counts of missing data.
#' @param digits Integer length 1. Number of digits for percentage.
#' @param percent Logical. Include percentage.
#' @param na_complete_cases Logical. When TRUE, missing data counts for variables
#' are for compelte cases across all included variables.
#' @param na_include_dependent Logical. When TRUE, missing data in the dependent
#' variable is included in totals.
#' @param total_name Character. Name of total column.
#' @param na_name Character. Name of missing column.
#'
#' @return Data frame.
#' @export
#'
#' @examples
#' explanatory = c("age.factor", "sex.factor", "obstruct.factor", "perfor.factor")
#' dependent = 'mort_5yr'
#' colon_s %>%
#' summary_factorlist(dependent, explanatory) %>%
#' ff_row_totals(colon_s, dependent, explanatory)
ff_row_totals <- function(df.in, .data, dependent, explanatory, missing_column = TRUE,
percent = TRUE, digits = 1,
na_include_dependent = FALSE, na_complete_cases = FALSE,
total_name = "Total N", na_name= "Missing N"){
if(!any(names(df.in) == "label"))
stop("summary_factorlist function must include: add_dependent_label = FALSE")
# Extract labels
var_labels = .data %>%
extract_variable_label()
if(na_include_dependent){
.data = .data %>%
dplyr::mutate_if(names(.) %in% unlist(dependent) &
sapply(., is.factor),
forcats::fct_na_value_to_level, level = "(Missing)"
)
} else {
.data = .data %>%
tidyr::drop_na(dependent)
}
which_anyNA <- function(.data){
.data %>%
tibble::rowid_to_column() %>%
dplyr::filter_all(dplyr::any_vars(is.na(.))) %>%
dplyr::pull(rowid)
}
if(na_complete_cases){
.data[which_anyNA(.data), ] = NA
}
# Relabel
.data = .data %>%
ff_relabel(var_labels)
df.out = df.in %>%
dplyr::left_join(
missing_glimpse(.data, explanatory, digits = digits) %>%
dplyr::mutate(label = as.character(label)), by = "label"
) %>%
{ if(!percent){
dplyr::mutate(., # Rename, change to character, remove "NAs"
!! total_name := as.character(n) %>%
dplyr::coalesce("")
)
} else {
dplyr::mutate(., # Rename, change to character, remove "NAs"
!! total_name := stringr::str_c(n, " (", (100 - as.numeric(missing_percent)) %>%
round_tidy(digits), ")") %>%
dplyr::coalesce("")
)
}}
if(missing_column){
df.out = df.out %>%
dplyr::mutate(
!! na_name := as.character(missing_n) %>% dplyr::coalesce("")
) %>% # Reorder columns, remove unwanted columns
dplyr::select(label, !! total_name, !! na_name, dplyr::everything(),
-c(n, missing_n, var_type, missing_percent))
} else {
df.out = df.out %>%
dplyr::select(label, !! total_name, dplyr::everything(),
-c(n, missing_n, var_type, missing_percent))
}
return(df.out)
}
#' @rdname ff_row_totals
#' @export
finalfit_row_totals = ff_row_totals
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.