Nothing
#' Sort Stacked Hierarchical ARDs
#'
#' @description `r lifecycle::badge('experimental')`\cr
#'
#' This function is used to sort stacked hierarchical ARDs.
#'
#' For the purposes of this function, we define a "variable group" as a combination of ARD rows grouped by the
#' combination of all their variable levels, but excluding any `by` variables.
#'
#' @param x (`card`)\cr
#' a stacked hierarchical ARD of class `'card'` created using [ard_stack_hierarchical()] or
#' [`ard_stack_hierarchical_count()`].
#' @param sort ([`formula-list-selector`][syntax], `string`)\cr
#' a named list, a list of formulas, a single formula where the list element is a named list of functions
#' (or the RHS of a formula), or a single string specifying the types of sorting to perform at each hierarchy variable
#' level. If the sort method for any variable is not specified then the method will default to `"descending"`. If a
#' single unnamed string is supplied it is applied to all variables. For each variable, the value specified must
#' be one of:
#' - `"alphanumeric"` - at the specified hierarchy level of the ARD, groups are ordered alphanumerically
#' (i.e. A to Z) by `variable_level` text.
#' - `"descending"` - within each variable group of the ARD at the specified hierarchy level, count sums are
#' calculated for each group and groups are sorted in descending order by sum. When `sort` is `"descending"` for a
#' given variable and `n` is included in `statistic` for the variable then `n` is used to calculate variable group
#' sums, otherwise `p` is used. If neither `n` nor `p` are present in `x` for the variable, an error will occur.
#'
#' Defaults to `everything() ~ "descending"`.
#'
#' @return an ARD data frame of class 'card'
#' @seealso [filter_ard_hierarchical()]
#' @name sort_ard_hierarchical
#'
#' @note
#' If overall data is present in `x` (i.e. the ARD was created with `ard_stack_hierarchical(overall=TRUE)`), the
#' overall data will be sorted last within each variable group (i.e. after any other rows with the same combination of
#' variable levels).
#'
#' @examplesIf (identical(Sys.getenv("NOT_CRAN"), "true") || identical(Sys.getenv("IN_PKGDOWN"), "true"))
#' ard_stack_hierarchical(
#' ADAE,
#' variables = c(AESOC, AEDECOD),
#' by = TRTA,
#' denominator = ADSL,
#' id = USUBJID
#' ) |>
#' sort_ard_hierarchical(AESOC ~ "alphanumeric")
#'
#' ard_stack_hierarchical_count(
#' ADAE,
#' variables = c(AESOC, AEDECOD),
#' by = TRTA,
#' denominator = ADSL
#' ) |>
#' sort_ard_hierarchical(sort = list(AESOC ~ "alphanumeric", AEDECOD ~ "descending"))
NULL
#' @rdname sort_ard_hierarchical
#' @export
sort_ard_hierarchical <- function(x, sort = everything() ~ "descending") {
set_cli_abort_call()
# check and process inputs ---------------------------------------------------------------------
check_not_missing(x)
check_not_missing(sort)
check_class(x, "card")
if (!"args" %in% names(attributes(x))) {
cli::cli_abort(
paste(
"Sorting is only available for stacked hierarchical ARDs created using",
"{.fun ard_stack_hierarchical} or {.fun ard_stack_hierarchical_count}."
),
call = get_cli_abort_call()
)
}
if (all(x$variable %in% "..ard_hierarchical_overall..")) {
return(x)
}
ard_args <- attributes(x)$args
# for calculations by highest severity, innermost variable is extracted from `by`
if (length(ard_args$by) > 1) {
ard_args$variables <- c(ard_args$variables, dplyr::last(ard_args$by))
ard_args$include <- c(ard_args$include, dplyr::last(ard_args$by))
ard_args$by <- ard_args$by[-length(ard_args$by)]
}
# get and check sorting method(s)
if (is.character(sort)) {
sort <- stats::as.formula(paste0("everything() ~ '", sort, "'"))
}
process_formula_selectors(
as.list(ard_args$variables) |> data.frame() |> stats::setNames(ard_args$variables),
sort = sort
)
fill_formula_selectors(
as.list(ard_args$variables) |> data.frame() |> stats::setNames(ard_args$variables),
sort = everything() ~ "descending"
)
check_list_elements(
x = sort,
predicate = \(x) x %in% c("descending", "alphanumeric"),
error_msg = "Sorting type must be either {.val descending} or {.val alphanumeric} for all variables."
)
by <- ard_args$by
cols <-
ard_args$variables |>
stats::setNames(
x |>
dplyr::select(all_ard_group_n(seq_along(ard_args$variables) + length(by), types = "names"), "variable") |>
names()
)
# attributes and total n not sorted - appended to bottom of sorted ARD
has_attr <- "attributes" %in% x$context | "total_n" %in% x$context
if (has_attr) {
x_attr <- x |>
dplyr::filter(.data$context %in% c("attributes", "total_n"))
x <- x |>
dplyr::filter(!.data$context %in% c("attributes", "total_n"))
}
# header row info not sorted - appended to top of sorted ARD
has_hdr <- !is_empty(by) | "..ard_hierarchical_overall.." %in% x$variable
if (has_hdr) {
x_header <- x |>
dplyr::filter(.data$variable %in% c(by, "..ard_hierarchical_overall..")) |>
# header statistic rows above "..ard_hierarchical_overall.." rows
dplyr::arrange(dplyr::desc(.data$variable))
x <- x |>
dplyr::filter(!.data$variable %in% c(by, "..ard_hierarchical_overall.."))
}
# reformat ARD for sorting ---------------------------------------------------------------------
x_sort <- x |>
# for sorting, assign indices to each row in original order
dplyr::mutate(idx = dplyr::row_number())
# reformat current variable columns for sorting
x_sort <- x_sort |>
.ard_reformat_sort(by, cols)
for (i in seq_along(cols)) {
sort_i <- sort[[cols[i]]] # current sorting type
cur_var <- names(cols)[i] # current grouping variable
x_sort <- x_sort |>
# group by current and all previous grouping columns
dplyr::group_by(dplyr::pick(
any_of(cards::all_ard_group_n(seq_len(i) + length(by))),
any_of(c(cur_var, paste0(cur_var, "_level")))
))
if (sort_i == "descending") {
# descending sort
x_sort <- x_sort |>
# calculate sums for each group at the current level, then get group indices
.append_hierarchy_sums(ard_args, cols, i)
} else {
# alphanumeric sort
x_sort <- x_sort |>
# sort grouping variables in alphanumeric order
dplyr::arrange(.by_group = TRUE) |>
# append group indices
dplyr::mutate(!!paste0("sort_group_", i) := dplyr::cur_group_id())
}
}
idx_sorted <- x_sort |>
dplyr::ungroup() |>
# sort according to determined orders at each hierarchy level
dplyr::arrange(dplyr::pick(starts_with("sort_group_"))) |>
# pull ordered row indices
dplyr::pull("idx")
# sort ARD
x <- x[idx_sorted, ]
# if present, keep header info at top of ARD
if (has_hdr) x <- dplyr::bind_rows(x_header, x)
# if present, keep attributes at bottom of ARD
if (has_attr) x <- dplyr::bind_rows(x, x_attr)
x
}
# this function reformats a hierarchical ARD for sorting
.ard_reformat_sort <- function(x, by, cols) {
for (i in seq_along(cols)) {
# get current grouping variables
cur_var <- names(cols)[i]
cur_var_lvl <- paste0(cur_var, "_level")
# outer hierarchy variables - process summary rows
if (!cur_var %in% "variable") {
x[x$variable %in% cols[i], ] <-
x[x$variable %in% cols[i], ] |>
dplyr::mutate(
# move variable/level names to correct grouping variable columns
!!cur_var := .data$variable,
!!cur_var_lvl := as.list(.data$variable_level),
# mark rows as overall summary data
variable = "..overall..",
variable_level = as.list(NA_character_)
)
}
# overall=TRUE - process summary rows (no `by` variable)
if (!is_empty(by) & !cur_var %in% "variable" & any(x[[paste0("group", i)]] %in% cols[i])) {
next_var_gp <- paste0("group", i + length(by) + 1) %in% names(x)
x[x[[paste0("group", i)]] %in% cols[i], ] <-
x[x[[paste0("group", i)]] %in% cols[i], ] |>
dplyr::mutate(
# shift variable/level names one to the right
!!paste0("group", i + length(by) + 1) := if (next_var_gp) .data[[cur_var]] else NULL,
!!paste0("group", i + length(by) + 1, "_level") := if (next_var_gp) as.list(.data[[cur_var_lvl]]) else NULL,
!!cur_var := .data[[paste0("group", i)]],
!!cur_var_lvl := as.list(.data[[paste0("group", i, "_level")]])
)
}
# previous hierarchy variables - process summary rows
if (any(is.na(x[[cur_var]]))) {
x[is.na(x[[cur_var]]), ] <-
x[is.na(x[[cur_var]]), ] |>
dplyr::mutate(
# mark summary rows from previous variables as "empty" for the current
# to sort them prior to non-summary rows in the same section
!!cur_var :=
dplyr::case_when(
.data$variable %in% "..overall.." ~ "..empty..",
.default = NA
),
!!cur_var_lvl := as.list(NA)
)
}
x <- x |>
dplyr::rowwise() |>
# unlist cur_var_lvl column
dplyr::mutate(dplyr::across(all_of(cur_var_lvl), ~ as.character(unlist(.x)))) |>
dplyr::ungroup()
}
x
}
# this function calculates and appends group sums/ordering for the current hierarchy level (across `by` variables)
.append_hierarchy_sums <- function(x, ard_args, cols, i) {
cur_var <- names(cols)[i] # get current grouping variable
next_var <- names(cols)[i + 1] # get next grouping variable
# all variables in x have n or p stat present (not required if filtered out first)
n_stat <- is_empty(setdiff(
intersect(ard_args$include, x$variable),
x |> dplyr::filter(.data$stat_name == "n") |> dplyr::pull("variable")
))
if (!n_stat) {
p_stat <- is_empty(setdiff(
intersect(ard_args$include, x$variable),
x |> dplyr::filter(.data$stat_name == "p") |> dplyr::pull("variable")
))
if (!p_stat) { # p statistic is also not available
cli::cli_abort(
paste(
"If {.code sort='descending'} for any variables then either {.val n} or {.val p} must be present in {.arg x}",
"for each of these specified variables in order to calculate the count sums used for sorting."
),
call = get_cli_abort_call()
)
}
}
sort_stat <- if (n_stat) "n" else "p" # statistic used to calculate group sums
# calculate group sums
sum_i <- paste0("sum_group_", i) # sum column label
x_sums <- x |>
dplyr::filter(
.data$stat_name == sort_stat, # select statistic to sum
if (!is_empty(ard_args$by)) .data$group1 %in% ard_args$by else TRUE,
if (length(c(ard_args$by, ard_args$variables)) > 1) {
if (ard_args$variable[i] %in% ard_args$include & !cur_var %in% "variable") {
# if current variable is in include, sum *only* summary rows for the current variable
.data$variable %in% "..overall.." &
if (!next_var %in% "variable") .data[[next_var]] %in% "..empty.." else TRUE
} else {
# otherwise, sum all *innermost* rows for the current variable
TRUE
}
} else {
TRUE
}
) |>
# get sum in each group
dplyr::summarize(!!sum_i := sum(unlist(.data$stat[.data$stat_name == sort_stat]))) |>
dplyr::ungroup()
sort_cols <- append(dplyr::group_vars(x), sum_i, after = length(dplyr::group_vars(x)) - 1) # sorting columns
x_sums <- x_sums |>
# sort group sums in descending order, grouping variables in alphanumeric order
dplyr::arrange(across(all_of(sort_cols), \(x) if (is.numeric(x)) dplyr::desc(x) else x)) |>
# record order of groups
dplyr::mutate(!!paste0("sort_group_", i) := dplyr::row_number())
# append corresponding group order index to each row
x |>
dplyr::left_join(x_sums, by = dplyr::group_vars(x))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.