#' Score functions for sorting `TableTrees`
#'
#' @inheritParams gen_args
#'
#' @return A single numeric value indicating score according to the relevant metric for `tt`, to be used when sorting.
#'
#' @export
#' @rdname score_funs
cont_n_allcols <- function(tt) {
ctab <- content_table(tt)
if (NROW(ctab) == 0) {
stop(
"cont_n_allcols score function used at subtable [",
obj_name(tt), "] that has no content table."
)
}
sum(sapply(
row_values(tree_children(ctab)[[1]]),
function(cv) cv[1]
))
}
#' @param j (`numeric(1)`)\cr index of column used for scoring.
#'
#' @seealso For examples and details, please read the documentation for [sort_at_path()] and the
#' [Sorting and Pruning](https://insightsengineering.github.io/rtables/latest-tag/articles/sorting_pruning.html)
#' vignette.
#'
#' @export
#' @rdname score_funs
cont_n_onecol <- function(j) {
function(tt) {
ctab <- content_table(tt)
if (NROW(ctab) == 0) {
stop(
"cont_n_allcols score function used at subtable [",
obj_name(tt), "] that has no content table."
)
}
row_values(tree_children(ctab)[[1]])[[j]][1]
}
}
#' Sorting a table at a specific path
#'
#' Main sorting function to order the sub-structure of a `TableTree` at a particular path in the table tree.
#'
#' @inheritParams gen_args
#' @param scorefun (`function`)\cr scoring function. Should accept the type of children directly under the position
#' at `path` (either `VTableTree`, `VTableRow`, or `VTableNodeInfo`, which covers both) and return a numeric value
#' to be sorted.
#' @param decreasing (`flag`)\cr whether the scores generated by `scorefun` should be sorted in decreasing order. If
#' unset (the default of `NA`), it is set to `TRUE` if the generated scores are numeric and `FALSE` if they are
#' characters.
#' @param na.pos (`string`)\cr what should be done with children (sub-trees/rows) with `NA` scores. Defaults to
#' `"omit"`, which removes them. Other allowed values are `"last"` and `"first"`, which indicate where `NA` scores
#' should be placed in the order.
#' @param .prev_path (`character`)\cr internal detail, do not set manually.
#'
#' @return A `TableTree` with the same structure as `tt` with the exception that the requested sorting has been done
#' at `path`.
#'
#' @details
#' `sort_at_path`, given a path, locates the (sub)table(s) described by the path (see below for handling of the `"*"`
#' wildcard). For each such subtable, it then calls `scorefun` on each direct child of the table, using the resulting
#' scores to determine their sorted order. `tt` is then modified to reflect each of these one or more sorting
#' operations.
#'
#' In `path`, a leading `"root"` element will be ignored, regardless of whether this matches the object name (and thus
#' actual root path name) of `tt`. Including `"root"` in paths where it does not match the name of `tt` may mask deeper
#' misunderstandings of how valid paths within a `TableTree` object correspond to the layout used to originally declare
#' it, which we encourage users to avoid.
#'
#' `path` can include the "wildcard" `"*"` as a step, which translates roughly to *any* node/branching element and means
#' that each child at that step will be *separately* sorted based on `scorefun` and the remaining `path` entries. This
#' can occur multiple times in a path.
#'
#' A list of valid (non-wildcard) paths can be seen in the `path` column of the `data.frame` created by
#' [formatters::make_row_df()] with the `visible_only` argument set to `FALSE`. It can also be inferred from the
#' summary given by [table_structure()].
#'
#' Note that sorting needs a deeper understanding of table structure in `rtables`. Please consider reading the related
#' vignette
#' ([Sorting and Pruning](https://insightsengineering.github.io/rtables/latest-tag/articles/sorting_pruning.html))
#' and explore table structure with useful functions like [table_structure()] and [row_paths_summary()]. It is also
#' very important to understand the difference between "content" rows and "data" rows. The first one analyzes and
#' describes the split variable generally and is generated with [summarize_row_groups()], while the second one is
#' commonly produced by calling one of the various [analyze()] instances.
#'
#' Built-in score functions are [cont_n_allcols()] and [cont_n_onecol()]. They are both working with content rows
#' (coming from [summarize_row_groups()]) while a custom score function needs to be used on `DataRow`s. Here, some
#' useful descriptor and accessor functions (coming from related vignette):
#' - [cell_values()] - Retrieves a named list of a `TableRow` or `TableTree` object's values.
#' - [formatters::obj_name()] - Retrieves the name of an object. Note this can differ from the label that is
#' displayed (if any is) when printing.
#' - [formatters::obj_label()] - Retrieves the display label of an object. Note this can differ from the name that
#' appears in the path.
#' - [content_table()] - Retrieves a `TableTree` object's content table (which contains its summary rows).
#' - [tree_children()] - Retrieves a `TableTree` object's direct children (either subtables, rows or possibly a mix
#' thereof, though that should not happen in practice).
#'
#' @seealso
#' * Score functions [cont_n_allcols()] and [cont_n_onecol()].
#' * [formatters::make_row_df()] and [table_structure()] for pathing information.
#' * [tt_at_path()] to select a table's (sub)structure at a given path.
#'
#' @examples
#' # Creating a table to sort
#'
#' # Function that gives two statistics per table-tree "leaf"
#' more_analysis_fnc <- function(x) {
#' in_rows(
#' "median" = median(x),
#' "mean" = mean(x),
#' .formats = "xx.x"
#' )
#' }
#'
#' # Main layout of the table
#' raw_lyt <- basic_table() %>%
#' split_cols_by("ARM") %>%
#' split_rows_by(
#' "RACE",
#' split_fun = drop_and_remove_levels("WHITE") # dropping WHITE levels
#' ) %>%
#' summarize_row_groups() %>%
#' split_rows_by("STRATA1") %>%
#' summarize_row_groups() %>%
#' analyze("AGE", afun = more_analysis_fnc)
#'
#' # Creating the table and pruning empty and NAs
#' tbl <- build_table(raw_lyt, DM) %>%
#' prune_table()
#'
#' # Peek at the table structure to understand how it is built
#' table_structure(tbl)
#'
#' # Sorting only ASIAN sub-table, or, in other words, sorting STRATA elements for
#' # the ASIAN group/row-split. This uses content_table() accessor function as it
#' # is a "ContentRow". In this case, we also base our sorting only on the second column.
#' sort_at_path(tbl, c("ASIAN", "STRATA1"), cont_n_onecol(2))
#'
#' # Custom scoring function that is working on "DataRow"s
#' scorefun <- function(tt) {
#' # Here we could use browser()
#' sum(unlist(row_values(tt))) # Different accessor function
#' }
#' # Sorting mean and median for all the AGE leaves!
#' sort_at_path(tbl, c("RACE", "*", "STRATA1", "*", "AGE"), scorefun)
#'
#' @export
sort_at_path <- function(tt,
path,
scorefun,
decreasing = NA,
na.pos = c("omit", "last", "first"),
.prev_path = character()) {
if (NROW(tt) == 0) {
return(tt)
}
## XXX hacky fix this!!!
## tt_at_path removes root even if actual root table isn't named root, we need to match that behavior
if (path[1] == "root") {
## always remove first root element but only add it to
## .prev_path (used for error reporting) if it actually matched the name
if (obj_name(tt) == "root") {
.prev_path <- c(.prev_path, path[1])
}
path <- path[-1]
}
if (identical(obj_name(tt), path[1])) {
.prev_path <- c(.prev_path, path[1])
path <- path[-1]
}
curpath <- path
subtree <- tt
backpath <- c()
count <- 0
while (length(curpath) > 0) {
curname <- curpath[1]
oldkids <- tree_children(subtree)
## we sort each child separately based on the score function
## and the remaining path
if (curname == "*") {
oldnames <- vapply(oldkids, obj_name, "")
newkids <- lapply(
seq_along(oldkids),
function(i) {
sort_at_path(oldkids[[i]],
path = curpath[-1],
scorefun = scorefun,
decreasing = decreasing,
na.pos = na.pos,
## its ok to modify the "path" here because its only ever used for
## informative error reporting.
.prev_path = c(.prev_path, backpath, paste0("* (", oldnames[i], ")"))
)
}
)
names(newkids) <- oldnames
newtab <- subtree
tree_children(newtab) <- newkids
if (length(backpath) > 0) {
ret <- recursive_replace(tt, backpath, value = newtab)
} else {
ret <- newtab
}
return(ret)
} else if (!(curname %in% names(oldkids))) {
stop(
"Unable to find child(ren) '",
curname, "'\n\t occurred at path: ",
paste(c(.prev_path, path[seq_len(count)]), collapse = " -> "),
"\n Use 'make_row_df(obj, visible_only = TRUE)[, c(\"label\", \"path\", \"node_class\")]' or \n",
"'table_structure(obj)' to explore valid paths."
)
}
subtree <- tree_children(subtree)[[curname]]
backpath <- c(backpath, curpath[1])
curpath <- curpath[-1]
count <- count + 1
}
real_backpath <- path[seq_len(count)]
na.pos <- match.arg(na.pos)
## subtree <- tt_at_path(tt, path)
kids <- tree_children(subtree)
## relax this to allow character "scores"
## scores <- vapply(kids, scorefun, NA_real_)
scores <- lapply(kids, function(x) tryCatch(scorefun(x), error = function(e) e))
errs <- which(vapply(scores, is, class2 = "error", TRUE))
if (length(errs) > 0) {
stop("Encountered at least ", length(errs), " error(s) when applying score function.\n",
"First error: ", scores[[errs[1]]]$message,
"\n\toccurred at path: ",
paste(c(.prev_path, real_backpath, names(kids)[errs[1]]), collapse = " -> "),
call. = FALSE
)
} else {
scores <- unlist(scores)
}
if (!is.null(dim(scores)) || length(scores) != length(kids)) {
stop(
"Score function does not appear to have return exactly one ",
"scalar value per child"
)
}
if (is.na(decreasing)) {
decreasing <- if (is.character(scores)) FALSE else TRUE
}
ord <- order(scores, na.last = (na.pos != "first"), decreasing = decreasing)
newkids <- kids[ord]
if (anyNA(scores) && na.pos == "omit") { # we did na last here
newkids <- head(newkids, -1 * sum(is.na(scores)))
}
newtree <- subtree
tree_children(newtree) <- newkids
tt_at_path(tt, path) <- newtree
tt
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.