#' Score functions for sorting `TableTrees`
#'
#' @inheritParams gen_args
#'
#' @return A single numeric value indicating score according to the relevant metric for `tt`, to be used when sorting.
#'
#' @export
#' @rdname score_funs
cont_n_allcols <- function(tt) {
ctab <- content_table(tt)
if (NROW(ctab) == 0) {
stop(
"cont_n_allcols score function used at subtable [",
obj_name(tt), "] that has no content table."
)
}
sum(sapply(
row_values(tree_children(ctab)[[1]]),
function(cv) cv[1]
))
}
#' @param j (`numeric(1)`)\cr index of column used for scoring.
#'
#' @seealso For examples and details, please read the documentation for [sort_at_path()] and the
#' [Sorting and Pruning](https://insightsengineering.github.io/rtables/latest-tag/articles/sorting_pruning.html)
#' vignette.
#'
#' @export
#' @rdname score_funs
cont_n_onecol <- function(j) {
function(tt) {
ctab <- content_table(tt)
if (NROW(ctab) == 0) {
stop(
"cont_n_allcols score function used at subtable [",
obj_name(tt), "] that has no content table."
)
}
row_values(tree_children(ctab)[[1]])[[j]][1]
}
}
## used for pruning functions and scoring functions(sorting)
match_fun_args <- function(fun, ...) {
dotargs <- list(...)
retargs <- list()
formnms <- names(formals(fun))
if ("..." %in% formnms) {
retargs <- dotargs
} else if (any(names(dotargs) %in% formnms)) {
retargs <- dotargs[names(dotargs) %in% formnms]
}
retargs
}
#' Sorting a table at a specific path
#'
#' Main sorting function to order the sub-structure of a `TableTree` at a particular path in the table tree.
#'
#' @inheritParams gen_args
#' @param scorefun (`function`)\cr scoring function. Should accept the type of children directly under the position
#' at `path` (either `VTableTree`, `VTableRow`, or `VTableNodeInfo`, which covers both) and return a numeric value
#' to be sorted.
#' @param decreasing (`flag`)\cr whether the scores generated by `scorefun` should be sorted in decreasing order. If
#' unset (the default of `NA`), it is set to `TRUE` if the generated scores are numeric and `FALSE` if they are
#' characters.
#' @param na.pos (`string`)\cr what should be done with children (sub-trees/rows) with `NA` scores. Defaults to
#' `"omit"`, which removes them. Other allowed values are `"last"` and `"first"`, which indicate where `NA` scores
#' should be placed in the order.
#' @param .prev_path (`character`)\cr internal detail, do not set manually.
#' @param ... Additional (named) arguments that will be passed directly down to
#' `score_fun` *if* it accepts them (or accepts `...` itself).
#'
#' @return A `TableTree` with the same structure as `tt` with the exception that the requested sorting has been done
#' at `path`.
#'
#' @details
#' `sort_at_path`, given a path, locates the (sub)table(s) described by the path (see below for handling of the `"*"`
#' wildcard). For each such subtable, it then calls `scorefun` on each direct child of the table, using the resulting
#' scores to determine their sorted order. `tt` is then modified to reflect each of these one or more sorting
#' operations.
#'
#' `score_fun` can optionally accept `decreasing`, which will be passed the value passed
#' to `sort_at_path` automatically, and other arguments which can be set via `...`. The
#' first argument passed to `scorefun` will always be the table structure (subtable or row)
#' it is scoring.
#'
#' In `path`, a leading `"root"` element will be ignored, regardless of whether this matches the object name (and thus
#' actual root path name) of `tt`. Including `"root"` in paths where it does not match the name of `tt` may mask deeper
#' misunderstandings of how valid paths within a `TableTree` object correspond to the layout used to originally declare
#' it, which we encourage users to avoid.
#'
#' `path` can include the "wildcard" `"*"` as a step, which translates roughly to *any* node/branching element and means
#' that each child at that step will be *separately* sorted based on `scorefun` and the remaining `path` entries. This
#' can occur multiple times in a path.
#'
#' A list of valid (non-wildcard) paths can be seen in the `path` column of the `data.frame` created by
#' [formatters::make_row_df()] with the `visible_only` argument set to `FALSE`. It can also be inferred from the
#' summary given by [table_structure()].
#'
#' Note that sorting needs a deeper understanding of table structure in `rtables`. Please consider reading the related
#' vignette
#' ([Sorting and Pruning](https://insightsengineering.github.io/rtables/latest-tag/articles/sorting_pruning.html))
#' and explore table structure with useful functions like [table_structure()] and [row_paths_summary()]. It is also
#' very important to understand the difference between "content" rows and "data" rows. The first one analyzes and
#' describes the split variable generally and is generated with [summarize_row_groups()], while the second one is
#' commonly produced by calling one of the various [analyze()] instances.
#'
#' Built-in score functions are [cont_n_allcols()] and [cont_n_onecol()]. They are both working with content rows
#' (coming from [summarize_row_groups()]) while a custom score function needs to be used on `DataRow`s. Here, some
#' useful descriptor and accessor functions (coming from related vignette):
#' - [cell_values()] - Retrieves a named list of a `TableRow` or `TableTree` object's values.
#' - [formatters::obj_name()] - Retrieves the name of an object. Note this can differ from the label that is
#' displayed (if any is) when printing.
#' - [formatters::obj_label()] - Retrieves the display label of an object. Note this can differ from the name that
#' appears in the path.
#' - [content_table()] - Retrieves a `TableTree` object's content table (which contains its summary rows).
#' - [tree_children()] - Retrieves a `TableTree` object's direct children (either subtables, rows or possibly a mix
#' thereof, though that should not happen in practice).
#'
#' @seealso
#' * Score functions [cont_n_allcols()] and [cont_n_onecol()].
#' * [formatters::make_row_df()] and [table_structure()] for pathing information.
#' * [tt_at_path()] to select a table's (sub)structure at a given path.
#'
#' @examples
#' # Creating a table to sort
#'
#' # Function that gives two statistics per table-tree "leaf"
#' more_analysis_fnc <- function(x) {
#' in_rows(
#' "median" = median(x),
#' "mean" = mean(x),
#' .formats = "xx.x"
#' )
#' }
#'
#' # Main layout of the table
#' raw_lyt <- basic_table() %>%
#' split_cols_by("ARM") %>%
#' split_rows_by(
#' "RACE",
#' split_fun = drop_and_remove_levels("WHITE") # dropping WHITE levels
#' ) %>%
#' summarize_row_groups() %>%
#' split_rows_by("STRATA1") %>%
#' summarize_row_groups() %>%
#' analyze("AGE", afun = more_analysis_fnc)
#'
#' # Creating the table and pruning empty and NAs
#' tbl <- build_table(raw_lyt, DM) %>%
#' prune_table()
#'
#' # Peek at the table structure to understand how it is built
#' table_structure(tbl)
#'
#' # Sorting only ASIAN sub-table, or, in other words, sorting STRATA elements for
#' # the ASIAN group/row-split. This uses content_table() accessor function as it
#' # is a "ContentRow". In this case, we also base our sorting only on the second column.
#' sort_at_path(tbl, c("ASIAN", "STRATA1"), cont_n_onecol(2))
#'
#' # Custom scoring function that is working on "DataRow"s
#' scorefun <- function(tt) {
#' # Here we could use browser()
#' sum(unlist(row_values(tt))) # Different accessor function
#' }
#' # Sorting mean and median for all the AGE leaves!
#' sort_at_path(tbl, c("RACE", "*", "STRATA1", "*", "AGE"), scorefun)
#'
#' last_cat_scorefun <- function(x, decreasing, lastcat) {
#' mycat <- obj_name(x)
#' if (mycat == lastcat) {
#' ifelse(isTRUE(decreasing), -Inf, Inf)
#' } else {
#' match(tolower(substr(mycat, 1, 1)), letters)
#' }
#' }
#'
#' lyt2 <- basic_table() %>%
#' split_rows_by("SEX") %>%
#' analyze("AGE")
#'
#' tbl2 <- build_table(lyt2, DM)
#' sort_at_path(tbl2, "SEX", last_cat_scorefun, lastcat = "M")
#' sort_at_path(tbl2, "SEX", last_cat_scorefun, lastcat = "M", decreasing = FALSE)
#'
#' @export
sort_at_path <- function(tt,
path,
scorefun,
decreasing = NA,
na.pos = c("omit", "last", "first"),
.prev_path = character(),
...) {
if (NROW(tt) == 0) {
return(tt)
}
## XXX hacky fix this!!!
## tt_at_path removes root even if actual root table isn't named root, we need to match that behavior
if (path[1] == "root") {
## always remove first root element but only add it to
## .prev_path (used for error reporting) if it actually matched the name
if (obj_name(tt) == "root") {
.prev_path <- c(.prev_path, path[1])
}
path <- path[-1]
}
if (identical(obj_name(tt), path[1])) {
.prev_path <- c(.prev_path, path[1])
path <- path[-1]
}
curpath <- path
subtree <- tt
backpath <- c()
count <- 0
while (length(curpath) > 0) {
curname <- curpath[1]
oldkids <- tree_children(subtree)
## we sort each child separately based on the score function
## and the remaining path
if (curname == "*") {
oldnames <- vapply(oldkids, obj_name, "")
newkids <- lapply(
seq_along(oldkids),
function(i) {
sort_at_path(oldkids[[i]],
path = curpath[-1],
scorefun = scorefun,
decreasing = decreasing,
na.pos = na.pos,
## its ok to modify the "path" here because its only ever used for
## informative error reporting.
.prev_path = c(.prev_path, backpath, paste0("* (", oldnames[i], ")")),
...
)
}
)
names(newkids) <- oldnames
newtab <- subtree
tree_children(newtab) <- newkids
if (length(backpath) > 0) {
ret <- recursive_replace(tt, backpath, value = newtab)
} else {
ret <- newtab
}
return(ret)
} else if (!(curname %in% names(oldkids))) {
stop(
"Unable to find child(ren) '",
curname, "'\n\t occurred at path: ",
paste(c(.prev_path, path[seq_len(count)]), collapse = " -> "),
"\n Use 'make_row_df(obj, visible_only = TRUE)[, c(\"label\", \"path\", \"node_class\")]' or \n",
"'table_structure(obj)' to explore valid paths."
)
}
subtree <- tree_children(subtree)[[curname]]
backpath <- c(backpath, curpath[1])
curpath <- curpath[-1]
count <- count + 1
}
real_backpath <- path[seq_len(count)]
na.pos <- match.arg(na.pos)
## subtree <- tt_at_path(tt, path)
kids <- tree_children(subtree)
## relax this to allow character "scores"
## scores <- vapply(kids, scorefun, NA_real_)
more_args <- match_fun_args(scorefun, decreasing = decreasing, ...)
scores <- lapply(kids, function(x) {
tryCatch(do.call(scorefun, c(list(x), more_args)),
error = function(e) e
)
})
errs <- which(vapply(scores, is, class2 = "error", TRUE))
if (length(errs) > 0) {
stop("Encountered at least ", length(errs), " error(s) when applying score function.\n",
"First error: ", scores[[errs[1]]]$message,
"\n\toccurred at path: ",
paste(c(.prev_path, real_backpath, names(kids)[errs[1]]), collapse = " -> "),
call. = FALSE
)
} else {
scores <- unlist(scores)
}
if (!is.null(dim(scores)) || length(scores) != length(kids)) {
stop(
"Score function does not appear to have return exactly one ",
"scalar value per child"
)
}
if (is.na(decreasing)) {
decreasing <- if (is.character(scores)) FALSE else TRUE
}
ord <- order(scores, na.last = (na.pos != "first"), decreasing = decreasing)
newkids <- kids[ord]
if (anyNA(scores) && na.pos == "omit") { # we did na last here
newkids <- head(newkids, -1 * sum(is.na(scores)))
}
newtree <- subtree
tree_children(newtree) <- newkids
tt_at_path(tt, path) <- newtree
tt
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.