R/make_split_fun.R

Defines functions drop_facet_levels trim_levels_in_facets add_overall_facet add_combo_facet make_split_fun .can_take_spl_context add_to_split_result make_split_result validate_split_result assert_splres_element in_col_split

Documented in add_combo_facet add_overall_facet add_to_split_result drop_facet_levels make_split_fun make_split_result trim_levels_in_facets

#' Variable Associated With a Split
#'
#' This function  is intended  for use  when writing  custom splitting
#' logic.  In  cases where  the  split  is  associated with  a  single
#' variable, the  name of that variable  will be returned. At  time of
#' writing     this    includes     splits    generated     via    the
#' \code{\link{split_rows_by}}, \code{\link{split_cols_by}},
#' \code{\link{split_rows_by_cuts}}, \code{\link{split_cols_by_cuts}},
#' \code{\link{split_rows_by_cutfun}}, and
#' \code{\link{split_cols_by_cutfun}} layout directives.
#' @param spl Split. The split object
#'
#' @return for splits with a single variable associated with them, the split, for others, an error is raised.
#' @export
#' @seealso \code{\link{make_split_fun}}
setGeneric("spl_variable", function(spl) standardGeneric("spl_variable"))
#' @rdname spl_variable
#' @export
setMethod("spl_variable", "VarLevelSplit", function(spl) spl_payload(spl))
#' @rdname spl_variable
#' @export
setMethod("spl_variable", "VarDynCutSplit", function(spl) spl_payload(spl))
#' @rdname spl_variable
#' @export
setMethod("spl_variable", "VarStaticCutSplit", function(spl) spl_payload(spl))
#' @rdname spl_variable
#' @export
setMethod("spl_variable", "Split",
          function(spl) stop("Split class ",
                             class(spl),
                             " not associated with a single variable.")
          )

in_col_split <- function(spl_ctx) {
    identical(names(spl_ctx),
              names(context_df_row( cinfo = NULL)))
}


assert_splres_element <- function(pinfo, nm, len = NULL, component = NULL) {
    msg_2_append <- ""
    if(!is.null(component)) {
        msg_2_append <- paste0("Invalid split function constructed by upstream call to ",
                           "make_split_fun. Problem source: ",
                           component, " argument.")
    }
    if(!(nm %in% names(pinfo)))
        stop("Split result does not have required element: ", nm, ".",
             msg_2_append)
    if(!is.null(len) && length(pinfo[[nm]]) != len)
        stop("Split result element ", nm, " does not have required length ", len, ".",
             msg_2_append)
    TRUE
}

validate_split_result <- function(pinfo, component = NULL) {
    assert_splres_element(pinfo, "datasplit", component = component)
    len <- length(pinfo$datasplit)
    assert_splres_element(pinfo, "values", len, component = component)
    assert_splres_element(pinfo, "labels", len, component = component)
    TRUE
}

#' Construct split result object
#'
#' These functions can be  used to create or add to  a split result in
#' functions which implement core  splitting or post-processing within
#' a custom split function.
#'
#' @param values character  or `list(SplitValue)`. The values associated
#'     with each facet
#' @param  datasplit `list(data.frame)`. The  facet data for  each facet
#'     generated in the split
#' @param labels character. The labels associated with each facet
#' @param  extras NULL or list.  Extra values associated with  each of
#'     the facets which  will be passed to  analysis functions applied
#'     within the facet.
#'
#' @return a named list representing the facets generated by the split
#'     with elements  `values`, `datasplit`,  and `labels`,  which are
#'     the same length and correspond to each other elementwise.
#'
#' @details
#' These functions does various housekeeping to ensure that the split result
#' list is as the rtables internals expect it, most of which are not
#' relevant to end users.
#'
#'
#' @examples
#' splres <- make_split_result(values = c("hi", "lo"),
#'     datasplit = list(hi = mtcars, lo = mtcars[1:10,]),
#'     labels = c("more data", "less data"))
#'
#' splres2 <- add_to_split_result(splres,
#'     values = "med",
#'     datasplit = list(med = mtcars[1:20,]),
#'     labels = "kinda some data")
#' @rdname make_split_result
#' @export
#' @family make_custom_split
make_split_result <- function(values, datasplit, labels, extras = NULL) {
    if(length(values) == 1 && is(datasplit, "data.frame"))
        datasplit <- list(datasplit)
    ret <- list(values = values, datasplit = datasplit, labels = labels)
    if(!is.null(extras))
        ret$extras <- extras
    .fixupvals(ret)
}

#' @rdname make_split_result
#' @param splres list. A list representing the result of splitting.
#' @export
add_to_split_result <- function(splres, values, datasplit, labels, extras = NULL) {
    validate_split_result(splres)
    newstuff <- make_split_result(values, datasplit, labels, extras)
    ret <- lapply(names(splres),
                  function(nm) c(splres[[nm]], newstuff[[nm]]))
    names(ret) <- names(splres)
    .fixupvals(ret)
}

.can_take_spl_context <- function(f) any(c(".spl_context", "...") %in% names(formals(f)))

#' Create a Custom Splitting Function
#'
#' @param pre  list.  Zero or  more functions  which  operate on  the
#'     incoming data and return a new data frame that should split via
#'     `core_split`. They will be called on the data in the order they
#'     appear in the list.
#' @param core_split  function or NULL. If not NULL,  a function which
#'     accepts the same arguments  do_base_split does, and returns the
#'     same type of named list.   Custom functions which override this
#'     behavior cannot be used in column splits.
#' @param post list. Zero or  more functions which should be called on
#'     the list output by splitting.
#'
#' @details
#'
#' Custom split  functions can be  thought of  as (up to)  3 different
#' types of manipulations of the splitting process
#'
#' 1. Preprocessing of the incoming data to be split
#' 2. (Row-splitting only) Customization of the core mapping of incoming data to facets, and
#' 3. Postprocessing operations on the set of facets (groups) generated by the split.
#'
#' This  function  provides  an   interface  to  create  custom  split
#' functions by implementing and specifying sets of operations in each
#' of those classes of customization independently.
#'
#' Preprocessing  functions (1),  must  accept:  `df`, `spl`,  `vals`,
#' `labels`,  and  can  optionally accept  `.spl_context`.  They  then
#' manipulate  `df` (the  incoming data  for the  split) and  return a
#' modified data.frame.  This modified  data.frame *must*  contain all
#' columns present in the incoming  data.frame, but can add columns if
#' necessary (though we note that these  new columns cannot be used in
#' the layout as split or analysis variables, because they will not be
#' present when validity checking is done).
#'
#' The  preprocessing   component  is   useful  for  things   such  as
#' manipulating factor  levels, e.g.,  to trim  unobserved ones  or to
#' reorder levels based on observed counts, etc.
#'
#' Customization of core splitting (2)  is currently only supported in
#' row  splits.  Core  splitting functions  override  the  fundamental
#' splitting procedure,  and are only  necessary in rare  cases. These
#' must  accept  `spl`, `df`,  `vals`,  `labels`,  and can  optionally
#' accept `.spl_context`. They must return a named list with elements,
#' all of  the same  length, as follows:  - `datasplit`  (containing a
#' list of data.frames), -  `values` containing values associated with
#' the   facets,   which   must   be   `character`   or   `SplitValue`
#' objects. These  values will  appear in the  paths of  the resulting
#' table.  - `labels` containing  the character labels associated with
#' `values`
#'
#' Postprocessing functions  (3) must  accept the  result of  the core
#' split  as  their  first  argument  (which  as  of  writing  can  be
#' anything), in addition  to `spl`, and `fulldf`,  and can optionally
#' accept `.spl_context`. They must each  return a modified version of
#' the same structure specified above for core splitting.
#'
#' In both the pre- and  post-processing cases, multiple functions can
#' be specified. When this happens,  they are applied sequentially, in
#' the order they  appear in the list passed to  the relevant argument
#' (`pre` and `post`, respectively).
#'
#' @return A function for use as a custom split function.
#' @export
#' @family make_custom_split
#' @seealso [custom_split_funs] for a more detailed discussion on what 
#' custom split functions do.
#' @examples
#'
#' mysplitfun <- make_split_fun(pre = list(drop_facet_levels),
#'     post = list(add_overall_facet("ALL", "All Arms")))
#'
#'
#' basic_table(show_colcounts = TRUE) %>%
#'    split_cols_by("ARM", split_fun = mysplitfun) %>%
#'    analyze("AGE") %>%
#'    build_table(subset(DM, ARM %in% c("B: Placebo", "C: Combination")))
#'
#' ## post (and pre) arguments can take multiple functions, here
#' ## we add an overall facet and the reorder the facets
#' reorder_facets <- function(splret, spl, fulldf, ...) {
#'   ord <- order(names(splret$values))
#'   make_split_result(splret$values[ord],
#'                       splret$datasplit[ord],
#'                       splret$labels[ord])
#' }
#'
#' mysplitfun2 <-  make_split_fun(pre = list(drop_facet_levels),
#'                              post = list(add_overall_facet("ALL", "All Arms"),
#'                                          reorder_facets))
#' basic_table(show_colcounts = TRUE) %>%
#'    split_cols_by("ARM", split_fun = mysplitfun2) %>%
#'    analyze("AGE") %>%
#'    build_table(subset(DM, ARM %in% c("B: Placebo", "C: Combination")))
#'
#' very_stupid_core <- function(spl, df, vals, labels, .spl_context) {
#'     make_split_result(c("stupid", "silly"),
#'                       datasplit = list(df[1:10,], df[11:30,]),
#'                       labels = c("first 10", "second 20"))
#' }
#'
#' dumb_30_facet <- add_combo_facet("dumb",
#'                                  label = "thirty patients",
#'                                  levels = c("stupid", "silly"))
#' nonsense_splfun <-  make_split_fun(core_split = very_stupid_core,
#'                                    post = list(dumb_30_facet))
#'
#' ## recall core split overriding is not supported in column space
#' ## currently, but we can see it in action in row space
#'
#' lyt_silly <- basic_table() %>%
#'       split_rows_by("ARM", split_fun = nonsense_splfun) %>%
#'       summarize_row_groups() %>%
#'       analyze("AGE")
#' silly_table <- build_table(lyt_silly, DM)
#' silly_table
make_split_fun <- function(pre = list(), core_split = NULL, post = list()) {
    function(df,
             spl,
             vals = NULL,
             labels = NULL,
             trim = FALSE,
             .spl_context) {
        orig_columns <- names(df)
        for(pre_fn in pre) {
            if(.can_take_spl_context(pre_fn))
                df <- pre_fn(df = df, spl = spl, vals = vals, labels = labels, .spl_context = .spl_context)
            else
                df <- pre_fn(df = df, spl = spl, vals = vals, labels = labels)
            if(!is(df, "data.frame"))
                stop("Error in custom split function, pre-split step did not return a data.frame. ",
                     "See upstream call to make_split_fun for original source of error.")
        }

        if(!all(orig_columns %in% names(df)))
            stop("Preprocessing functions(s) in custom split function removed a column from the incoming data.",
                 " This is not supported. See upstread make_split_fun call (pre argument) for original source of error.")

        if(is.null(core_split)) {
            ret <- do_base_split(spl = spl, df = df, vals = vals, labels = labels)
        } else if (!in_col_split(.spl_context)) {
            ret <- core_split(spl = spl, df = df, vals = vals, labels = labels, .spl_context)
            validate_split_result(ret, component = "core_split")
        } else {
            stop("Use of custom split functions which override core splitting ",
                 "behavior is not currently supported in column space.")
        }

        for(post_fn in post) {
            if(.can_take_spl_context(post_fn))
                ret <- post_fn(ret, spl = spl, .spl_context = .spl_context, fulldf = df)
            else
                ret <- post_fn(ret, spl = spl, fulldf = df)
        }
        validate_split_result(ret, "post")
        ret
    }
}

#' Add a combination facet in postprocessing
#'
#' @description Add a combination facet during postprocessing stage in a custom split fun.
#'
#' @param name character(1). Name for the resulting facet (for use in pathing, etc).
#' @param label character(1). Label for the resulting facet.
#' @param levels character. Vector of levels to  combine within the resulting facet.
#' @param extra list. Extra arguments to be passed to analysis functions applied
#' within the resulting facet.
#'
#' @details For `add_combo_facet`, the data associated with the resulting
#' facet will be the data associated with the facets for each level in
#' `levels`, `rbound` together. In particular, this means that if those levels
#' are overlapping, data that appears in both will be duplicated.
#'
#' @return a function which can be used within the `post` argument in
#' `make_split_fun`.
#'
#' @seealso \code{\link{make_split_fun}}
#'
#' @examples
#' mysplfun <- make_split_fun(post = list(add_combo_facet("A_B", label = "Arms A+B",
#'                                                        levels = c("A: Drug X", "B: Placebo")),
#'                                        add_overall_facet("ALL", label = "All Arms")))
#'
#' lyt <- basic_table(show_colcounts = TRUE) %>%
#'     split_cols_by("ARM", split_fun = mysplfun) %>%
#'     analyze("AGE")
#'
#' tbl <- build_table(lyt, DM)
#'
#' @export
#' @family make_custom_split
add_combo_facet <- function(name, label = name, levels, extra = list()) {
    function(ret, spl, .spl_context, fulldf) {
        val <- LevelComboSplitValue(val = name, extr = extra, combolevels = levels, label = label)
        add_to_split_result(ret, values = list(val), labels = label,
                            datasplit = list(do.call(rbind, ret$datasplit[levels])))
    }
}

#' @rdname add_combo_facet
#' @export
add_overall_facet <- function(name, label, extra = list()) {
    add_combo_facet(name = name, label = label, levels = select_all_levels,
                    extra = extra)
}

#' Trim Levels of Another Variable From Each Facet (Postprocessing split step)
#' @param innervar character. The variable(s) to trim (remove
#' unobserved levels) independently within each facet.
#'
#' @return a function suitable for use in the `pre`
#'     (list) argument of `make_split_fun`
#' @seealso make_split_fun
#' @export
#' @family make_custom_split
trim_levels_in_facets <- function(innervar) {
    function(ret, ...) {
        for(var in innervar) {
            ret$datasplit <- lapply(ret$datasplit, function(df) {
                df[[var]] <- factor(df[[var]])
                df
            })
        }
        ret
    }
}

#' Preprocessing Functions for use in make_split_fun
#'
#' This function is intended for use as a preprocessing
#' component in `make_split_fun`, and should not be called
#' directly by end users.
#'
#' @param df data.frame. The incoming data corresponding with the parent facet
#' @param spl Split.
#' @param ... dots. This is used internally to pass parameters.
#' @export
#' @seealso make_split_fun
#' @family make_custom_split
drop_facet_levels <- function(df, spl, ...) {
    if(!is(spl, "VarLevelSplit") || is.na(spl_payload(spl)))
        stop("Unable to determine faceting variable in drop_facet_levels application.")
    var <- spl_payload(spl)
    df[[var]] <- factor(df[[var]])
    df
}

Try the rtables package in your browser

Any scripts or data that you put into this service are public.

rtables documentation built on Aug. 30, 2023, 5:07 p.m.