R/colby_constructors.R

Defines functions append_topleft basic_table list_wrap_df list_wrap_x manual_cols .fd_helper add_existing_table add_colcounts summarize_row_groups counts_wpcts .validate_cfuns .count_wpcts_constr .count_raw_constr add_overall_col analyze_colvars get_acolvar_vars get_acolvar_name analyze split_rows_by_cutfun qtile_cuts split_rows_by_quartiles split_cols_by_quartiles split_cols_by_cutfun split_rows_by_cuts split_cols_by_cuts split_rows_by_multivar split_cols_by_multivar split_rows_by .tl_indent split_cols_by is_analysis_spl

Documented in add_colcounts add_existing_table add_overall_col analyze analyze_colvars append_topleft basic_table counts_wpcts list_wrap_df list_wrap_x manual_cols split_cols_by split_cols_by_cutfun split_cols_by_cuts split_cols_by_multivar split_cols_by_quartiles split_rows_by split_rows_by_cutfun split_rows_by_cuts split_rows_by_multivar split_rows_by_quartiles summarize_row_groups

label_pos_values <- c("hidden", "visible", "topleft")

#' @name internal_methods
#' @rdname int_methods
NULL

#' combine `SplitVector` objects
#' @param x `SplitVecttor`
#' @param ... Splits or `SplitVector` objects
#' @exportMethod c
#' @return Various, but should be considered implementation details.
#' @rdname int_methods
setMethod("c", "SplitVector", function(x, ...) {
    arglst <- list(...)
    stopifnot(all(sapply(arglst, is, "Split")))
    tmp <- c(unclass(x), arglst)
    SplitVector(lst = tmp)
})

## split_rows and split_cols are "recursive method stacks" which follow
## the general pattern of accept object -> call add_*_split on slot of object ->
## update object with value returned from slot method, return object.
##
## Thus each of the methods is idempotent, returning an updated object of the
## same class it was passed. The exception for idempotency is the NULL method
## which constructs a PreDataTableLayouts object with the specified split in the
## correct place.

## The cascading (by class) in this case is as follows for the row case:
## PreDataTableLayouts -> PreDataRowLayout -> SplitVector
#' @param cmpnd_fun function. Intended for internal use.
#' @param pos numeric(1). Intended for internal use.
#' @param spl Split. The split.
#' @rdname int_methods
setGeneric("split_rows",
           function(lyt = NULL, spl, pos,
                    cmpnd_fun = AnalyzeMultiVars) standardGeneric("split_rows"))
#' @rdname int_methods
setMethod("split_rows", "NULL", function(lyt, spl, pos, cmpnd_fun = AnalyzeMultiVars) {
    .Deprecated(msg = "Initializing layouts via NULL is deprecated, please use basic_table() instead")
    rl <- PreDataRowLayout(SplitVector(spl))
    cl <- PreDataColLayout()
    PreDataTableLayouts(rlayout = rl, clayout = cl)
})
#' @rdname int_methods
setMethod("split_rows", "PreDataRowLayout",
          function(lyt, spl, pos, cmpnd_fun = AnalyzeMultiVars) {
    stopifnot(pos > 0 && pos <= length(lyt) + 1)
    tmp  <- if (pos <= length(lyt)) {
               split_rows(lyt[[pos]], spl, pos, cmpnd_fun)
           } else {
               if(pos != 1 && has_force_pag(spl))
                   stop("page_by splits cannot have top-level siblings",
                        call. = FALSE)
               SplitVector(spl)
           }
    lyt[[pos]] <- tmp
    lyt
})

is_analysis_spl <- function(spl) {
    is(spl, "VAnalyzeSplit") || is(spl, "AnalyzeMultiVars")
}

## note "pos" is ignored here because it is for which nest-chain
## spl should be placed in, NOIT for where in that chain it should go
#' @rdname int_methods
setMethod("split_rows", "SplitVector",
          function(lyt, spl, pos, cmpnd_fun = AnalyzeMultiVars) {
    ## if(is_analysis_spl(spl) &&
    ##    is_analysis_spl(last_rowsplit(lyt))) {
    ##     return(cmpnd_last_rowsplit(lyt, spl, cmpnd_fun))
    ## }

    if(has_force_pag(spl) && length(lyt) > 0 &&
       !has_force_pag(lyt[[length(lyt)]]))
        stop("page_by splits cannot be nested within non-page_by splits",
             call. = FALSE)
    tmp <- c(unclass(lyt), spl)
    SplitVector(lst = tmp)
})

#' @rdname int_methods
setMethod("split_rows", "PreDataTableLayouts",
          function(lyt, spl, pos) {
    rlyt <- rlayout(lyt)
    addtl <- FALSE
    split_label <- obj_label(spl)
    if(is(spl, "Split") && ## exclude existing tables that are being tacked in
       identical(label_position(spl), "topleft") &&
       length(split_label) == 1  && nzchar(split_label)) {
        addtl <- TRUE
##        label_position(spl) <- "hidden"
    }

    rlyt <- split_rows(rlyt, spl, pos)
    rlayout(lyt) <- rlyt
    if(addtl) {
        lyt <- append_topleft(lyt, indent_string(split_label, .tl_indent(lyt)))
    }
    lyt
})
#' @rdname int_methods
setMethod("split_rows", "ANY",
          function(lyt, spl, pos) stop("nope. can't add a row split to that (",
                                       class(lyt), "). contact the maintaner.")
          )

#' @rdname int_methods
#' @param constructor function.
setGeneric("cmpnd_last_rowsplit", function(lyt, spl, constructor) standardGeneric("cmpnd_last_rowsplit"))
#' @rdname int_methods
setMethod("cmpnd_last_rowsplit", "NULL", function(lyt, spl, constructor) {
    stop("no existing splits to compound with. contact the maintainer") # nocov
})
#' @rdname int_methods
setMethod("cmpnd_last_rowsplit", "PreDataRowLayout",
          function(lyt, spl, constructor) {
    pos <- length(lyt)
    tmp <- cmpnd_last_rowsplit(lyt[[pos]], spl, constructor)
    lyt[[pos]] <- tmp
    lyt
})
#' @rdname int_methods
setMethod("cmpnd_last_rowsplit", "SplitVector",
          function(lyt, spl, constructor) {
    pos <- length(lyt)
    lst <- lyt[[pos]]
    tmp <- if(is(lst, "CompoundSplit")) {
              spl_payload(lst) <- c(.uncompound(spl_payload(lst)),
                                    .uncompound(spl))
              obj_name(lst) <- make_ma_name(spl = lst)
              lst
          ## XXX never reached because AnalzyeMultiVars inherits from
          ## CompoundSplit???
          } else {
              constructor(.payload = list(lst, spl))
          }
    lyt[[pos]] <- tmp
    lyt
})

#' @rdname int_methods
setMethod("cmpnd_last_rowsplit", "PreDataTableLayouts",
          function(lyt, spl, constructor) {
    rlyt <- rlayout(lyt)
    rlyt <- cmpnd_last_rowsplit(rlyt, spl, constructor)
    rlayout(lyt) <- rlyt
    lyt
})
#' @rdname int_methods
setMethod("cmpnd_last_rowsplit", "ANY",
          function(lyt, spl, constructor) {
              stop("nope. can't do cmpnd_last_rowsplit to that (",
                   class(lyt), "). contact the maintaner.")
})


#' @rdname int_methods
setGeneric("split_cols",
           function(lyt = NULL, spl, pos)
               standardGeneric("split_cols"))
#' @rdname int_methods
setMethod("split_cols", "NULL", function(lyt, spl, pos) {
    .Deprecated(msg = paste("Initializing layouts via NULL is deprecated,",
                            "please use basic_table() instead"))
    cl <- PreDataColLayout(SplitVector(spl))
    rl <- PreDataRowLayout()
    PreDataTableLayouts(rlayout = rl, clayout = cl)
})
#' @rdname int_methods
setMethod("split_cols", "PreDataColLayout",
          function(lyt, spl, pos) {
    stopifnot(pos > 0 && pos <= length(lyt) + 1)
    tmp  <- if (pos <= length(lyt)) {
               split_cols(lyt[[pos]], spl, pos)
           } else {
               SplitVector(spl)
           }

    lyt[[pos]] <- tmp
    lyt
})
#' @rdname int_methods
setMethod("split_cols", "SplitVector",
          function(lyt, spl, pos) {
    tmp <- c(lyt, spl)
    SplitVector(lst = tmp)
})
#' @rdname int_methods
setMethod("split_cols", "PreDataTableLayouts",
          function(lyt, spl, pos) {
    rlyt <- lyt@col_layout
    rlyt <- split_cols(rlyt, spl, pos)
    lyt@col_layout <- rlyt
    lyt
})
#' @rdname int_methods
setMethod("split_cols", "ANY",
          function(lyt, spl, pos) {
              stop("nope. can't add a col split to that (", class(lyt),
                   "). contact the maintaner.")
})


# constructors ----



## Pipe-able functions to add the various types of splits to the current layout
## for both row and column.  These all act as wrappers to the split_cols and
## split_rows method stacks.


#' Declaring a column-split based on levels of a variable
#'
#' Will generate children for each subset of a categorical variable
#'
#'
#' @inheritSection custom_split_funs Custom Splitting Function Details
#'
#' @inheritParams lyt_args
#'
#' @param ref_group character(1) or `NULL`. Level of `var` which should be
#'   considered ref_group/reference
#'
#' @export
#'
#' @author Gabriel Becker
#' @return A \code{PreDataTableLayouts} object suitable for passing to further
#'   layouting functions, and to \code{build_table}.
#' @examples
#'
#' lyt <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   analyze(c("AGE", "BMRKR2"))
#'
#' tbl <- build_table(lyt, ex_adsl)
#' tbl
#'
#' # Let's look at the splits in more detail
#'
#' lyt1 <- basic_table() %>% split_cols_by("ARM")
#' lyt1
#'
#' # add an analysis (summary)
#' lyt2 <- lyt1 %>%
#'     analyze(c("AGE", "COUNTRY"), afun = list_wrap_x(summary) ,
#'             format = "xx.xx")
#' lyt2
#'
#' tbl2 <- build_table(lyt2, DM)
#' tbl2
#'
#' # By default sequentially adding layouts results in nesting
#' library(dplyr)
#' DM_MF <- DM %>% filter(SEX %in% c("M", "F")) %>%
#'   mutate(SEX = droplevels(SEX))
#'
#' lyt3 <- basic_table() %>% split_cols_by("ARM") %>%
#'   split_cols_by("SEX") %>%
#'   analyze(c("AGE", "COUNTRY"), afun = list_wrap_x(summary),
#'           format = "xx.xx")
#' lyt3
#'
#' tbl3 <- build_table(lyt3, DM_MF)
#' tbl3
#'
#' # nested=TRUE vs not
#' lyt4 <- basic_table() %>% split_cols_by("ARM") %>%
#'  split_rows_by("SEX", split_fun = drop_split_levels) %>%
#'  split_rows_by("RACE", split_fun = drop_split_levels) %>%
#'  analyze("AGE")
#' lyt4
#'
#' tbl4 <- build_table(lyt4, DM)
#' tbl4
#'
#' lyt5 <- basic_table() %>% split_cols_by("ARM") %>%
#'  split_rows_by("SEX", split_fun= drop_split_levels) %>%
#'  analyze("AGE") %>%
#'  split_rows_by("RACE", nested=FALSE, split_fun = drop_split_levels) %>%
#'  analyze("AGE")
#' lyt5
#'
#' tbl5 <- build_table(lyt5, DM)
#' tbl5
#'
split_cols_by <- function(lyt,
                         var,
                         labels_var = var,
                         split_label = var,
                         split_fun = NULL,
                         format = NULL,
                         nested = TRUE,
                         child_labels = c("default", "visible", "hidden"),
                         extra_args = list(),
                         ref_group = NULL) {##,
    if(is.null(ref_group)) {
        spl <- VarLevelSplit(var = var,
                            split_label = split_label,
                            labels_var = labels_var,
                            split_format = format,
                            child_labels = child_labels,
                            split_fun = split_fun,
                            extra_args = extra_args)
    } else {
        spl <- VarLevWBaselineSplit(var = var,
                                   ref_group = ref_group,
                                   split_label = split_label,
                                   split_fun = split_fun,
                                   labels_var = labels_var,
                                   split_format = format)

    }
    pos <- next_cpos(lyt, nested)
    split_cols(lyt, spl, pos)
}


setGeneric(".tl_indent_inner", function(lyt) standardGeneric(".tl_indent_inner"))
setMethod(".tl_indent_inner", "PreDataTableLayouts",
          function(lyt) .tl_indent_inner(rlayout(lyt)))
setMethod(".tl_indent_inner", "PreDataRowLayout",
          function(lyt) {
    if(length(lyt) == 0 || length(lyt[[1]]) == 0)
        0L
    else
        .tl_indent_inner(lyt[[length(lyt)]])
})

setMethod(".tl_indent_inner", "SplitVector",
          function(lyt) {
    sum(vapply(lyt, function(x) label_position(x) == "topleft", TRUE)) - 1L
}) ##length(lyt)  - 1L)


.tl_indent <- function(lyt, nested = TRUE) {
    if(!nested)
        0L
    else
        .tl_indent_inner(lyt)
}


#' Add Rows according to levels of a variable
#'
#'
#' @inheritSection custom_split_funs Custom Splitting Function Details
#'
#'
#' @inheritParams lyt_args
#'
#' @note
#' If \code{var} is a factor with empty unobserved levels and
#' \code{labels_var} is specified, it must also be a factor
#' with the same number of levels as \code{var}. Currently the
#' error that occurs when this is not the case is not very informative,
#' but that will change in the future.
#'
#' @export
#' @author Gabriel Becker
#' @inherit split_cols_by return
#' @examples
#'
#' lyt <- basic_table() %>%
#'     split_cols_by("ARM") %>%
#'     split_rows_by("RACE", split_fun = drop_split_levels) %>%
#'     analyze("AGE", mean, var_labels = "Age", format = "xx.xx")
#'
#' tbl <- build_table(lyt, DM)
#' tbl
#'
#' lyt2 <- basic_table() %>%
#'     split_cols_by("ARM") %>%
#'     split_rows_by("RACE") %>%
#'     analyze("AGE", mean, var_labels = "Age", format = "xx.xx")
#'
#' tbl2 <- build_table(lyt2, DM)
#' tbl2
#'
#' lyt3 <- basic_table() %>%
#'     split_cols_by("ARM") %>%
#'     split_cols_by("SEX") %>%
#'     summarize_row_groups(label_fstr = "Overall (N)") %>%
#'     split_rows_by("RACE", split_label = "Ethnicity", labels_var = "ethn_lab",
#'                   split_fun = drop_split_levels) %>%
#'     summarize_row_groups("RACE", label_fstr = "%s (n)") %>%
#'     analyze("AGE", var_labels = "Age", afun = mean, format = "xx.xx")
#'
#' lyt3
#'
#' library(dplyr)
#' DM2 <- DM %>%
#'     filter(SEX %in% c("M", "F")) %>%
#'     mutate(
#'         SEX = droplevels(SEX),
#'         gender_lab = c("F" = "Female", "M" = "Male",
#'                        "U" = "Unknown",
#'                        "UNDIFFERENTIATED" = "Undifferentiated")[SEX],
#'         ethn_lab = c(
#'             "ASIAN" = "Asian",
#'             "BLACK OR AFRICAN AMERICAN" = "Black or African American",
#'             "WHITE" = "White",
#'             "AMERICAN INDIAN OR ALASKA NATIVE" = "American Indian or Alaska Native",
#'              "MULTIPLE" = "Multiple",
#'              "NATIVE HAWAIIAN OR OTHER PACIFIC ISLANDER" =
#'                  "Native Hawaiian or Other Pacific Islander",
#'              "OTHER" = "Other", "UNKNOWN" = "Unknown"
#'         )[RACE]
#'     )
#'
#' tbl3 <- build_table(lyt3, DM2)
#' tbl3
#'
split_rows_by <- function(lyt,
                         var,
                         labels_var = var,
                         split_label = var,
                         split_fun = NULL,
                         format = NULL,
                         na_str = NA_character_,
                         nested = TRUE,
                         child_labels = c("default", "visible", "hidden"),
                         label_pos = "hidden",
                         indent_mod = 0L,
                         page_by = FALSE,
                         page_prefix = split_label,
                         section_div = NA_character_) {
    label_pos <- match.arg(label_pos, label_pos_values)
    child_labels <- match.arg(child_labels)
    spl <- VarLevelSplit(var = var,
                        split_label = split_label,
                        label_pos = label_pos,
                        labels_var = labels_var,
                        split_fun = split_fun,
                        split_format = format,
                        split_na_str = na_str,
                        child_labels = child_labels,
                        indent_mod = indent_mod,
                        page_prefix = if(page_by) page_prefix else NA_character_,
                        section_div = section_div)

    pos <- next_rpos(lyt, nested)
    ret <- split_rows(lyt, spl, pos)

    ret
}


#' Associate Multiple Variables with Columns
#'
#' In some cases, the variable to be ultimately analyzed is most naturally
#' defined on a column, not a row basis. When we need columns to reflect
#' different variables entirely, rather than different levels of a single
#' variable, we use `split_cols_by_multivar`
#'
#' @inheritParams lyt_args
#'
#' @export
#'
#' @author Gabriel Becker
#'
#' @seealso [analyze_colvars()]
#' @inherit split_cols_by return
#' @examples
#'
#' library(dplyr)
#' ANL <- DM %>% mutate(value = rnorm(n()), pctdiff = runif(n()))
#'
#' ## toy example where we take the mean of the first variable and the
#' ## count of >.5 for the second.
#' colfuns <- list(function(x) in_rows(mean = mean(x), .formats = "xx.x"),
#'                 function(x) in_rows("# x > 5" = sum(x > .5), .formats = "xx"))
#'
#' lyt <- basic_table() %>%
#'     split_cols_by("ARM") %>%
#'     split_cols_by_multivar(c("value", "pctdiff")) %>%
#'     split_rows_by("RACE", split_label = "ethnicity",
#'                   split_fun = drop_split_levels) %>%
#'     summarize_row_groups() %>%
#'     analyze_colvars(afun = colfuns)
#' lyt
#'
#' tbl <- build_table(lyt, ANL)
#' tbl
#'
split_cols_by_multivar <- function(lyt,
                                  vars,
                                  split_fun = NULL,
                                  varlabels = vars,
                                  varnames = NULL,
                                  nested = TRUE,
                                  extra_args = list()) {
    spl <- MultiVarSplit(vars = vars, split_label = "",
                         varlabels = varlabels,
                         varnames = varnames,
                         split_fun = split_fun,
                         extra_args = extra_args)
    pos <- next_cpos(lyt, nested)
    split_cols(lyt, spl, pos)
}

#' Associate Multiple Variables with Rows
#'
#' When we need rows to reflect different variables rather than different
#' levels of a single variable, we use `split_rows_by_multivar`.
#'
#' @inheritParams lyt_args
#' @inherit split_rows_by return
#'
#' @seealso [split_rows_by()] for typical row splitting, and
#' [split_cols_by_multivar()] to perform the same type of split on a column basis.
#' @export
#'
#' @examples
#' lyt <- basic_table() %>%
#'     split_cols_by("ARM") %>%
#'     split_rows_by_multivar(c("SEX", "STRATA1")) %>%
#'     summarize_row_groups() %>%
#'     analyze(c("AGE", "SEX"))
#'
#' tbl <- build_table(lyt, DM)
#' tbl
#'
split_rows_by_multivar <- function(lyt,
                                   vars,
                                   split_fun = NULL,
                                   split_label = "",
                                   varlabels = vars,
                                   format = NULL,
                                   na_str = NA_character_,
                                   nested = TRUE,
                                   child_labels = c("default", "visible", "hidden"),
                                   indent_mod = 0L,
                                   section_div = NA_character_,
                                   extra_args = list()) {
    child_labels <- match.arg(child_labels)
    spl <- MultiVarSplit(vars = vars, split_label = split_label, varlabels,
                         split_format = format,
                         split_na_str = na_str,
                         child_labels = child_labels,
                         indent_mod = indent_mod,
                         split_fun = split_fun,
                         section_div = section_div,
                         extra_args = extra_args)
    pos <- next_rpos(lyt, nested)
    split_rows(lyt, spl, pos)
}

#' Split on static or dynamic cuts of the data
#'
#' Create columns (or row splits) based on values (such as quartiles) of
#' \code{var}.
#'
#' @inheritParams lyt_args
#' @param cuts numeric. Cuts to use
#' @param cutlabels character (or NULL). Labels for the cuts
#' @param cumulative logical. Should the cuts be treated as cumulative. Defaults
#'   to \code{FALSE}
#' @param cutfun function. Function which accepts the full vector of \code{var}
#'   values and returns cut points to be passed to \code{cut}.
#'
#'
#' @details For dynamic cuts, the cut is transformed into a static cut by
#' \code{\link{build_table}} \emph{based on the full dataset}, before
#' proceeding. Thus even when nested within another split in column/row space,
#' the resulting split will reflect the overall values (e.g., quartiles) in the
#' dataset, NOT the values for subset  it is nested under.
#'
#' @export
#'
#' @rdname varcuts
#' @inherit split_cols_by return
#' @author Gabriel Becker
#'
#' @examples
#' library(dplyr)
#'
#' # split_cols_by_cuts
#' lyt <- basic_table() %>%
#'     split_cols_by("ARM") %>%
#'     split_cols_by_cuts("AGE", split_label = "Age",
#'                        cuts = c(0, 25, 35, 1000),
#'                        cutlabels = c("young", "medium", "old")) %>%
#'     analyze(c("BMRKR2", "STRATA2")) %>%
#'     append_topleft("counts")
#'
#' tbl <- build_table(lyt, ex_adsl)
#' tbl
#'
#' # split_rows_by_cuts
#' lyt2 <- basic_table() %>%
#'     split_cols_by("ARM") %>%
#'     split_rows_by_cuts("AGE", split_label = "Age",
#'                   cuts = c(0, 25, 35, 1000),
#'                   cutlabels = c("young", "medium", "old")) %>%
#'     analyze(c("BMRKR2", "STRATA2")) %>%
#'     append_topleft("counts")
#'
#'
#' tbl2 <- build_table(lyt2, ex_adsl)
#' tbl2
#'
#' # split_cols_by_quartiles
#'
#' lyt3 <- basic_table() %>%
#'     split_cols_by("ARM") %>%
#'     split_cols_by_quartiles("AGE", split_label = "Age") %>%
#'     analyze(c("BMRKR2", "STRATA2")) %>%
#'     append_topleft("counts")
#'
#' tbl3 <- build_table(lyt3, ex_adsl)
#' tbl3
#'
#' # split_rows_by_quartiles
#' lyt4 <- basic_table(show_colcounts = TRUE) %>%
#'     split_cols_by("ARM") %>%
#'     split_rows_by_quartiles("AGE", split_label = "Age") %>%
#'     analyze("BMRKR2") %>%
#'     append_topleft(c("Age Quartiles", " Counts BMRKR2"))
#'
#' tbl4 <- build_table(lyt4, ex_adsl)
#' tbl4
#'
#' # split_cols_by_cutfun
#' cutfun <- function(x) {
#'   cutpoints <- c(
#'     min(x),
#'     mean(x),
#'     max(x)
#'   )
#'
#'   names(cutpoints) <- c("", "Younger", "Older")
#'   cutpoints
#' }
#'
#' lyt5 <- basic_table() %>%
#'   split_cols_by_cutfun("AGE", cutfun = cutfun) %>%
#'   analyze("SEX")
#'
#' tbl5 <- build_table(lyt5, ex_adsl)
#' tbl5
#'
#' # split_rows_by_cutfun
#' lyt6 <- basic_table() %>%
#'   split_cols_by("SEX") %>%
#'   split_rows_by_cutfun("AGE", cutfun = cutfun) %>%
#'   analyze("BMRKR2")
#'
#' tbl6 <- build_table(lyt6, ex_adsl)
#' tbl6
#'
split_cols_by_cuts <- function(lyt, var, cuts,
                              cutlabels = NULL,
                              split_label = var,
                              nested = TRUE,
                              cumulative = FALSE) {
    spl <- make_static_cut_split(var = var,
                                split_label = split_label,
                                cuts = cuts,
                                cutlabels = cutlabels,
                                cumulative = cumulative)
    ## if(cumulative)
    ##     spl = as(spl, "CumulativeCutSplit")
    pos <- next_cpos(lyt, nested)
    split_cols(lyt, spl, pos)
}

#' @export
#' @rdname varcuts
split_rows_by_cuts <- function(lyt, var, cuts,
                              cutlabels = NULL,
                              split_label = var,
                              format = NULL,
                              na_str = NA_character_,
                              nested = TRUE,
                              cumulative = FALSE,
                              label_pos = "hidden",
                              section_div = NA_character_) {
    label_pos <- match.arg(label_pos, label_pos_values)
##    VarStaticCutSplit(
    spl <- make_static_cut_split(var, split_label, cuts = cuts,
                                 cutlabels = cutlabels,
                                 split_format = format,
                                 split_na_str = na_str,
                                 label_pos = label_pos,
                                 cumulative = cumulative,
                                 section_div = section_div)
    ## if(cumulative)
    ##     spl = as(spl, "CumulativeCutSplit")
    pos <- next_rpos(lyt, nested)
    split_rows(lyt, spl, pos)
}

#' @export
#' @rdname varcuts
split_cols_by_cutfun <- function(lyt, var,
                                cutfun = qtile_cuts,
                                cutlabelfun = function(x) NULL,
                                split_label = var,
                                nested = TRUE,
                                extra_args = list(),
                                cumulative = FALSE
                                ) {
    spl <- VarDynCutSplit(var, split_label,
                         cutfun = cutfun,
                         cutlabelfun = cutlabelfun,
                         extra_args = extra_args,
                         cumulative = cumulative,
                         label_pos = "hidden")
    pos <- next_cpos(lyt, nested)
    split_cols(lyt, spl, pos)
}

#' @export
#' @rdname varcuts
split_cols_by_quartiles <- function(lyt, var, split_label = var,
                                    nested = TRUE,
                                    extra_args = list(),
                                    cumulative = FALSE) {
    split_cols_by_cutfun(lyt = lyt,
                         var = var,
                         split_label = split_label,
                         cutfun = qtile_cuts,
                         cutlabelfun = function(x) c("[min, Q1]",
                                                   "(Q1, Q2]",
                                                   "(Q2, Q3]",
                                                   "(Q3, max]"),
                         nested = nested,
                         extra_args = extra_args,
                         cumulative = cumulative)
    ## spl = VarDynCutSplit(var, split_label, cutfun = qtile_cuts,
    ##                      cutlabelfun = function(x) c("[min, Q1]",
    ##                                                "(Q1, Q2]",
    ##                                                "(Q2, Q3]",
    ##                                                "(Q3, max]"),
    ##                      split_format = format,
    ##                      extra_args = extra_args,
    ##                      cumulative = cumulative,
    ##                      label_pos = "hidden")
    ## pos = next_cpos(lyt, nested)
    ## split_cols(lyt, spl, pos)
}


#' @export
#' @rdname varcuts
split_rows_by_quartiles <- function(lyt, var, split_label = var,
                                    format = NULL,
                                    na_str = NA_character_,
                                    nested = TRUE,
                                    child_labels = c("default", "visible", "hidden"),
                                    extra_args = list(),
                                    cumulative = FALSE,
                                    indent_mod = 0L,
                                    label_pos = "hidden",
                                    section_div = NA_character_) {
    split_rows_by_cutfun(lyt = lyt,
                         var = var,
                         split_label = split_label,
                         format = format,
                         na_str = na_str,
                         cutfun = qtile_cuts,
                         cutlabelfun = function(x) c("[min, Q1]",
                                                   "(Q1, Q2]",
                                                   "(Q2, Q3]",
                                                   "(Q3, max]"),
                         nested = nested,
                         child_labels = child_labels,
                         extra_args = extra_args,
                         cumulative = cumulative,
                         indent_mod = indent_mod,
                         label_pos = label_pos,
                         section_div = section_div)

    ## label_pos <- match.arg(label_pos, label_pos_values)
    ## spl = VarDynCutSplit(var, split_label, cutfun = qtile_cuts,
    ##                      cutlabelfun = ,
    ##                      split_format = format,
    ##                      child_labels = child_labels,
    ##                      extra_args = extra_args,
    ##                      cumulative = cumulative,
    ##                      indent_mod = indent_mod,
    ##                      label_pos = label_pos)
    ## pos = next_rpos(lyt, nested)
    ## split_rows(lyt, spl, pos)
}



qtile_cuts <- function(x) {
    ret <- quantile(x)
    names(ret) <- c("",
                   "1st qrtile",
                    "2nd qrtile",
                    "3rd qrtile",
                    "4th qrtile")
    ret
}

#' @export
#' @rdname varcuts
split_rows_by_cutfun <- function(lyt, var,
                                cutfun = qtile_cuts,
                                cutlabelfun = function(x) NULL,
                                split_label = var,
                                format = NULL,
                                na_str = NA_character_,
                                nested = TRUE,
                                child_labels = c("default", "visible", "hidden"),
                                extra_args = list(),
                                cumulative = FALSE,
                                indent_mod = 0L,
                                label_pos = "hidden",
                                section_div = NA_character_) {
    label_pos <- match.arg(label_pos, label_pos_values)
    child_labels <- match.arg(child_labels)
    spl <- VarDynCutSplit(var, split_label, cutfun = cutfun,
                         cutlabelfun = cutlabelfun,
                         split_format = format,
                         split_na_str = na_str,
                         child_labels = child_labels,
                         extra_args = extra_args,
                         cumulative = cumulative,
                         indent_mod = indent_mod,
                         label_pos = label_pos,
                         section_div = section_div)
    pos <- next_rpos(lyt, nested)
    split_rows(lyt, spl, pos)
}


#' @title .spl_context within analysis and split functions
#' 
#' @description
#' `.spl_context` is an optional parameter for any of `rtables`' special 
#' functions, them being `afun` (analysis function in [analyze]), 
#' `cfun` (content or label function in [summarize_row_groups]),
#' or `split_fun` (e.g. for [split_rows_by]).
#' 
#'
#' @details
#' The `.spl_context` `data.frame` gives information about the subsets of data
#' corresponding to the splits within-which the current `analyze` action is
#' nested. Taken together, these correspond to the path that the resulting (set
#' of) rows the analysis function is creating, although the information is in a
#' slightly different form. Each split (which correspond to groups of rows in
#' the resulting table), as well as the initial 'root' "split", is represented
#' via the following columns:
#' \describe{
#'   \item{split}{The name of the split (often the variable being split in the
#'     simple case)}
#'   \item{value}{The string representation of the value at that split}
#'   \item{full_parent_df}{a dataframe containing the full data (i.e. across all
#'     columns) corresponding to the path defined by the combination of `split`
#'     and `value` of this row \emph{and all rows above this row}}
#'   \item{all_cols_n}{the number of observations  corresponding to this row
#'     grouping (union of all columns)}
#'   \item{\emph{(row-split and analyze contexts only)} <1 column for each
#'     column in the table structure}{ These list columns (named the same as
#'     \code{names(col_exprs(tab))}) contain logical vectors corresponding to
#'     the subset of this row's `full_parent_df` corresponding to that column}
#'   \item{cur_col_id}{Identifier of the current column. This may be an internal
#'     name, constructed by pasting the column path together}
#'   \item{cur_col_subset}{List column containing logical vectors indicating the
#'     subset of that row's `full_parent_df` for the column currently being
#'     created by the analysis function}
#'   \item{cur_col_expr}{List of current column expression. This may be used to 
#'     filter `.alt_df_row` or any external data by column. Filtering `.alt_df_row`
#'     by columns produces `.alt_df`.}
#'   \item{cur_col_n}{integer column containing the observation counts for that
#'     split}
#'   \item{cur_col_split}{Current column split names. This is recovered from the
#'     current column path}
#'   \item{cur_col_split_val}{Current column split values. This is recovered from the
#'     current column path}
#' }
#'
#' \emph{note Within analysis functions that accept `.spl_context`, the
#' `all_cols_n` and `cur_col_n` columns of the dataframe will contain the 'true'
#' observation counts corresponding to the row-group and row-group x column
#' subsets of the data. These numbers will not, and currently cannot, reflect
#' alternate column observation counts provided by the `alt_counts_df`,
#' `col_counts` or `col_total` arguments to \code{\link{build_table}}}
#' 
#' @name spl_context
NULL

#' @title Additional parameters within analysis and content functions 
#' (`afun/cfun`)
#' 
#' @description 
#' It is possible to add specific parameters to `afun` and `cfun`, in [analyze]
#' and [summarize_row_groups] respectively. These parameters grant access to 
#' relevant information like the row split structure (see [spl_context]) and the
#' predefined baseline (`.ref_group`).
#' 
#' @details
#' We list and describe here all the parameters that can be added to a custom 
#' analysis function:
#' 
#' \describe{
#'   \item{.N_col}{column-wise N (column count) for the full column being
#'     tabulated within}
#'   \item{.N_total}{overall N (all observation count, defined as sum of column
#'     counts) for the tabulation}
#'   \item{.N_row}{row-wise N (row group count) for the group of observations
#'     being analyzed (i.e. with no column-based subsetting)}
#'   \item{.df_row}{data.frame for observations in the row group being analyzed
#'     (i.e. with no column-based subsetting)}
#'   \item{.var}{variable that is analyzed}
#'   \item{.ref_group}{data.frame or vector of subset corresponding to the
#'     `ref_group` column including subsetting defined by row-splitting.
#'     Optional and only required/meaningful if a `ref_group` column has been
#'     defined}
#'   \item{.ref_full}{data.frame or vector of subset corresponding to the
#'     `ref_group` column without subsetting defined by row-splitting. Optional
#'     and only required/meaningful if a `ref_group` column has been defined}
#'   \item{.in_ref_col}{boolean indicates if calculation is done for cells
#'     within the reference column}
#'   \item{.spl_context}{data.frame, each row gives information about a
#'     previous/'ancestor' split state. See \code{\link{spl_context}}}
#'   \item{.alt_df_row}{data.frame, i.e. the `alt_count_df` after 
#'     row splitting. It can be used with `.all_col_exprs` and `.spl_context`
#'     information to retrieve current faceting, but for `alt_count_df`.
#'     It can be an empty table if all the entries are filtered out.}
#'   \item{.alt_df}{data.frame, `.alt_df_row` but filtered by columns expression.
#'     This data present the same faceting of main data `df`. This also filters
#'     `NAs` out if related parameters are set to (e.g. `inclNAs` in [analyze]). 
#'     Similarly to `.alt_df_row`, it can be an empty table if all the entries 
#'     are filtered out.}
#'   \item{.all_col_exprs}{list of expressions. Each of them represents a 
#'     different column splitting.}
#'   \item{.all_col_counts}{vector of integers. Each of them represents the global
#'     count for each column. It differs if `alt_counts_df` is used 
#'     (see [build_table]).}
#' }
#' 
#' @note If any of these formals is specified incorrectly or not 
#'  present in the tabulation machinery, it will be as if missing. For
#'  example`.ref_group` will be missing if no baseline is previously defined
#'  during data splitting (via `ref_group` parameters in, e.g., [split_rows_by]).
#'  Similarly, if no `alt_counts_df` is provided into [build_table], 
#'  `.alt_df_row` and `.alt_df` will not be present.
#'
#' @name additional_fun_params
NULL

#' Generate Rows Analyzing Variables Across Columns
#'
#' Adding *analyzed variables* to our table layout defines the primary
#' tabulation to be performed. We do this by adding calls to \code{analyze}
#' and/or \code{\link{analyze_colvars}} into our layout pipeline. As with adding
#' further splitting, the tabulation will occur at the current/next level of
#' nesting by default.
#'
#' @inheritParams lyt_args
#' 
#' @inherit split_cols_by return
#'
#' @details
#'
#' When non-NULL \code{format} is used to specify formats for all generated
#' rows, and can be a character vector, a function, or a list of functions. It
#' will be repped out to the number of rows once this is known during the
#' tabulation process, but will be overridden by formats specified within
#' \code{rcell} calls in \code{afun}.
#'
#' The analysis function (\code{afun}) should take as its first parameter either
#' \code{x} or \code{df}. Which of these the function accepts changes the
#' behavior when tabulation is performed.
#'
#' \itemize{
#'   \item{
#'   If \code{afun}'s first parameter is x, it will receive the corresponding
#'   subset \emph{vector} of data from the relevant column (from \code{var}
#'   here) of the raw data being used to build the table.
#'   }
#'
#'   \item{
#'   If \code{afun}'s first parameter is \code{df}, it will receive the
#'   corresponding subset \emph{data.frame} (i.e. all columns) of the raw data
#'   being tabulated
#'   }
#' }
#'
#' In addition to differentiation on the first argument, the analysis function
#' can optionally accept a number of other parameters which, \emph{if and only
#' if} present in the formals will be passed to the function by the tabulation
#' machinery. These are listed and described in [`additional_fun_params`].
#' 
#' @note None of the arguments described in the Details section
#' can be overridden via \code{extra_args} or when calling
#' \code{\link{make_afun}}. \code{.N_col} and \code{.N_total} can
#' be overridden via the \code{col_counts} argument to
#' \code{\link{build_table}}. Alternative values for the others
#' must be calculated within \code{afun} based on a combination
#' of extra arguments and the unmodified values provided by the
#' tabulation framework.
#'
#' @export
#'
#' @author Gabriel Becker
#'
#'
#' @examples
#'
#' lyt <- basic_table() %>%
#'     split_cols_by("ARM") %>%
#'     analyze("AGE", afun = list_wrap_x(summary) , format = "xx.xx")
#' lyt
#'
#' tbl <- build_table(lyt, DM)
#' tbl
#'
#' lyt2 <- basic_table() %>%
#'     split_cols_by("Species") %>%
#'     analyze(head(names(iris), -1), afun = function(x) {
#'         list(
#'             "mean / sd" = rcell(c(mean(x), sd(x)), format = "xx.xx (xx.xx)"),
#'             "range" = rcell(diff(range(x)), format = "xx.xx")
#'         )
#'     })
#' lyt2
#'
#' tbl2 <- build_table(lyt2, iris)
#' tbl2
#'
analyze <- function(lyt,
                   vars,
                   afun = simple_analysis,
                   var_labels = vars,
                   table_names = vars,
                   format = NULL,
                   na_str = NA_character_,
                   nested = TRUE,
                   ## can't name this na_rmsymbol conflict with possible afuns!!
                   inclNAs = FALSE,
                   extra_args = list(),
                   show_labels = c("default", "visible", "hidden"),
                   indent_mod = 0L,
                   section_div = NA_character_) {
    show_labels <- match.arg(show_labels)
    subafun <- substitute(afun)
    if(is.name(subafun) &&
       is.function(afun) &&
       ## this is gross. basically testing
       ## if the symbol we have corresponds
       ## in some meaningful way to the function
       ## we will be calling.
       identical(mget(as.character(subafun),
                      mode = "function",
                      ifnotfound = list(NULL),
                      inherits = TRUE
                      )[[1]], afun)) {
        defrowlab <- as.character(subafun)
    } else {
        defrowlab <- var_labels
    }

    spl <- AnalyzeMultiVars(vars, var_labels,
                          afun = afun,
                          split_format = format,
                          split_na_str = na_str,
                          defrowlab = defrowlab,
                          inclNAs = inclNAs,
                          extra_args = extra_args,
                          indent_mod = indent_mod,
                          child_names = table_names,
                          child_labels = show_labels,
                          section_div = section_div)

    if(nested &&
       (is(last_rowsplit(lyt), "VAnalyzeSplit") ||
        is(last_rowsplit(lyt), "AnalyzeMultiVars"))) {
        cmpnd_last_rowsplit(lyt, spl, AnalyzeMultiVars)
    } else {
    ## analysis compounding now done in split_rows
        pos <- next_rpos(lyt, nested)
        split_rows(lyt, spl, pos)
    }
}



get_acolvar_name  <- function(lyt) {
    ## clyt <- clayout(lyt)
    ## stopifnot(length(clyt) == 1L)
    ## vec = clyt[[1]]
    ## vcls = vapply(vec, class, "")
    ## pos = max(which(vcls ==  "MultiVarSplit"))
    paste(c("ac", get_acolvar_vars(lyt)), collapse = "_")
}


get_acolvar_vars <- function(lyt) {
    clyt <- clayout(lyt)
    stopifnot(length(clyt) == 1L)
    vec <- clyt[[1]]
    vcls <- vapply(vec, class, "")
    pos <- which(vcls ==  "MultiVarSplit")
    if(length(pos) > 0)
        spl_payload(vec[[pos]])
    else
        "non_multivar"
}


#' Generate Rows Analyzing Different Variables Across Columns
#'
#' @inheritParams  lyt_args
#'
#' @param afun function or list. Function(s) to be used to calculate the values
#'   in each column.  The list will be repped out as needed and matched by
#'   position with the columns during tabulation. This functions accepts the same
#'   parameters as [analyze] like `afun` and `format`. For further information see 
#'   [`additional_fun_params`].
#'
#' @export
#'
#' @inherit split_cols_by return
#'
#' @seealso [split_cols_by_multivar()]
#'
#' @author Gabriel Becker
#'
#' @examples
#'
#' library(dplyr)
#' ANL <- DM %>% mutate(value = rnorm(n()), pctdiff = runif(n()))
#'
#' ## toy example where we take the mean of the first variable and the
#' ## count of >.5 for the second.
#' colfuns <- list(function(x) rcell(mean(x), format = "xx.x"),
#'                 function(x) rcell(sum(x > .5), format = "xx"))
#'
#' lyt <- basic_table() %>%
#'     split_cols_by("ARM") %>%
#'     split_cols_by_multivar(c("value", "pctdiff")) %>%
#'     split_rows_by("RACE", split_label = "ethnicity",
#'                   split_fun = drop_split_levels) %>%
#'     summarize_row_groups() %>%
#'     analyze_colvars(afun = colfuns)
#' lyt
#'
#' tbl <- build_table(lyt, ANL)
#' tbl
#'
#' lyt2 <- basic_table() %>%
#'     split_cols_by("ARM") %>%
#'     split_cols_by_multivar(c("value", "pctdiff"),
#'                            varlabels = c("Measurement", "Pct Diff")) %>%
#'     split_rows_by("RACE", split_label = "ethnicity",
#'                   split_fun = drop_split_levels) %>%
#'     summarize_row_groups() %>%
#'     analyze_colvars(afun = mean, format = "xx.xx")
#'
#' tbl2 <- build_table(lyt2, ANL)
#' tbl2
#'
analyze_colvars <- function(lyt, afun,
                           format = NULL,
                           nested = TRUE,
                           extra_args = list(),
                           indent_mod = 0L,
                           inclNAs = FALSE) {
    if(is.function(afun)) {
        subafun <- substitute(afun)
        if(is.name(subafun) &&
           is.function(afun) &&
           ## this is gross. basically testing
           ## if the symbol we have corresponds
           ## in some meaningful way to the function
           ## we will be calling.
           identical(mget(as.character(subafun),
                          mode = "function",
                          ifnotfound = list(NULL),
                          inherits = TRUE
                          )[[1]], afun)) {
            defrowlab <- as.character(subafun)
        } else {
            defrowlab <- ""
        }
        afun <- lapply(get_acolvar_vars(lyt),
                      function(x) afun)
    } else {
        defrowlab <- ""
    }
    spl <- AnalyzeColVarSplit(afun = afun,
                          defrowlab = defrowlab,
                          split_format = format,
                          split_name = get_acolvar_name(lyt),
                          indent_mod = indent_mod,
                          extra_args = extra_args,
                          inclNAs = inclNAs)
    pos <- next_rpos(lyt, nested, for_analyze = TRUE)
    split_rows(lyt, spl, pos)
}

## Add a total column at the next **top level** spot in
## the column layout.

#' Add Overall Column
#'
#' @description This function will \emph{only} add an overall
#' column at the \emph{top} level of splitting, NOT within
#' existing column splits.
#' See \code{\link{add_overall_level}} for the recommended
#' way to add overall columns more generally within existing splits.
#'
#' @inheritParams lyt_args
#'
#' @inherit split_cols_by return
#'
#' @export
#'
#' @seealso [add_overall_level()]
#'
#' @examples
#' lyt <- basic_table() %>%
#'    split_cols_by("ARM") %>%
#'    add_overall_col("All Patients") %>%
#'    analyze("AGE")
#' lyt
#'
#' tbl <- build_table(lyt, DM)
#' tbl
#'
add_overall_col <- function(lyt, label) {
    spl <- AllSplit(label)
    split_cols(lyt,
                  spl,
                  next_cpos(lyt, FALSE))
}


#'
#' @inheritParams lyt_args
#' @export
#'
#' @rdname int_methods
setGeneric(".add_row_summary",
           function(lyt,
                    label,
                    cfun,
                    child_labels = c("default", "visible", "hidden"),
                    cformat = NULL,
                    cna_str = "-",
                    indent_mod = 0L,
                    cvar = "",
                    extra_args = list()) standardGeneric(".add_row_summary"))
#' @rdname int_methods
setMethod(".add_row_summary", "PreDataTableLayouts",
          function(lyt,
                   label,
                   cfun,
                   child_labels = c("default", "visible", "hidden"),
                   cformat = NULL,
                   cna_str = "-",
                   indent_mod = 0L,
                   cvar = "",
                   extra_args = list()) {
    child_labels <- match.arg(child_labels)
    tmp <- .add_row_summary(rlayout(lyt), label, cfun,
                      child_labels = child_labels,
                      cformat = cformat,
                      cna_str = cna_str,
                      indent_mod = indent_mod,
                      cvar = cvar,
                      extra_args = extra_args)
    rlayout(lyt) <- tmp
    lyt
})
#' @rdname int_methods
setMethod(".add_row_summary", "PreDataRowLayout",
          function(lyt,
                   label,
                   cfun,
                   child_labels = c("default", "visible", "hidden"),
                   cformat = NULL,
                   cna_str = "-",
                   indent_mod = 0L,
                   cvar = "",
                   extra_args = list()) {
    child_labels <- match.arg(child_labels)
    if(length(lyt) == 0 ||
       (length(lyt) == 1 && length(lyt[[1]]) == 0)) {
        ## XXX ignoring indent mod here
        rt <- root_spl(lyt)
        rt <- .add_row_summary(rt,
                              label,
                              cfun,
                              child_labels = child_labels,
                              cformat = cformat,
                              cna_str = cna_str,
                              cvar = cvar,
                              extra_args = extra_args)
        root_spl(lyt) <- rt
    } else {
        ind <- length(lyt)
        tmp <- .add_row_summary(lyt[[ind]], label, cfun,
                          child_labels = child_labels,
                          cformat = cformat,
                          cna_str = cna_str,
                          indent_mod = indent_mod,
                          cvar = cvar,
                          extra_args = extra_args)
        lyt[[ind]] <- tmp
    }
    lyt
})
#' @rdname int_methods
setMethod(".add_row_summary", "SplitVector",
          function(lyt,
                   label,
                   cfun,
                   child_labels = c("default", "visible", "hidden"),
                   cformat = NULL,
                   cna_str = "-",
                   indent_mod = 0L,
                   cvar = "",
                   extra_args = list()) {
    child_labels <- match.arg(child_labels)
    ind <- length(lyt)
    if(ind == 0) stop("no split to add content rows at")
    spl <- lyt[[ind]]
    # if(is(spl, "AnalyzeVarSplit"))
    #     stop("can't add content rows to analyze variable split")
    tmp <- .add_row_summary(spl,
                           label,
                           cfun,
                           child_labels = child_labels,
                           cformat = cformat,
                           cna_str = cna_str,
                           indent_mod = indent_mod,
                           cvar = cvar,
                           extra_args = extra_args)
    lyt[[ind]] <- tmp
    lyt
})
#' @rdname int_methods
setMethod(".add_row_summary", "Split",
          function(lyt,
                   label,
                   cfun,
                   child_labels = c("default", "visible", "hidden"),
                   cformat = NULL,
                   cna_str = "-",
                   indent_mod = 0L,
                   cvar = "",
                   extra_args = list()) {
    child_labels <- match.arg(child_labels)
 #   lbl_kids = .labelkids_helper(child_labels)
    content_fun(lyt) <- cfun
    content_indent_mod(lyt) <- indent_mod
    content_var(lyt) <- cvar
    ##obj_format(lyt) = cformat
    content_format(lyt) <- cformat
    if(!identical(child_labels, "default") &&
       !identical(child_labels, label_kids(lyt))) {
        label_kids(lyt) <- child_labels
    }
    content_na_str <- cna_str
    content_extra_args(lyt) <- extra_args
    lyt
})

.count_raw_constr <- function(var, format, label_fstr) {
    function(df, labelstr = "") {
        if(grepl("%s", label_fstr, fixed = TRUE))
            label <- sprintf(label_fstr, labelstr)
        else
            label <- label_fstr
        if(is(df, "data.frame")) {
            if(!is.null(var) && nzchar(var))
                cnt <- sum(!is.na(df[[var]]))
            else
                cnt <- nrow(df)
        } else { # df is the data column vector
            cnt <- sum(!is.na(df))
        }
        ret <- rcell(cnt, format = format,
                    label = label)
        ret
    }
}

.count_wpcts_constr <- function(var, format, label_fstr) {
    function(df, labelstr = "", .N_col) {
        if(grepl("%s", label_fstr, fixed = TRUE))
            label <- sprintf(label_fstr, labelstr)
        else
            label <- label_fstr
        if(is(df, "data.frame")) {
            if(!is.null(var) && nzchar(var))
                cnt <- sum(!is.na(df[[var]]))
            else
                cnt <- nrow(df)
        } else { # df is the data column vector
            cnt <- sum(!is.na(df))
        }
        ## the formatter does the *100 so we don't here.
        ## TODO name elements of this so that ARD generation has access to them
        ## ret <- rcell(c(n = cnt, pct = cnt / .N_col),
        ret <- rcell(c(cnt, cnt / .N_col),
                    format = format,
                    label = label)
        ret
    }
}

.validate_cfuns <- function(fun) {
    if(is.list(fun))
        return(unlist(lapply(fun, .validate_cfuns)))

    frmls <- formals(fun)
    ls_pos <- match("labelstr", names(frmls))
    if(is.na(ls_pos))
        stop("content functions must explicitly accept a 'labelstr' argument")

    list(fun)
}

#' Analysis function to count levels of a factor with percentage of the column total
#'
#' @param x factor. Vector of data, provided by rtables pagination machinery
#' @param .N_col integer(1). Total count for the column, provided by rtables pagination machinery
#'
#' @return A `RowsVerticalSection` object with counts (and percents) for each level of the factor
#' @export
#' @examples
#'
#' counts_wpcts(DM$SEX, 400)
counts_wpcts<- function(x, .N_col) {
    if(!is.factor(x))
        stop("using the 'counts_wpcts' analysis function requires factor data ",
             "to guarantee equal numbers of rows across all collumns, got class ",
             class(x), ".")
    ret <- table(x)
    in_rows(.list = lapply(ret, function(y) rcell(y * c(1, 1/.N_col), format = "xx (xx.x%)")))
}

#' Add a content row of summary counts
#'
#' @inheritParams lyt_args
#'
#' @inherit split_cols_by return
#'
#' @details If `format` expects 1 value (i.e. it is specified as a format string
#'   and `xx` appears for two values (i.e. `xx` appears twice in the
#'   format string) or is specified as a function, then both raw and percent of
#'   column total counts are calculated. If `format` is a format string where
#'   `xx` appears only one time, only raw counts are used.
#'
#' `cfun` must accept `x` or `df` as its first argument. For the `df` argument
#' `cfun` will receive the subset `data.frame` corresponding with the row-
#' and column-splitting for the cell being calculated. Must accept `labelstr` as
#' the second parameter, which accepts the `label` of the level of the parent
#' split currently being summarized. Can additionally take any optional argument
#' supported by analysis functions. (see \code{\link{analyze}}).
#' 
#' In addition, if complex custom functions are needed, we suggest checking the
#' available [additional_fun_params] that apply here as for `afun`.
#'
#' @export
#'
#' @author Gabriel Becker
#'
#' @examples
#'
#' DM2 <- subset(DM, COUNTRY %in% c("USA", "CAN", "CHN"))
#'
#' lyt <- basic_table() %>% split_cols_by("ARM") %>%
#'     split_rows_by("COUNTRY", split_fun = drop_split_levels) %>%
#'     summarize_row_groups(label_fstr = "%s (n)") %>%
#'     analyze("AGE", afun = list_wrap_x(summary) , format = "xx.xx")
#' lyt
#'
#' tbl <- build_table(lyt, DM2)
#' tbl
#'
#' row_paths_summary(tbl) # summary count is a content table
#'
#'
#' ## use a cfun and extra_args to customize summarization
#' ## behavior
#' sfun <- function(x, labelstr, trim) {
#'     in_rows(
#'         c(mean(x, trim = trim), trim),
#'         .formats = "xx.x (xx.x%)",
#'         .labels = sprintf("%s (Trimmed mean and trim %%)",
#'                               labelstr)
#'     )
#' }
#'
#' lyt2 <- basic_table(show_colcounts = TRUE) %>%
#'     split_cols_by("ARM") %>%
#'     split_rows_by("COUNTRY", split_fun = drop_split_levels) %>%
#'     summarize_row_groups("AGE", cfun = sfun,
#'                          extra_args = list(trim = .2)) %>%
#'     analyze("AGE", afun = list_wrap_x(summary) , format = "xx.xx") %>%
#'     append_topleft(c("Country", "  Age"))
#'
#' tbl2 <- build_table(lyt2, DM2)
#' tbl2
#'
summarize_row_groups <- function(lyt,
                                var = "",
                                label_fstr = "%s",
                                format = "xx (xx.x%)",
                                na_str = "-",
                                cfun = NULL,
                                indent_mod = 0L,
                                extra_args = list()) {

    if(is.null(cfun)) {
        if(is.character(format) &&
           length(gregexpr("xx(\\.x*){0,1}", format)[[1]]) == 1) {
            cfun <- .count_raw_constr(var, format, label_fstr)
        } else {
            cfun <- .count_wpcts_constr(var, format, label_fstr)
        }
    }
    cfun <- .validate_cfuns(cfun)
    .add_row_summary(lyt,
                     cfun = cfun,
                     cformat = format,
                     cna_str = na_str,
                     indent_mod = indent_mod,
                     cvar = var,
                     extra_args = extra_args)
}


#' Add the column population counts to the header
#'
#' Add the data derived column counts.
#'
#' @details It is often the case that the the column counts derived from the
#'   input data to `build_table` is not representative of the population counts.
#'   For example, if events are counted in the table and the header should
#'   display the number of subjects and not the total number of events. In that
#'   case use the `col_count` argument in `build_table` to control the counts
#'   displayed in the table header.
#'
#' @inheritParams lyt_args
#'
#' @inherit split_cols_by return
#'
#' @export
#'
#' @author Gabriel Becker
#'
#' @examples
#' lyt <- basic_table() %>% split_cols_by("ARM") %>%
#'     add_colcounts() %>%
#'     split_rows_by("RACE", split_fun = drop_split_levels) %>%
#'     analyze("AGE", afun = function(x) list(min = min(x), max = max(x)))
#' lyt
#'
#' tbl <- build_table(lyt, DM)
#' tbl
#'
add_colcounts <- function(lyt, format = "(N=xx)") {
    if(is.null(lyt))
        lyt <- PreDataTableLayouts()
    disp_ccounts(lyt) <- TRUE
    colcount_format(lyt) <- format
    lyt
}

## Currently existing tables can ONLY be added
## as new entries at the top level, never at any
## level of nesting.
#' Add an already calculated table to the layout
#' @inheritParams lyt_args
#' @inheritParams gen_args
#'
#' @inherit split_cols_by return
#' @export
#' @author Gabriel Becker
#'
#' @examples
#' lyt1 <- basic_table() %>%
#'    split_cols_by("ARM") %>%
#'    analyze("AGE", afun = mean, format = "xx.xx")
#'
#' tbl1 <- build_table(lyt1, DM)
#' tbl1
#'
#' lyt2 <- basic_table() %>% split_cols_by("ARM") %>%
#'    analyze("AGE", afun = sd, format = "xx.xx") %>%
#'    add_existing_table(tbl1)
#'
#' tbl2 <- build_table(lyt2, DM)
#' tbl2
#'
#' table_structure(tbl2)
#' row_paths_summary(tbl2)
#'
add_existing_table <- function(lyt, tt, indent_mod = 0) {
    indent_mod(tt) <- indent_mod
    lyt <- split_rows(lyt,
                        tt,
                        next_rpos(lyt, nested = FALSE))
    lyt
}


## takes_coln = function(f) {
##     stopifnot(is(f, "function"))
##     forms = names(formals(f))
##     res = ".N_col" %in% forms
##     res
## }

## takes_totn = function(f) {
##     stopifnot(is(f, "function"))
##     forms = names(formals(f))
##     res = ".N_total" %in% forms
##     res
## }


## use data to transform dynamic cuts to static cuts
#' @rdname int_methods
setGeneric("fix_dyncuts", function(spl, df) standardGeneric("fix_dyncuts"))
#' @rdname int_methods
setMethod("fix_dyncuts", "Split", function(spl, df) spl)
#' @rdname int_methods
setMethod("fix_dyncuts", "VarDynCutSplit",
          function(spl, df) {

    var <- spl_payload(spl)
    varvec <- df[[var]]

    cfun <- spl_cutfun(spl)
    cuts <- cfun(varvec)
    cutlabels <- spl_cutlabelfun(spl)(cuts)
    if(length(cutlabels) != length(cuts) - 1  &&
       !is.null(names(cuts))) {
        cutlabels <- names(cuts)[-1]
    }

    ret <- make_static_cut_split(var = var, split_label = obj_label(spl),
                                 cuts = cuts, cutlabels = cutlabels,
                                 cumulative = spl_is_cmlcuts(spl))
    ## ret = VarStaticCutSplit(var = var, split_label = obj_label(spl),
    ##                   cuts = cuts, cutlabels = cutlabels)
    ## ## classes are tthe same structurally CumulativeCutSplit
    ## ## is just a sentinal so it can hit different make_subset_expr
    ## ## method
    ## if(spl_is_cmlcuts(spl))
    ##     ret = as(ret, "CumulativeCutSplit")
    ret
})
#' @rdname int_methods
setMethod("fix_dyncuts", "VTableTree",
          function(spl, df) spl)


.fd_helper <- function(spl, df) {
    lst <- lapply(spl, fix_dyncuts, df = df)
    spl@.Data <- lst
    spl

}
#' @rdname int_methods
setMethod("fix_dyncuts", "PreDataRowLayout",
          function(spl, df) {
 #   rt = root_spl(spl)
    ret <- .fd_helper(spl, df)
#    root_spl(ret) = rt
    ret
})
#' @rdname int_methods
setMethod("fix_dyncuts", "PreDataColLayout",
          function(spl, df) {
 #   rt = root_spl(spl)
    ret <- .fd_helper(spl, df)
 #   root_spl(ret) = rt
 #   disp_ccounts(ret) = disp_ccounts(spl)
 #   colcount_format(ret) = colcount_format(spl)
    ret
})
#' @rdname int_methods
setMethod("fix_dyncuts", "SplitVector",
          function(spl, df) {
    .fd_helper(spl, df)
})
#' @rdname int_methods
setMethod("fix_dyncuts", "PreDataTableLayouts",
          function(spl, df) {
    rlayout(spl) <- fix_dyncuts(rlayout(spl), df)
    clayout(spl) <- fix_dyncuts(clayout(spl), df)
    spl
})




## Manual column construction in a simple (seeming
## to the user) way.
#' Manual column declaration
#' @param \dots One or more vectors of levels to appear in the column space. If
#'   more than one set of levels is given, the values of the second are nested
#'   within each value of the first, and so on.
#' @param .lst A list of sets of levels, by default populated via
#'   \code{list(...)}.
#' @return An `InstantiatedColumnInfo` object, suitable for use declaring the
#'   column structure for a manually constructed table.
#' @author Gabriel Becker
#'
#' @export
#'
#' @examples
#' # simple one level column space
#' rows <- lapply(1:5, function(i) {
#'    DataRow(rep(i, times  = 3))})
#' tbl <- TableTree(kids = rows, cinfo = manual_cols(split = c("a", "b", "c")))
#' tbl
#'
#' # manually declared nesting
#' tbl2 <- TableTree(kids = list(DataRow(as.list(1:4))),
#'                  cinfo = manual_cols(Arm = c("Arm A", "Arm B"),
#'                                      Gender = c("M", "F")))
#' tbl2
#'
manual_cols <- function(..., .lst = list(...)) {
    if(is.null(names(.lst)))
        names(.lst) <- paste("colsplit", seq_along(.lst))

    splvec <- SplitVector(lst = mapply(ManualSplit, levels = .lst,
                                       label = names(.lst)))
    ctree <- splitvec_to_coltree(data.frame(), splvec = splvec, pos = TreePos())
    InstantiatedColumnInfo(treelyt = ctree)
}


#' Returns a function that coerces the return values of f to a list
#'
#' @param f The function to wrap.
#' @export
#'
#' @details \code{list_wrap_x} generates a wrapper which takes \code{x} as its
#'   first argument, while \code{list_wrap_df} generates an otherwise identical
#'   wrapper function whose first argument is named \code{df}.
#'
#'   We provide both because when using the functions as tabulation in
#'   \code{\link{analyze}}, functions which take \code{df} as their first
#'   argument are passed the full subset dataframe, while those which accept
#'   anything else {notably including \code{x}} are passed only the relevant
#'   subset of the variable being analyzed.
#'
#' @rdname list_wrap
#' @return A function which calls \code{f} and converts the result to a list of
#'   \code{CellValue} objects.
#' @author Gabriel Becker
#' @examples
#'
#' summary(iris$Sepal.Length)
#'
#' f <- list_wrap_x(summary)
#' f(x = iris$Sepal.Length)
#'
#' f2 <- list_wrap_df(summary)
#' f2(df = iris$Sepal.Length)
#'
list_wrap_x <- function(f) {
    function(x, ...) {
        vs <- as.list(f(x, ...))
        ret <- mapply(function(v, nm) {
            rcell(v, label = nm)
        },
        v = vs,
        nm = names(vs))
        ret

    }
}

#' @rdname list_wrap
#' @export
list_wrap_df <- function(f) {
    function(df, ...) {
        vs <- as.list(f(df, ...))
        ret <- mapply(function(v, nm) {
            rcell(v, label = nm)
        },
        v = vs,
        nm = names(vs))
        ret
    }
}


#' Layout with 1 column and zero rows
#'
#' Every layout must start with a basic table.
#'
#' @export
#' @inheritParams constr_args
#' @param show_colcounts logical(1). Should column counts be displayed in the
#'   resulting table when this layout is applied to data
#' @param colcount_format character(1). Format for use when displaying the
#' column counts. Must be 1d, or 2d where one component is a percent. See
#' details.
#'
#' @details
#'
#' `colcount_format` is ignored if `show_colcounts` is `FALSE` (the default).
#' When `show_colcounts` is `TRUE`, and `colcount_format` is 2-dimensional with
#' a percent component, the value component for the percent is always populated
#' with `1` (i.e. 100%). 1d formats are used to render the counts exactly as they
#' normally would be, while 2d formats which don't include a percent, and all 3d
#' formats result in an error. Formats in the form of functions are not supported for
#' `colcount` format. See [formatters::list_valid_format_labels()] for
#' the list of valid format labels to select from.
#' 
#'
#' @inherit split_cols_by return
#'
#' @note - Because percent components in `colcount_format` are *always*
#' populated with the value 1, we can get arguably strange results, such as
#' that individual arm columns and a combined "all patients" column all
#' list "100%" as their percentage, even though the individual arm columns
#' represent strict subsets of the all patients column.
#'
#' - Note that subtitles ([subtitles()]) and footers ([main_footer()] and [prov_footer()]) 
#' that spans more than one line can be supplied as a character vector to maintain 
#' indentation on multiple lines.
#' 
#' @examples
#'
#' lyt <- basic_table() %>%
#'   analyze("AGE", afun = mean)
#'
#' tbl <- build_table(lyt, DM)
#' tbl
#'
#' lyt2 <- basic_table(title = "Title of table",
#'                    subtitles = c("a number", "of subtitles"),
#'                    main_footer = "test footer",
#'                    prov_footer = paste("test.R program, executed at",
#'                                        Sys.time())) %>%
#'   split_cols_by("ARM") %>%
#'   analyze("AGE", mean)
#'
#' tbl2 <- build_table(lyt2, DM)
#' tbl2
#'
#' lyt3 <- basic_table(show_colcounts = TRUE,
#'                     colcount_format = "xx. (xx.%)") %>%
#'   split_cols_by("ARM")
#'
basic_table <- function(title = "",
                        subtitles = character(),
                        main_footer = character(),
                        prov_footer = character(),
                        show_colcounts = FALSE,
                        colcount_format = "(N=xx)",
                        inset = 0L) {
    inset <- as.integer(inset)
    if(is.na(inset) || inset < 0L)
        stop("Got invalid table_inset value, must be an integer > 0")
    ret <- PreDataTableLayouts(title = title,
                        subtitles = subtitles,
                        main_footer = main_footer,
                        prov_footer = prov_footer,
                        table_inset = as.integer(inset))
    if(show_colcounts)
        ret <- add_colcounts(ret, format = colcount_format)
    ret
}


#' Append a description to the 'top-left' materials for the layout
#'
#' @description This function \emph{adds} \code{newlines} to the current
#' set of "top-left materials".
#' @details
#'
#' Adds \code{newlines} to the set of strings representing the 'top-left'
#' materials declared in the layout (the content displayed to the left of
#' the column labels when the resulting tables are printed).
#'
#' Top-left material strings are stored and then displayed \emph{exactly as is},
#' no structure or indenting is applied to them either when they are added
#' or when they are displayed.
#' @inheritParams lyt_args
#'
#' @inherit split_cols_by return
#'
#' @param newlines character. The new line(s) to be added to the materials
#' @note Currently, where in the construction of the layout this is called
#' makes no difference, as it is independent of the actual splitting keywords.
#' This may change in the future.
#' @note This function is experimental, its name and the details of
#' its behavior are subject to change in future versions.
#'
#' @export
#' @seealso [top_left()]
#'
#' @examples
#' library(dplyr)
#'
#' DM2 <- DM %>% mutate(RACE = factor(RACE), SEX = factor(SEX))
#'
#' lyt <- basic_table() %>%
#'   split_cols_by("ARM") %>%
#'   split_cols_by("SEX") %>%
#'   split_rows_by("RACE") %>%
#'   append_topleft("Ethnicity") %>%
#'   analyze("AGE") %>%
#'   append_topleft("  Age")
#'
#' tbl <- build_table(lyt, DM2)
#' tbl
#'
append_topleft <- function(lyt, newlines) {
    stopifnot(is(lyt, "PreDataTableLayouts"),
              is(newlines, "character"))
    lyt@top_left <- c(lyt@top_left, newlines)
    lyt
}

Try the rtables package in your browser

Any scripts or data that you put into this service are public.

rtables documentation built on Aug. 30, 2023, 5:07 p.m.