Nothing
#' Generate nested count/percent for two or three levels
#'
#' This will call `freq()` multiple times and combine the levels together. This
#' is useful for adverse event and concomitant mediations.
#'
#' @param df (required) dataframe containing the two levels to summarize
#' @param denom_df (optional) dataframe containing records to use as the
#' denominator (default = df)
#' @param colvar (required) treatment variable within df to use to summarize
#' @param tablebyvar (optional) repeat entire table by variable within df.
#' @param rowvar (required) nested levels separated by a star, for example
#' AEBODSYS*AEDECOD, this can handle up to three levels.
#' @param rowbyvar (optional) repeat `rowvar` by variable within df
#' @param statlist (optional) count/percent type to return (default = "n (x.x)")
#' @param decimal (optional) decimal precision root level (default = 1)
#' @param cutoff (optional) numeric value used to cut the data to a percentage
#' threshold, if any column meets the threshold the entire record is kept.
#' @param cutoff_stat (optional) The value to cutoff by, n or pct. (default =
#' 'pct')
#' @param subset (optional) An R expression that will be passed to a
#' `dplyr::filter()` function to subset the data.frame
#' @param descending_by (optional) The column or columns to sort descending
#' values by. Can also provide a named list to do ascending order. ex.
#' c("VarName1" = "asc", "VarName2" = "desc") would sort by VarName1 in
#' ascending order and VarName2 in descending order. If not provided, the
#' columns will be sorted alphabetically.
#' @param display_missing (optional) Should the "missing" values be displayed?
#' (default = FALSE)
#' @param rowtext (optional) A character vector used to rename the `label`
#' column. If named, names will give the new level and values will be the
#' replaced value. If unnamed, and the table has only one row, the `rowtext`
#' will rename the `label` of the row.
#' @param row_header (optional) A character vector to be added to the table.
#' @param .keep (optional) Should the `rowbyvar` and `tablebyvar` be output in the
#' table. If FALSE, `rowbyvar` will still be output in the `label` column.
#' (default = TRUE)
#' @param .ord Should the ordering columns be output with the table? This is
#' useful if a table needs to be merged or reordered in any way after build.
#' @param ... (optional) Named arguments to be included as columns on the table.
#'
#' @return A dataframe of nested results by `colvar` and optional `tablebyvar`.
#' There are a few additional variable sets added to support multiple
#' requirements. \cr \cr The level variables (`level1_`, `level2_`,
#' `level3_`) will carry down the counts for each level to every record. This
#' allows for easy sorting of nested groups. \cr \cr The header variables
#' (`header1`, `header2`, `header3`) will flag the header for each level to
#' ensure each level header is sorted to the top of the level. \cr \cr The n
#' variables ("n_*") provide a numeric variable containing frequency for each
#' `colvar`. This can be used to sort and filter records. \cr \cr The pct
#' variables ("pct_*") provide a numeric variable containing percentages for
#' each `colvar`. This can be used to sort and filter records.\cr \cr
#'
#' @export
#'
#' @examples
#' adae <- data.frame(
#' SITEID = c("100", "100", "100","200", "200", "200"),
#' USUBJID = c("Demo1-101", "Demo1-102", "Demo1-103",
#' "Demo1-104", "Demo1-105", "Demo1-106"),
#' AEBODSYS = c("Cardiac disorders", "Cardiac disorders",
#' "Respiratory, thoracic and mediastinal disorders",
#' "Infections and infestations",
#' "Skin and subcutaneous tissue disorders",
#' "Infections and infestations"),
#' AEDECOD = c("Arrhythmia supraventricular", "Cardiac failure",
#' "Chronic obstructive pulmonary disease", "Pneumonia",
#' "Pustular psoriasis", "Upper respiratory tract infection"),
#' colnbr = structure(
#' c(1L, 2L, 3L, 1L, 2L, 3L),
#' .Label = c("Active", "Placebo", "Comparator"),
#' class = "factor"
#' )
#' )
#'
#' # Frequency and percent for two levels of nesting
#' nested_freq(adae
#' ,colvar = "colnbr"
#' ,rowvar = "AEBODSYS*AEDECOD"
#' ,statlist = statlist("n (x.x%)"))
#'
#' # Frequency and percent for three levels of nesting (for illustrative
#' # purpose)
#' nested_freq(adae
#' ,colvar = "colnbr"
#' ,rowvar = "SITEID*AEBODSYS*AEDECOD"
#' ,statlist = statlist("n (x.x%)"))
#'
#' # Cut records where pct meets threshold for a any column
#' nested_freq(cdisc_adae
#' ,colvar = "TRTA"
#' ,rowvar = "AEBODSYS*AEDECOD"
#' ,statlist = statlist("n (x.x%)", distinct = TRUE)
#' ,cutoff = 2
#' ,cutoff_stat = "n")
#'
#' # Cut records where pct meets threshold for a specific column
#' nested_freq(cdisc_adae
#' ,rowvar = "AEBODSYS*AEDECOD"
#' ,colvar = "TRTAN"
#' ,statlist = statlist("n (x.x%)", distinct = TRUE)
#' ,cutoff = "54 >= 2"
#' ,cutoff_stat = "n")
#'
#' # Frequency and percent for two levels of nesting and sort by descending
#' # active
#' nested_freq(adae
#' ,colvar = "colnbr"
#' ,rowvar = "AEBODSYS*AEDECOD"
#' ,statlist = statlist("n (x.x%)")
#' ,descending = "Active")
#'
#'# Below illustrates how make the same calls to nested_freq() as above, using
#'# table and # column metadata along with generate_results().
#'
#'column_metadata <- tibble::tribble(
#' ~tbltype, ~coldef, ~decode,
#' "type1", "1", "Placebo",
#' "type1", "2", "Low",
#' "type1", "3", "High"
#')
#'
#'# Frequency and percent for two levels of nesting
#'table_metadata <- tibble::tribble(
#' ~anbr, ~func, ~df, ~rowvar, ~tbltype, ~colvar, ~statlist,
#' "1", "nested_freq", "cdisc_adae", "AEBODSYS*AEDECOD", "type1", "TRTP",
#' statlist("n (x.x%)")
#')
#'#generate_results(table_metadata,
#'#column_metadata_file = tidytlg_metadata(path)
#'
#'
#'# Frequency and percent for three levels of nesting (for illustrative purpose)
#'table_metadata <- tibble::tribble(
#' ~anbr, ~func, ~df, ~rowvar, ~tbltype, ~colvar,
#' ~statlist,
#' "1", "nested_freq", "cdisc_adae", "SITEID*AEBODSYS*AEDECOD","type1",
#' "TRTP", statlist("n (x.x%)")
#')
#' # Commented out because it takes too long
#' # generate_results(table_metadata, column_metadata)
#'
#' #Cut records where pct meets threshold for a any column
#'column_metadata <- tibble::tribble(
#' ~tbltype, ~coldef, ~decode,
#' "type2", "1", "Placebo",
#' "type2", "2", "Active"
#')
#'table_metadata <- tibble::tibble(
#' anbr = "1", func = "nested_freq", df= "cdisc_adae",
#' rowvar = "AEBODSYS*AEDECOD",
#' tbltype = "type2", colvar = "TRTP", statlist = statlist("n (x.x%)"),
#' dotdotdot = "cutoff = 5"
#')
#' #generate_results(table_metadata,
#' # column_metadata_file = tidytlg_metadata(path)
#'
#'# Cut records where pct meets threshold for a specific column
#'table_metadata <- tibble::tibble(
#' anbr = "1", func = "nested_freq", df= "cdisc_adae",
#' rowvar = "AEBODSYS*AEDECOD",
#' tbltype = "type2", colvar = "TRTP", statlist = statlist("n (x.x%)"),
#' dotdotdot = "cutoff = 'col1 >= 5'"
#')
#' #generate_results(table_metadata,
#' #column_metadata_file = tidytlg_metadata(path)
#'
#'# Frequency and percent for two levels of nesting and sort by descending col1
#'table_metadata <- tibble::tibble(
#' anbr = "1", func = "nested_freq", df= "cdisc_adae",
#' rowvar = "AEBODSYS*AEDECOD",
#' tbltype = "type2", colvar = "TRTP", statlist = statlist("n (x.x%)"),
#' dotdotdot = "descending = 'col1'"
#')
#' #generate_results(table_metadata,
#' #column_metadata_file = tidytlg_metadata(path)
nested_freq <- function(df,
denom_df = df,
colvar = NULL,
tablebyvar = NULL,
rowvar = NULL,
rowbyvar = NULL,
statlist = getOption("tidytlg.nested_freq.statlist.default"),
decimal = 1,
cutoff = NULL,
cutoff_stat = "pct",
subset = TRUE,
descending_by = NULL,
display_missing = FALSE,
rowtext = NULL,
row_header = NULL,
.keep = TRUE,
.ord = FALSE,
...) {
subset <- rlang::enexprs(subset)
if (length(subset) > 0 && is.character(subset[[1]]))
subset <- as.list(parse(text = subset))
# check all the arguments being passed in except ...
arglist <- list()
args_to_chk <- names(formals())[names(formals()) != "..."]
purrr::walk(
args_to_chk,
.f = function(x) {
arglist[[x]] <<- eval(rlang::sym(x))
}
)
check_nested_freq(arglist)
level <- stringr::str_split(rowvar, "\\s*\\*\\s*", simplify = TRUE)[1, ]
attr(df, "nested") <- TRUE
attr(df, "nested_level") <- level
attr(df, "rowbyvar") <- rowbyvar
# If the statlist has a denoms_by config, use that. Otherwise default to colvar,
# tablebyvar, and rowbyvar
denoms_by <- statlist[["denoms_by"]] %||% c(colvar, tablebyvar)
if (getOption("tidytlg.denoms.message"))
message("denominator set using ", paste0(denoms_by, collapse = ", "),
". You can overwrite using the denom_by argument in statlist.\n", sep = "")
colvar_cols <- levels(as.factor(df[[colvar]]))
freqs <- df %>%
# get necessary records
filter(!!!subset) %>%
# derive nested counts
derive_nested(
denom_df = denom_df,
colvar = colvar,
level = level,
rowbyvar = rowbyvar,
tablebyvar = tablebyvar,
statlist = statlist,
denoms_by = denoms_by,
display_missing = display_missing
) %>%
# remove any results that do not meet table criteria
drop_nested(level, cutoff, colvar, level, tablebyvar, cutoff_stat, rowbyvar) %>%
round_freq(decimal) %>%
arrange_freq(statlist) %>%
row_header_merge(
colvar = colvar,
rowvar = tail(level, 1),
rowbyvar = NULL,
tablebyvar = tablebyvar,
row_header = row_header
) %>%
# pivot table to final orientation
pivot_freq(
colvar,
tail(level, 1),
c(rowbyvar, head(level, -1)),
tablebyvar,
statlist,
rowtext = rowtext,
row_header = row_header,
display_missing = FALSE,
nested = TRUE,
.keep = .keep,
.ord = .ord,
... = ...
) %>%
sort_freq(level, descending_by, tablebyvar, rowbyvar, colvar_cols, colvar)
attr(freqs, "colvar") <- colvar
attr(freqs, "tablebyvar") <- tablebyvar
attr(freqs, "rowvar") <- rowvar
attr(freqs, "statlist") <- statlist
attr(freqs, "cutoff") <- cutoff
attr(freqs, "cutoff_stat") <- cutoff_stat
attr(freqs, "descending_by") <- descending_by
attr(freqs, "rowtext") <- rowtext
attr(freqs, "row_header") <- row_header
attr(freqs, "subset") <- subset
attr(freqs, "denoms_by") <- denoms_by
structure(freqs,
class = c("tidytlg.nested_freq", "tidytlg.freq", class(freqs)))
}
#' Derive nested frequency
#'
#' @return A long dataframe with n, denom, N, and pct for each combination
#' @noRd
derive_nested <- function(df, denom_df, colvar, level, rowbyvar, tablebyvar,
statlist, descending_by, denoms_by, display_missing) {
# Get long dataframes for the "top" level
level1_df <- df %>%
derive_freq(denom_df = denom_df,
colvar = colvar,
rowbyvar = rowbyvar,
tablebyvar = tablebyvar,
rowvar = level[1],
statlist = statlist,
nested = TRUE,
rowtext = NULL,
has_subset = FALSE,
denoms_by = denoms_by,
display_missing = display_missing) %>%
mutate(
nested_level = 0)
# Get derivations for second level
level2_df <- df %>%
derive_freq(denom_df = denom_df,
colvar = colvar,
rowbyvar = c(level[1], rowbyvar),
tablebyvar = tablebyvar,
rowvar = level[2],
statlist = statlist,
nested = TRUE,
rowtext = NULL,
has_subset = FALSE,
denoms_by = denoms_by,
display_missing = display_missing) %>%
mutate(
nested_level = 1)
if (length(level) == 3) {
# If there are three levels, make a third level
level3_df <- df %>%
derive_freq(denom_df = denom_df,
colvar = colvar,
rowbyvar = c(level[1], level[2], rowbyvar),
tablebyvar = tablebyvar,
rowvar = level[3],
statlist = statlist,
nested = TRUE,
rowtext = NULL,
has_subset = FALSE,
denoms_by = denoms_by,
display_missing = display_missing) %>%
mutate(
nested_level = 2)
# In the top level dataframe, make a new variable named level[2], and level[3],
# that are the values of whatever is in level[1]. When the merge happens the
# higher levels will have values to help row bind.
level1_df <- level1_df %>%
mutate(
!!level[2] := !!sym(level[1]),
!!level[3] := !!sym(level[1])
)
level2_df <- level2_df %>%
mutate(
!!level[3] := !!sym(level[2])
)
} else {
level3_df <- NULL
level1_df <- level1_df %>%
mutate(
!!level[2] := !!sym(level[1])
)
}
# Bind the other data.frames together
bind_rows(level1_df, level2_df, level3_df) %>%
# The complete is different for nested because the levels should only include
# The combinations present in the data.
complete(nesting(!!!syms(c(level, rowbyvar))), !!!syms(c(tablebyvar, colvar)),
fill = list(n = 0, N = 0, denom = 0, pct = 0))
}
#' Drop values for a nested frequency
#'
#' Calls drop_freq but also additional logic if the lowest rowvar doesn't have
#' any values to remove all upper values.
#'
#' @return Long dataframe with values dropped
#' @noRd
drop_nested <-
function(df,
level,
cutoff,
colvar,
rowvar,
tablebyvar,
cutoff_stat,
rowbyvar) {
# Use the normal steps for dropping values
drop_freq(df = df,
cutoff = cutoff,
# tail(level, 1) is always the lowest level
rowvar = tail(level, 1),
colvar = colvar,
cutoff_stat = cutoff_stat,
# Treat the top levels as a tablebyvar so values that span higher
# level are calculated properly.
drop_by = c(tablebyvar, head(level, -1), rowbyvar)) %>%
# nest the table by any tablebyvars and the top levels
nest(nest_treatment = -any_of(c(tablebyvar, head(level, -1)))) %>%
# Update the nested tables to filter out any where n is 0 for all treatment
# groups
mutate(
nest_treatment = map(.data[["nest_treatment"]],
~ group_by(.x, !!!syms(tail(level, 1))) %>%
filter(any(n > 0)), .preserve = FALSE)
) %>%
rowwise() %>%
# If that removed all of the treatments, drop it
filter(nrow(nest_treatment) > 0) %>%
unnest(nest_treatment) %>%
# In certain cases the top level rowvar variables will persist even if there
# are no subordinate values below them. This nests the top levels(level1, or
# level1 and level2 if a three variable nest) and drops the ones that don't
# have subordinate items.
# -head(level, 1) returns everything but the top levels for nesting
nest(nested_level = -any_of(c(head(level, 1), tablebyvar))) %>%
rowwise() %>%
# If there are only 'length(unique(df[[colvar]]))' number
#of entries in the nested df, we know that the entries that are there are the
# top level and contain subordinate items, so remove them
filter(nrow(nested_level) > length(unique(df[[colvar]]))) %>%
unnest(cols = nested_level)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.