R/discrete_table.R

Defines functions pt_cat_wide_notes pt_cat_wide pt_cat_long_notes pt_cat_long cat_data prep_cat_data cat_long_all

Documented in cat_data pt_cat_long pt_cat_long_notes pt_cat_wide pt_cat_wide_notes

cat_long_all <- function(data, group = ".total", all_name = "All data") {
  N <- nrow(data)
  data_long <- pivot_longer(
    data,
    cols = all_of(group),
    values_to = "by"
  )
  data_long <- group_by(data_long, by)
  data_summ <- summarise(
    data_long,
    level = all_name,
    N = .env[["N"]],
    #value = paste(n(), paste0("(",digit1(100*n()/N),")")),
    value = paste0("n = ", n()),
    .groups = "drop"
  )
  pivot_wider(data_summ, names_from = "by")
}

prep_cat_data <- function(data, cols) {
  for(col in cols) {
    if(anyNA(data[[col]])) {
      warning(glue('col `{col}`: missing values replaced with "NA"'))
      is_fctr <- is.factor(data[[col]])
      if(is_fctr) {
        lvls <- unique(c(levels(data[[col]]),"NA"))
        data[[col]] <- as.character(data[[col]])
      }
      w <- is.na(data[[col]])
      data[[col]][w] <- rep("NA", sum(w))
      if(is_fctr) data[[col]] <- factor(data[[col]], levels = lvls)
    }
    if(!is.factor(data[[col]])) {
      data[[col]] <- fct_inorder(data[[col]])
    }
  }
  data
}

#' Summarize categorical data
#'
#' @inheritParams pt_cont_wide
#' @inheritParams pt_cat_wide
#' @param summarize_all logical indicating whether or not to include a summary
#' of the full data in the output.
#' @param all_name label for full data summary.
#' @param nby number of unique levels for the `by` variable.
#' @param wide `logical`; if `TRUE`, data frame will be returned in wide format;
#' if `FALSE`, it will be returned in `long` format.
#' @param denom the denominator to use when calculating percent for each level;
#' `group` uses the total number in the chunk being summarized; `total` uses
#' the total number in the data set; historically, `group` has been used as the
#' default.
#'
#' @examples
#'
#' cat_data(pmt_first, cols = c(SEX = "SEXf", RF = "RFf"), by = "STUDYf")
#'
#' @export
cat_data <- function(data, cols, by = ".total", panel = by,
                     summarize_all = TRUE, all_name = "All",
                     wide = FALSE, nby = NULL, complete = FALSE,
                     denom = c("group", "total")) {

  denom <- match.arg(denom)

  cols <- new_names(cols)

  data <- ungroup(data)

  data <- data_total_col(data, all_name)

  if(is.null(nby)) nby <- length(unique(data[[by]]))

  .groups <- unique(c(panel, by))

  data <- group_by(data, !!!syms(unname(.groups)))

  ans <- group_modify(
    data,
    ~ summarize_cat_chunk(., cols = cols, N = nrow(data), denom = denom)
  )

  ans <- ungroup(ans)

  ans <- mutate(ans, name = names(cols)[.data[["name"]]])

  if(wide) {
    ans <- pivot_wider(
      ans,
      names_from = c("name", "level"),
      values_from = "summary",
      names_sep = '_._'
    )
    if(isTRUE(complete)) {
      ans <- complete(ans, !!!syms(.groups))
      nstart <- length(.groups)
      nend <- ncol(ans)
      ans <- mutate(ans, across(nstart+1, function(.x) replace_na(.x, 0)))
      ans <- mutate(ans, across(seq(nstart+2, nend), function(.x) replace_na(.x, "0 (0.0)")))
    }
    ans
  } else {
    ans[["N"]] <- NULL
    ans <- pivot_wider(
      ans,
      names_from = all_of(unname(by)),
      values_from = "summary",
      names_sep = '_._'
    )
  }
  ans
}

#' Discrete data summary in long format
#'
#' @inheritParams pt_cont_long
#' @inheritParams cat_data
#' @param span variable name for column spanner
#' @param all_name_span table column name to use for data summaries across
#' levels of `span` if it is provided
#' @param summarize where to include a data summary across subgroups;
#' use `none` to drop the summary from the table
#' @param by use `span` argument instead
#'
#' @details
#' The data summary for all cells in the table is `count (percent)`. The number
#' of data records in each column variable level is given under the column
#' title as `n`.
#'
#' When `group` is selected for `denom`, `percent` is calculated with
#' denominator set to `n`, the total for each column variable level. When
#' `total` is selected for `denom`, then `percent` is calculated by the total
#' number of records in the input data.
#'
#' The notes in this table are generated with [pt_cat_long_notes()].
#'
#' @examples
#'
#' out <- pt_cat_long(pmt_first, cols = "SEXf,ASIANf", span = "FORMf")
#'
#' \dontrun{
#' st2report(stable(out))
#' }
#'
#' @return
#' An object with class `pmtable`; see [class-pmtable].
#'
#' @export
pt_cat_long <- function(data, cols, span  = ".total",
                        all_name = " ",
                        all_name_span = "Summary",
                        summarize = c("both", "right", "top", "none"),
                        table = NULL, by = NULL,
                        denom = c("group", "total")) {

  summarize <- match.arg(summarize)
  denom <- match.arg(denom)
  summarize_all <- summarize != "none"
  complete <- isTRUE(complete)

  has_span <- !missing(span)

  if(!missing(by) & missing(span)) {
    warning("the 'by' argument was used; maybe you wanted 'span' instead?")
  }

  if(span == ".total" & missing(all_name_span)) {
    all_name_span <- "Summary"
  }

  cols <- new_names(cols, table = table)

  data <- data_total_col(data, all_name_span)

  assert_that(length(span)==1)
  span <- new_names(span, table = table)

  spans <- levels(factor(data[[span]]))
  nspan <- length(spans)

  check_discrete(data = data, cols = cols, others = span)

  data <- prep_cat_data(data, cols)

  ans <- cat_data(
    data = data,
    cols = cols,
    by = span,
    nby = nspan,
    denom = denom
  )

  if(summarize_all) {
    if(has_span && summarize %in% c("right", "both")) {
      all <- cat_data(
        data,
        cols = cols,
        by = ".total",
        nby = nspan,
        all_name = all_name_span,
      )
      all[["N"]] <- NULL
      ans <- left_join(ans, all, by = c("name", "level"))
    }
  }

  output_span <- NULL

  if(has_span) {
    output_span <- colgroup(names(span), unique(data[[span]]))
  }

  out <- list(
    data = ans,
    span = output_span,
    align = cols_center(.outer = 'l'),
    cols_rename = span,
    cols_blank = "level",
    panel = "name",
    notes = pt_cat_long_notes()
  )

  if(summarize %in% c("top", "both")) {
    bot <- cat_long_all(data, unname(span))
    if(summarize=="both") {
      bot[[all_name_span]] <- paste0("n = ", nrow(data))
    }
    bot[["N"]] <- NULL
    bot[["level"]] <- ""
    out$cols_extra <- bot
  }

  out <- structure(out, class = c("pmtable", class(out)))

  return(out)
}

#' Return table notes for pt_cat_long
#'
#' See [pt_cat_long()].
#'
#' @param include_n if `TRUE`, add a note for `n`
#' @param note_add additional notes to include
#'
#' @export
pt_cat_long_notes <- function(include_n = TRUE, note_add = NULL) {
  ans <- note_add
  ans <- c(ans, "Summary is count (percent)")
  if(isTRUE(include_n)) {
    ans <- c(ans, "n: number of records summarized")
  }
  ans
}

#' Discrete data summary in long format
#'
#' @inheritParams pt_cont_wide
#' @inheritParams cat_data
#' @param by a grouping variable for the summary; may be given as character
#' vector or quosure.
#' @param summarize where to put an all-data summary; choose `none` to omit the
#' summary from the table.
#' @param complete logical; if `TRUE`, then data the summary will be completed
#' for missing levels of `by`and `panel`.
#'
#' @details
#' The data summary for this table is `count (percent)`. The number of
#' data points for each row is also summarized as `n` on the left hand side
#' of the table (either on the far left or just to the right of the `by`
#' column).
#'
#' When `group` is selected for `denom`, `percent` is calculated with
#' denominator set to `n`, the total for each row. When `total` is selected for
#' `denom`, then `percent` is calculated by the total number of records in the
#' input data.
#'
#' The notes in this table are generated with [pt_cat_wide_notes()].
#'
#' @examples
#' out1 <- pt_cat_wide(pmt_first, cols = "SEXf,ASIANf")
#' stable(out1)
#'
#' out2 <- pt_cat_wide(pmt_first, cols = "SEXf,ASIANf", by = "FORMf")
#' stable(out2)
#'
#' \dontrun{
#' st2report(stable(out1))
#' st2report(stable(out2))
#' }
#'
#' @return
#' An object with class `pmtable`; see [class-pmtable].
#'
#' @export
pt_cat_wide <- function(data, cols, by = ".total", panel = by,
                        table = NULL, all_name = "All data",
                        summarize = c("bottom", "none"), complete = FALSE,
                        denom = c("group", "total")) {

  summarize <- match.arg(summarize)
  summarize_all <- summarize != "none"
  denom <- match.arg(denom)

  has_by <- !missing(by)
  has_panel <- !missing(panel)
  panel_data <- as.panel(panel)
  panel <- panel_data$col

  cols <- new_names(cols, table)
  by <- new_names(by, table)
  panel <- new_names(panel,table)

  assert_that(length(by) == 1)

  data <- data_total_col(data, all_name)

  nby <- length(unique(data[[by]]))

  check_discrete(data = data, cols = cols, others = by)

  summarize_all <- summarize_all & (has_by | has_panel)

  data <- prep_cat_data(data, cols)

  ans <- cat_data(
    data,
    cols,
    by = by,
    panel = panel,
    wide = TRUE,
    complete = complete,
    denom = denom
  )

  ans <- mutate(ans, !!sym(by) := as.character(!!sym(by)))

  if(summarize_all) {

    all <- cat_data(
      data,
      cols,
      by = ".total",
      panel = ".total",
      wide = TRUE,
      complete = complete
    )

    all_name_fmt <- paste0("\\hline \\hline {\\bf ",all_name,"}")

    if(has_panel) {
      if(has_by) {
        all <- mutate(all, !!sym(panel) := ".panel.waiver.")

        all[[by]] <- all_name_fmt
      } else {
        all <- mutate(all, !!sym(panel) := all_name)
      }
    } else {
      if(has_by) {
        all <- mutate(all, !!sym(by) := all_name_fmt)
      }
    }
    ans <- bind_rows(ans, all)
  }

  ans <- ungroup(ans)

  ans[[".total"]] <- NULL

  if("N" %in% names(ans)) {
    ans <- rename(ans, n = "N")
  }

  .panel <- rowpanel(NULL)
  if(has_panel) {
    .panel <- panel_data
    .panel$prefix_skip <- all_name
  }

  if(has_by) {
    names(ans)[names(ans)==by] <- names(by)[1]
  }

  out <- list(
    data = ans,
    span_split = colsplit(sep = '_._'),
    align = cols_center(.outer = 'l'),
    cols_rename = c(.panel$col,by),
    panel = .panel,
    notes = pt_cat_wide_notes()
  )

  out <- structure(out, class = c("pmtable", class(out)))

  return(out)
}

#' Return table notes for pt_cat_wide
#'
#' See [pt_cat_wide()].
#'
#' @param include_n include the note for `n` column
#' @param note_add additional notes to include
#'
#' @export
pt_cat_wide_notes <- function(include_n = TRUE, note_add = NULL) {
  ans <- note_add
  ans <- c(ans, "Summary is count (percent)")
  if(isTRUE(include_n)) {
    ans <- c(ans, "n: number of records summarized")
  }
  ans
}
metrumresearchgroup/pmtables documentation built on Oct. 27, 2024, 5:16 p.m.