R/continuous_table.R

Defines functions invert_panel_by pt_cont_long_notes pt_cont_long pt_cont_wide_notes pt_cont_wide cont_table_data

Documented in cont_table_data pt_cont_long pt_cont_long_notes pt_cont_wide pt_cont_wide_notes

#' Create continuous summary data frame
#'
#' @inheritParams pt_cont_long
#' @param by grouping variable name
#' @param panel paneling variable name
#' @param all_name label for full data summary
#' @param digits named list specifying `digits` argument for `digit_fun`
#' @param wide `logical`; if `TRUE`, output will be wide; output will be long
#' otherwise
#' @param fun continuous data summary function
#' @param id_col the ID column name
#'
#' @export
cont_table_data <- function(data, cols, by = ".total", panel = by, wide = FALSE,
                            all_name = "all", digits = new_digits(), id_col = "ID",
                            na_fill = "--",
                            fun = cont_long_fun) {

  cols <- unname(new_names(cols))
  by <- unname(new_names(by))
  panel <- unname(new_names(panel))

  data <- data_total_col(data, all_name = all_name)
  check_continuous(data,cols)
  check_discrete(data,by)

  assert_that(inherits(digits, "digits"))

  digits <- update_digits(digits,cols)
  digit_fun <- get_digits_fun(digits)
  digit_data <- get_digits_list(digits)

  groups <- c("name")
  if(!is.null(by)) groups <- c(by,groups)
  if(!is.null(panel)) groups <- c(panel,by,groups)
  groups <- unique(groups)

  d0 <- select(data, all_of(unname(c(panel,by,cols))))

  d1 <- pivot_longer(d0,all_of(cols))
  d1 <- mutate(d1, digitn = unlist(digit_data[.data[["name"]]]))
  d1 <- mutate(d1,name = fct_inorder(.data[["name"]]))

  if(!is.null(by)) {
    d1 <- group_by(d1,!!!syms(groups))
    join_cols <- unique(c(panel,by,"name"))
  } else {
    d1 <- group_by(d1,!!!syms(groups))
    join_cols <- "name"
  }

  if(packageVersion("dplyr") < '0.8.99') { # nocov start
    d2 <- group_modify(
      d1,
      ~fun(
        value = .$value,
        digit_fun = digit_fun,
        digits = .$digitn[1],
        name = .$name[1],
        na_fill = na_fill
      ),
      keep = TRUE
    ) # nocov end
  } else {
    d2 <- group_modify(
      d1,
      ~fun(
        value = .$value,
        digit_fun = digit_fun,
        digits = .$digitn[1],
        name = .$name[1],
        na_fill = na_fill
      ),
      .keep = TRUE
    )
  }

  d4 <- rename(d2, outer = !!sym(by))
  if(wide) {
    d4 <- pivot_wider(d4, names_from  = "name", values_from = "summary")
  }
  d4 <- ungroup(d4)
  return(d4)
}

#' Continuous data summary in wide format
#'
#' This function summarizes your data in a specific way and returns an object
#' that can be converted into a `latex` table.
#'
#' @param data the data frame to summarize; the user should filter or subset
#' so that `data` contains exactly the records to be summarized; pmtables will
#' not add or remove rows prior to summarizing `data`
#' @param cols the columns to summarize; may be character vector or quosure
#' @param by a grouping variable; may be character vector or quosure
#' @param panel data set column name to stratify the summary
#' @param table a named list to use for renaming columns (see details and
#' examples)
#' @param units a named list to use for unit lookup (see details and examples)
#' @param digits a `digits` object (see [new_digits()])
#' @param all_name a name to use for the complete data summary
#' @param fun the data summary function (see details)
#' @param na_fill value to fill with when all values in the summary are missing
#' @param id_col the ID column name
#'
#' @return
#' An object with class `pmtable`; see [class-pmtable].
#'
#' @details
#'
#' The default summary function is [cont_wide_fun()]. Please review that
#' documentation for details on the default summary for this table.
#'
#' The notes for this table are generated by [pt_cont_wide_notes()].
#'
#' @section Custom summary function:
#' The summary function (`fun`) should take `value` as the first argument and
#' return a data frame or tibble with one row and one column named `summary`.
#' The function can also accept an `id` argument which is a vector of `IDs`
#' that is the same length as `value`. Be sure to include `...` to the function
#' signature as other arguments will be passed along. Make sure your function
#' completely formats the output ... it will appear in the table as you return
#' from this function. See [cont_wide_fun()] for details on the default
#' implementation.
#'
#' @examples
#' pmtables:::cont_wide_fun(rnorm(100))
#'
#' out <- stable(pt_cont_wide(pmt_first, cols = "AGE,WT,SCR"))
#' out
#'
#' \dontrun{
#' st2report(out)
#' }
#'
#' @export
pt_cont_wide <- function(data, cols,
                         by = ".total",
                         panel = by,
                         table = NULL,
                         units = NULL,
                         digits = new_digits(),
                         all_name = "All data",
                         fun = cont_wide_fun,
                         na_fill = "--",
                         id_col = "ID") {

  if(!missing(id_col)) {
    deprecate_warn("0.5.3", "pt_cont_wide(id_col)")
  }

  has_panel <- !missing(panel)
  panel_data <- as.panel(panel)
  panel <- panel_data$col

  has_by <- !missing(by)

  tst <- fun(c(1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7))
  assert_that(identical(names(tst), "summary"))

  cols <- new_names(cols,table)
  by <- new_names(by,table)
  panel <- new_names(panel,table)

  data <- data_total_col(data, all_name = all_name)

  ans <- cont_table_data(
    data = data,
    cols = cols,
    by = by,
    panel = panel,
    fun = fun,
    na_fill = na_fill,
    digits = digits,
    wide = TRUE
  )

  all_summary <- FALSE
  if(has_panel || has_by) {
    all_summary <- TRUE
    ans2 <- cont_table_data(
      data = data,
      cols = cols,
      by = ".total",
      panel = ".total",
      fun = fun,
      na_fill = na_fill,
      digits = digits,
      wide = TRUE
    )
    all_name_fmt <- paste0("\\hline \\hline {\\bf ",all_name,"}")
    if(has_panel) {
      if(has_by) {
        ans2 <- mutate(ans2, !!sym(panel) := ".panel.waiver.")
        ans2[["outer"]] <- all_name_fmt
      } else {
        ans2 <- mutate(ans2, !!sym(panel) := all_name)
      }
    }
    if(!has_panel && has_by) {
      ans2[["outer"]] <- all_name_fmt
    }
    ans <- bind_rows(ans,ans2)
  }

  if(has_by) {
    ans <- rename(ans, !!sym(by) := outer)
  }

  if(exists(by, ans)) {
    where <- names(ans)==by
    names(ans)[where] <- names(by)
  }

  ans[["outer"]] <- NULL
  ans[[".total"]] <- NULL

  .panel <- rowpanel(NULL)
  if(has_panel) {
    .panel <- panel_data
    .panel$prefix_skip <- all_name
  }

  out <- list(
    data = ans,
    align = cols_left(),
    panel = .panel,
    units = units,
    bold_cols = !has_panel,
    notes = pt_cont_wide_notes()
  )

  if(!all(names(cols)==cols)) {
    out$cols_rename <- cols
  }

  out <- structure(out, class = c("pmtable", class(out)))

  out
}

#' Return table notes for pt_cont_wide
#'
#' See [pt_cont_wide()].
#'
#' @param note_add additional notes to include
#'
#' @export
pt_cont_wide_notes <- function(note_add = NULL) {
  ans <- note_add
  ans <- c(ans, "Summary is mean (sd) [count]")
  ans
}

#' Continuous data summary in long format
#'
#' This function summarizes your data in a specific way and returns an object
#' that can be converted into a `latex` table.
#'
#' @inheritParams pt_cont_wide
#' @param by a grouping variable that will silently overwrite the value of
#' `panel` if `panel` is also passed; see details and the differences in table
#' output when either `panel` or `by` are passed
#' @param summarize_all if `TRUE` then a complete data summary will be appended
#' to the bottom of the table
#'
#' @details
#' Passing the `panel` variable will partition the table in panels defined by
#' the non-repeating values of that data column, and `cols` will form the rows
#' within each panel. Alternatively, passing in the `by` variable will panel by
#' the different levels of `cols` and the levels of `by` will form the rows
#' within each panel.
#'
#' The default summary function is [cont_long_fun()]. Please review that
#' documentation for details on the default summary for this table.
#'
#' The notes for this table are generated by [pt_cont_long_notes()].
#'
#' @section Custom summary function:
#' The summary function (`fun`) should take `value` as the first argument and
#' return a data frame or tibble with one row as many columns as you wish to
#' appear in the table. The function can also accept an `id` argument which is
#' a vector of `IDs` that is the same length as `value`. Be sure to include
#' `...` to the function signature as other arguments will be passed along.
#' Make sure your function completely formats the output ... it will appear in
#' the table as you return from this function. See [cont_long_fun()] for
#' details on the default implementation.
#'
#'
#' @examples
#'
#' ans <- pt_cont_long(pmt_first, cols = dplyr::vars(WT,ALB,CRCL))
#'
#' ans <- pt_cont_long(pmt_first, cols = "WT,CRCL", panel = "SEXf")
#'
#' ans <- pt_cont_long(pmt_first, cols = "WT,CRCL", by = "SEXf")
#'
#' pmtables:::cont_long_fun(rnorm(100))
#'
#' @return
#' An object with class `pmtable`; see [class-pmtable].
#'
#' @export
pt_cont_long <- function(data,
                         cols,
                         panel = ".total",
                         by = NULL,
                         table = NULL,
                         units = NULL,
                         digits = new_digits(),
                         summarize_all = TRUE,
                         all_name = "All data",
                         fun = cont_long_fun,
                         na_fill = "--",
                         id_col = "ID") {

  if(!missing(id_col)) {
    deprecate_warn("0.5.3", "pt_cont_long(id_col)")
  }

  switch_panel_by <- FALSE

  if(!missing(by)) {
    panel <- as.panel(by)
    assert_that(!panel$null, msg = "'by' should not be NULL")
    switch_panel_by <- TRUE
  }

  has_panel <- !missing(panel) || !missing(by)
  panel_data <- as.panel(panel)
  panel <- panel_data$col
  names(panel) <- panel_data$prefix

  by <- panel
  summarize_all <- summarize_all & by != ".total"
  data <- data_total_col(data)

  cols <- new_names(cols,table)
  by <- new_names(by,table)

  ans <- cont_table_data(
    data = data,
    cols = unname(cols),
    by = unname(by),
    fun = fun,
    na_fill = na_fill,
    digits = digits,
    wide = FALSE
  )

  if(by==".total") ans <- mutate(ans, outer = all_name)

  if(summarize_all) {
    ans2 <- cont_table_data(
      data = data,
      cols = unname(cols),
      by = ".total",
      fun = fun,
      na_fill = na_fill,
      digits = digits,
      wide = FALSE
    )
    ans2 <- mutate(ans2, outer = all_name)
    ans <- bind_rows(ans,ans2)
  }

  .name <- as.character(ans$name)
  ans <- mutate(ans, name = as.character(names(cols)[.data[["name"]]]))

  if(is.list(units) & rlang::is_named(units)) {
    has_unit <- .name %in% names(units)
    ans <- mutate(
      ans,
      name   = case_when(
        has_unit ~ paste0(.data[["name"]], " ", units[.name]),
        TRUE ~ .data[["name"]]
      )
    )
  }

  if(names(ans)[1]=="outer") {
    names(ans)[1] <- unname(by)
  }

  for(i in c(1,2)) {
    if(names(ans)[i] == "name") {
      names(ans)[i] <- "Variable"
    }
  }

  ans[[".total"]] <- NULL

  .panel <- rowpanel(NULL)
  if(has_panel) {
    .panel <- panel_data
    .panel$prefix_skip <- all_name
  }

  out <- list(
    data = ans,
    align = cols_center(.outer = "lr"),
    panel = .panel,
    bold_cols = !has_panel,
    notes = pt_cont_long_notes()
  )

  if(switch_panel_by) {
    out <- invert_panel_by(out, panel, units, all_name)
  }

  out <- structure(out, class = c("pmtable", class(out)))

  out
}

#' Return table notes for pt_cont_long
#'
#' See [pt_cont_long()].
#'
#' @param note_add additional notes to include
#'
#' @export
pt_cont_long_notes <- function(note_add = NULL) {
  ans <- note_add
  ans <- c(ans, "n: number of records summarized")
  ans <- c(ans, "SD: standard deviation")
  ans <- c(ans, "Min: minimum; Max: maximum")
  ans
}

invert_panel_by <- function(out, panel, units, all_name) {
  if(out$panel$null) return(out)
  out$data <- mutate(out$data, Variable = fct_inorder(.data[["Variable"]]))
  out$data <- mutate(out$data, !!sym(panel) := fct_inorder(!!sym(panel)))
  out$data <- arrange(out$data, .data[["Variable"]], !!sym(panel))
  if(all_name %in% out$data[[panel]]) {
    out$sumrows <- sumrow(out$data[[panel]] == all_name, it = TRUE, hline = FALSE)
  }
  out$data[["Variable"]] <- paste_units(out$data[["Variable"]], units)
  if(is_named(panel)) {
    out$data <- rename(out$data, !!sym(names(panel)) := !!sym(panel))
  }
  out$panel$col <- "Variable"
  out$panel$prefix <- NULL
  out
}
metrumresearchgroup/pmtables documentation built on Oct. 27, 2024, 5:16 p.m.