R/export.R

Defines functions extract_cells extract_summary extract_body as_word_tbl_body as_word_tbl_header_caption as_word as_rtf as_latex as_raw_html gtsave_filename gtsave_file_ext gt_save_docx gt_save_rtf gt_save_latex gt_save_webshot gt_save_html gtsave

Documented in as_latex as_raw_html as_rtf as_word extract_body extract_cells extract_summary gtsave

#------------------------------------------------------------------------------#
#
#                /$$
#               | $$
#     /$$$$$$  /$$$$$$
#    /$$__  $$|_  $$_/
#   | $$  \ $$  | $$
#   | $$  | $$  | $$ /$$
#   |  $$$$$$$  |  $$$$/
#    \____  $$   \___/
#    /$$  \ $$
#   |  $$$$$$/
#    \______/
#
#  This file is part of the 'rstudio/gt' project.
#
#  Copyright (c) 2018-2024 gt authors
#
#  For full copyright and license information, please look at
#  https://gt.rstudio.com/LICENSE.html
#
#------------------------------------------------------------------------------#


#' Save a **gt** table as a file
#'
#' @description
#'
#' The `gtsave()` function makes it easy to save a **gt** table to a file. The
#' function guesses the file type by the extension provided in the output
#' filename, producing either an HTML, PDF, PNG, LaTeX, or RTF file.
#'
#' @details
#'
#' Output filenames with either the `.html` or `.htm` extensions will produce an
#' HTML document. In this case, we can pass a `TRUE` or `FALSE` value to the
#' `inline_css` option to obtain an HTML document with inlined CSS styles (the
#' default is `FALSE`). More details on CSS inlining are available at
#' [as_raw_html()]. We can pass values to arguments in [htmltools::save_html()]
#' through the `...`. Those arguments are either `background` or `libdir`,
#' please refer to the **htmltools** documentation for more details on the use
#' of these arguments.
#'
#' If the output filename is expressed with the `.rtf` extension then an RTF
#' file will be generated. In this case, there is an option that can be passed
#' through `...`: `page_numbering`. This controls RTF document page numbering
#' and, by default, page numbering is not enabled (i.e., `page_numbering =
#' "none"`).
#'
#' We can create an image file based on the HTML version of the `gt` table. With
#' the filename extension `.png`, we get a PNG image file. A PDF document can be
#' generated by using the `.pdf` extension. This process is facilitated by the
#' **webshot2** package, so, this package needs to be installed before
#' attempting to save any table as an image file. There is the option of passing
#' values to the underlying [webshot2::webshot()] function through `...`. Some of
#' the more useful arguments for PNG saving are `zoom` (defaults to a scale
#' level of `2`) and `expand` (adds whitespace pixels around the cropped table
#' image, and has a default value of `5`), and `selector` (the default value is
#' `"table"`). There are several more options available so have a look at the
#' **webshot2** documentation for further details.
#'
#' If the output filename extension is either of `.tex`, `.ltx`, or `.rnw`, a
#' LaTeX document is produced. An output filename of `.rtf` will generate an RTF
#' document. The LaTeX and RTF saving functions don't have any options to pass
#' to `...`.
#'
#' If the output filename extension is `.docx`, a Word document file is
#' produced. This process is facilitated by the **rmarkdown** package, so this
#' package needs to be installed before attempting to save any table as a
#' `.docx` document.
#'
#' @param data *The gt table data object*
#'
#'   `obj:<gt_tbl>` // **required**
#'
#'   This is the **gt** table object that is commonly created through use of the
#'   [gt()] function.
#'
#' @param filename *Output filename*
#'
#'   `scalar<character>` // **required**
#'
#'   The file name to create on disk. Ensure that an extension compatible with
#'   the output types is provided (`.html`, `.tex`, `.ltx`, `.rtf`, `.docx`). If
#'   a custom save function is provided then the file extension is disregarded.
#'
#' @param path *Output path*
#'
#'   `scalar<character>` // *default:* `NULL` (`optional`)
#'
#'   An optional path to which the file should be saved (combined with
#'   `filename`).
#'
#' @param ... *Additional options*
#'
#'   `<named arguments>`
#'
#'   All other options passed to the appropriate internal saving function.
#'
#' @return The file name (invisibly) if the export process is successful.
#'
#' @section Examples:
#'
#' Using a small subset of the [`gtcars`] dataset, we can create a **gt** table
#' with row labels. We'll add a stubhead label with the [tab_stubhead()]
#' function to describe what is in the stub.
#'
#' ```r
#' tab_1 <-
#'   gtcars |>
#'   dplyr::select(model, year, hp, trq) |>
#'   dplyr::slice(1:5) |>
#'   gt(rowname_col = "model") |>
#'   tab_stubhead(label = "car")
#' ```
#'
#' Export the **gt** table to an HTML file with inlined CSS (which is necessary
#' for including the table as part of an HTML email) using `gtsave()` and the
#' `inline_css = TRUE` option.
#'
#' ```r
#' tab_1 |> gtsave(filename = "tab_1.html", inline_css = TRUE)
#' ```
#'
#' By leaving out the `inline_css` option, we get a more conventional HTML file
#' with embedded CSS styles.
#'
#' ```r
#' tab_1 |> gtsave(filename = "tab_1.html")
#' ```
#'
#' Saving as a PNG file results in a cropped image of an HTML table. The amount
#' of whitespace can be set with the `expand` option.
#'
#' ```r
#' tab_1 |> gtsave("tab_1.png", expand = 10)
#' ```
#'
#' Any use of the `.tex`, `.ltx`, or `.rnw` will result in the output of a LaTeX
#' document.
#'
#' ```r
#' tab_1 |> gtsave("tab_1.tex")
#' ```
#'
#' With the `.rtf` extension, we'll get an RTF document.
#'
#' ```r
#' tab_1 |> gtsave("tab_1.rtf")
#'
#' ```
#' With the `.docx` extension, we'll get a word/docx document.
#'
#' ```r
#' tab_1 |> gtsave("tab_1.docx")
#' ```
#'
#' @family table export functions
#' @section Function ID:
#' 13-1
#'
#' @section Function Introduced:
#' `v0.2.0.5` (March 31, 2020)
#'
#' @export
gtsave <- function(
    data,
    filename,
    path = NULL,
    ...
) {

  # Perform input object validation
  stop_if_not_gt_tbl_or_group(data = data)

  # Get the lowercased file extension
  file_ext <- gtsave_file_ext(filename)

  # Stop function if a file extension is not provided
  if (file_ext == "") {

    cli::cli_abort(c(
      "A file extension is required in the provided filename.",
      "i" = "We can use:",
      "*" = "`.html`, `.htm` (HTML file)",
      "*" = "`.png`          (PNG file)",
      "*" = "`.pdf`          (PDF file)",
      "*" = "`.tex`, `.rnw`  (LaTeX file)",
      "*" = "`.rtf`          (RTF file)",
      "*" = "`.docx`         (Word file)"
    ))
  }

  # Use the appropriate save function based
  # on the filename extension
  switch(
    file_ext,
    "htm" = ,
    "html" = gt_save_html(data = data, filename, path, ...),
    "ltx" = , # We don't verbally support using `ltx`
    "rnw" = ,
    "tex" = gt_save_latex(data = data, filename, path, ...),
    "rtf" = gt_save_rtf(data = data, filename, path, ...),
    "png" = ,
    "pdf" = gt_save_webshot(data = data, filename, path, ...),
    "docx" = gt_save_docx(data = data, filename, path, ...),
    {
      cli::cli_abort(c(
        "The file extension supplied (`.{file_ext}`) cannot be used.",
        "i" = "We can use:",
        "*" = "`.html`, `.htm` (HTML file)",
        "*" = "`.png`          (PNG file)",
        "*" = "`.pdf`          (PDF file)",
        "*" = "`.tex`, `.rnw`  (LaTeX file)",
        "*" = "`.rtf`          (RTF file)",
        "*" = "`.docx`         (Word file)"
      ))
    }
  )
  if (!is.null(path)) {
    filename <- file.path(path, filename)
  }

  invisible(filename)
}

#' Saving function for an HTML file
#'
#' @noRd
gt_save_html <- function(
    data,
    filename,
    path = NULL,
    ...,
    inline_css = FALSE
) {

  filename <- gtsave_filename(path = path, filename = filename)

  if (is_gt_tbl(data = data)) {

    if (inline_css) {

      html <- as_raw_html(data, inline_css = inline_css)
      html <- htmltools::HTML(html)

    } else {

      html <- htmltools::as.tags(data)
    }

    return(htmltools::save_html(html, filename, ...))

  } else if (is_gt_group(data = data)) {

    seq_tbls <- seq_len(nrow(data$gt_tbls))

    html_tbls <- htmltools::tagList()

    for (i in seq_tbls) {

      html_tbl_i <- as_raw_html(grp_pull(data, which = i), inline_css = inline_css)
      html_tbls <- htmltools::tagList(html_tbls, html_tbl_i)
    }

    return(htmltools::save_html(html_tbls, filename, ...))
  }
}

#' Saving function for an image file via the webshot2 package
#'
#' @noRd
gt_save_webshot <- function(
    data,
    filename,
    path = NULL,
    ...,
    selector = "table",
    zoom = 2,
    expand = 5
) {

  if (is_gt_group(data = data)) {

    cli::cli_abort(c(
      "The `gtsave()` function cannot be used with `gt_group` objects.",
      "*" = "Alternatively, you can use `grp_pull()` -> `gtsave()` for each gt table."
    ))
  }

  filename <- gtsave_filename(path = path, filename = filename)

  # Create a temporary file with the `html` extension
  tempfile_ <- tempfile(fileext = ".html")

  # Reverse slashes on Windows filesystems
  tempfile_ <- tidy_gsub(tempfile_, "\\\\", "/")

  # Save gt table as HTML using the `gt_save_html()` function
  gt_save_html(
    data = data,
    filename = tempfile_,
    path = NULL
  )

  # Saving an image requires the webshot2 package; if it's
  # not present, stop with a message
  rlang::check_installed("webshot2", "to save gt tables as images.")

  # Save the image in the working directory
  webshot2::webshot(
    url = paste0("file:///", tempfile_),
    file = filename,
    selector = selector,
    zoom = zoom,
    expand = expand,
    ...
  )
}

#' Saving function for a LaTeX file
#'
#' @noRd
gt_save_latex <- function(
    data,
    filename,
    path = NULL,
    ...
) {

  filename <- gtsave_filename(path = path, filename = filename)

  if (is_gt_tbl(data = data)) {

    latex_lines <- as_latex(data = data)

  } else if (is_gt_group(data = data)) {

    latex_lines <- c()

    seq_tbls <- seq_len(nrow(data$gt_tbls))

    for (i in seq_tbls) {

      latex_lines_i <- as_latex(grp_pull(data, which = i))

      latex_lines <- c(latex_lines, latex_lines_i)
    }

    latex_lines <-
      paste(
        latex_lines,
        collapse = "\n\\newpage\n\n"
      )
  }

  writeLines(text = latex_lines, con = filename)
}

#' Saving function for an RTF file
#'
#' @noRd
gt_save_rtf <- function(
    data,
    filename,
    path = NULL,
    ...
) {

  filename <- gtsave_filename(path = path, filename = filename)

  if (is_gt_tbl(data = data)) {

    rtf_lines <- as_rtf(data = data)

  } else if (is_gt_group(data = data)) {

    rtf_lines <- c()

    rtf_open <-
      as_rtf(
        grp_pull(data, which = 1),
        incl_open = TRUE,
        incl_header = TRUE,
        incl_page_info = TRUE,
        incl_body = FALSE,
        incl_close = FALSE
      )

    seq_tbls <- seq_len(nrow(data$gt_tbls))

    for (i in seq_tbls) {

      rtf_lines_i <-
        as_rtf(
          grp_pull(data, which = i),
          incl_open = FALSE,
          incl_header = FALSE,
          incl_page_info = FALSE,
          incl_body = TRUE,
          incl_close = FALSE
        )

      rtf_lines <- c(rtf_lines, rtf_lines_i)
    }

    rtf_lines_combined <-
      paste(
        rtf_lines,
        collapse = "\n{\\pard\\fs2\\par}\\page{\\pard\\fs2\\par}\n"
      )

    rtf_lines <- paste0(rtf_open, rtf_lines_combined, "}")
  }

  # Remove the comments specific to knitr since this will be a standalone
  # document not dependent on the knitr package
  rtf_lines <- gsub("!!!!!RAW-KNITR-CONTENT|RAW-KNITR-CONTENT!!!!!", "", rtf_lines)

  writeLines(rtf_lines, con = filename)
}

#' Saving function for a Word (docx) file
#'
#' @noRd
gt_save_docx <- function(
    data,
    filename,
    path = NULL,
    ...,
    open = rlang::is_interactive()
) {

  # Because creation of a .docx container is somewhat difficult, we
  # require the rmarkdown package to be installed to generate this
  # type of output
  rlang::check_installed("rmarkdown", "to save gt tables as Word documents.")

  filename <- gtsave_filename(path = path, filename = filename)

  if (is_gt_tbl(data = data)) {

    word_md_text <-
      paste0(
        c(
          "```{=openxml}",
          enc2utf8(as_word(data = data)),
          "```",
          ""),
        collapse = "\n"
      )

  } else if (is_gt_group(data = data)) {

    word_tbls <- c()

    seq_tbls <- seq_len(nrow(data$gt_tbls))

    for (i in seq_tbls) {
      word_tbl_i <- as_word(grp_pull(data, which = i))
      word_tbls <- c(word_tbls, word_tbl_i)
    }

    word_tbls_combined <-
      paste(
        word_tbls,
        collapse = "\n\n<w:p><w:r><w:br w:type=\"page\" /></w:r></w:p>\n\n"
      )

    word_md_text <-
      paste0(
        c(
          "```{=openxml}",
          enc2utf8(word_tbls_combined),
          "```",
          ""),
        collapse = "\n"
      )
  }

  word_md_file <- tempfile(fileext = ".md")

  writeChar(
    iconv(word_md_text, to = "UTF-8"),
    con = word_md_file
  )

  rmarkdown::pandoc_convert(
    input = word_md_file,
    output = filename
  )

  if (needs_gt_as_word_post_processing(word_md_text)) {
    gt_as_word_post_processing(path = filename)
  }
}

#' Get the lowercase extension from a filename
#'
#' @noRd
gtsave_file_ext <- function(filename) {
  tolower(tools::file_ext(filename))
}

#' Combine `path` with `filename` and normalize the path
#'
#' @noRd
gtsave_filename <- function(path, filename) {

  if (is.null(path)) path <- "."

  # The use of `fs::path_abs()` works around
  # the saving code in `htmltools::save_html()`
  # See htmltools Issue #165 for more details
  as.character(
    fs::path_expand(
      fs::path_abs(
        path = filename,
        start = path
      )
    )
  )
}

#' Get the HTML content of a **gt** table
#'
#' @description
#'
#' Get the HTML content from a `gt_tbl` object as a single-element character
#' vector. By default, the generated HTML will have inlined styles, where CSS
#' styles (that were previously contained in CSS rule sets external to the
#' `<table> element`) are included as `style` attributes in the HTML table's
#' tags. This option is preferable when using the output HTML table in an
#' emailing context.
#'
#' @param data *The gt table data object*
#'
#'   `obj:<gt_tbl>` // **required**
#'
#'   This is the **gt** table object that is commonly created through use of the
#'   [gt()] function.
#'
#' @param inline_css *Use inline CSS*
#'
#'   `scalar<logical>` // *default:* `TRUE`
#'
#'   An option to supply styles to table elements as inlined CSS styles. This is
#'   useful when including the table HTML as part of an HTML email message body,
#'   since inlined styles are largely supported in email clients over using CSS
#'   in a `<style>` block.
#'
#' @section Examples:
#'
#' Use a subset of the [`gtcars`] dataset to create a **gt** table. Add a header
#' with [tab_header()] and then export the table as HTML code with inlined CSS
#' styles using the `as_raw_html()` function.
#'
#' ```r
#' tab_html <-
#'   gtcars |>
#'   dplyr::select(mfr, model, msrp) |>
#'   dplyr::slice(1:5) |>
#'   gt() |>
#'   tab_header(
#'     title = md("Data listing from **gtcars**"),
#'     subtitle = md("`gtcars` is an R dataset")
#'   ) |>
#'   as_raw_html()
#' ```
#'
#' What's returned is a single-element vector containing the HTML for the table.
#' It has only the `<table>...</table>` part so it's not a complete HTML
#' document but rather an HTML fragment.
#'
#' @family table export functions
#' @section Function ID:
#' 13-2
#'
#' @section Function Introduced:
#' `v0.2.0.5` (March 31, 2020)
#'
#' @export
as_raw_html <- function(
    data,
    inline_css = TRUE
) {

  # Perform input object validation
  stop_if_not_gt_tbl(data = data)

  html_table <- as.character(as.tags.gt_tbl(data))

  if (inline_css) {

    font_vec <- unique(dt_options_get_value(data = data, option = "table_font_names"))
    font_family_attr <- as_css_font_family_attr(font_vec = font_vec)

    html_table <-
      gsub(
        pattern = "<style>html \\{.*?\\}",
        replacement = "<style>",
        x = html_table
      )

    html_table <-
      gsub(
        pattern = ".gt_table {\n",
        replacement = paste0(".gt_table { \n  ", font_family_attr, "\n"),
        x = html_table,
        fixed = TRUE
      )

    # Create inline styles
    html_table <- juicyjuice::css_inline(html = html_table)
  }

  htmltools::HTML(html_table)
}

#' Output a **gt** object as LaTeX
#'
#' @description
#'
#' Get the LaTeX content from a `gt_tbl` object as a `knit_asis` object. This
#' object contains the LaTeX code and attributes that serve as LaTeX
#' dependencies (i.e., the LaTeX packages required for the table). Using
#' `as.character()` on the created object will result in a single-element vector
#' containing the LaTeX code.
#'
#' @param data *The gt table data object*
#'
#'   `obj:<gt_tbl>` // **required**
#'
#'   This is the **gt** table object that is commonly created through use of the
#'   [gt()] function.
#'
#' @details
#'
#' LaTeX packages required to generate tables are:
#' `r paste0(gt:::latex_packages(), collapse = ", ")`.
#'
#' In the event packages are not automatically added during the render phase
#' of the document, please create and include a style file to load them.
#'
#' Inside the document's YAML metadata, please include:
#'
#' \preformatted{
#' output:
#'   pdf_document: # Change to appropriate LaTeX template
#'     includes:
#'       in_header: 'gt_packages.sty'
#' }
#'
#' The `gt_packages.sty` file would then contain the listed dependencies above:
#'
#' \preformatted{
#'   \usepackage{booktabs, caption, longtable, colortbl, array}
#' }
#'
#' @section Examples:
#'
#' Use a subset of the [`gtcars`] dataset to create a **gt** table. Add a header
#' with [tab_header()] and then export the table as LaTeX code using the
#' `as_latex()` function.
#'
#' ```r
#' tab_latex <-
#'   gtcars |>
#'   dplyr::select(mfr, model, msrp) |>
#'   dplyr::slice(1:5) |>
#'   gt() |>
#'   tab_header(
#'     title = md("Data listing from **gtcars**"),
#'     subtitle = md("`gtcars` is an R dataset")
#'   ) |>
#'   as_latex()
#' ```
#'
#' What's returned is a `knit_asis` object, which makes it easy to include in R
#' Markdown documents that are knit to PDF. We can use `as.character()` to get
#' just the LaTeX code as a single-element vector.
#'
#' @family table export functions
#' @section Function ID:
#' 13-3
#'
#' @section Function Introduced:
#' `v0.2.0.5` (March 31, 2020)
#'
#' @export
as_latex <- function(data) {

  # Perform input object validation
  stop_if_not_gt_tbl(data = data)

  # Build all table data objects through a common pipeline
  data <- build_data(data = data, context = "latex")

  # Composition of LaTeX ----------------------------------------------------

  # Create a LaTeX fragment for the start of the table
  table_start <- create_table_start_l(data = data)

  # Create the heading component
  heading_component <- create_heading_component_l(data = data)

  # Create the columns component
  columns_component <- create_columns_component_l(data = data)

  # Create the body component
  body_component <- create_body_component_l(data = data)

  # Create the footnotes component
  footer_component <- create_footer_component_l(data = data)

  # Create a LaTeX fragment for the ending tabular statement
  table_end <- create_table_end_l()

  # If the `rmarkdown` package is available, use the
  # `latex_dependency()` function to load latex packages
  # without requiring the user to do so
  if (rlang::is_installed("rmarkdown")) {
    latex_packages <- lapply(latex_packages(), rmarkdown::latex_dependency)
  } else {
    latex_packages <- NULL
  }

  table_width_statement <- derive_table_width_statement_l(data = data)

  # Allow user to set a font-size
  fontsize_statement <- create_fontsize_statement_l(data = data)


  # Compose the LaTeX table
  knitr::asis_output(
    paste0(
      "\\begingroup\n",
      table_width_statement,
      fontsize_statement,
      table_start,
      heading_component,
      columns_component,
      body_component,
      table_end,
      footer_component,
      "\\endgroup\n",
      collapse = ""
    ),
    meta = latex_packages
  )
}

#' Output a **gt** object as RTF
#'
#' @description
#'
#' Get the RTF content from a `gt_tbl` object as as a single-element character
#' vector. This object can be used with `writeLines()` to generate a valid .rtf
#' file that can be opened by RTF readers.
#'
#' @param data *The gt table data object*
#'
#'   `obj:<gt_tbl>` // **required**
#'
#'   This is the **gt** table object that is commonly created through use of the
#'   [gt()] function.
#'
#' @param incl_open,incl_close *Include opening/closing braces*
#'
#'   `scalar<logical>` // *default:* `TRUE`
#'
#'   Options that govern whether the opening or closing `"{"` and `"}"` should
#'   be included. By default, both options are `TRUE`.
#'
#' @param incl_header *Include RTF header*
#'
#'   `scalar<logical>` // *default:* `TRUE`
#'
#'   Should the RTF header be included in the output? By default, this is
#'   `TRUE`.
#'
#' @param incl_page_info *Include RTF page information*
#'
#'   `scalar<logical>` // *default:* `TRUE`
#'
#'   Should the RTF output include directives for the document pages? This is
#'   `TRUE` by default.
#'
#' @param incl_body *Include RTF body*
#'
#'   `scalar<logical>` // *default:* `TRUE`
#'
#'   An option to include the body of RTF document. By default, this is `TRUE`.
#'
#' @section Examples:
#'
#' Use a subset of the [`gtcars`] dataset to create a **gt** table. Add a header
#' with [tab_header()] and then export the table as RTF code using the
#' `as_rtf()` function.
#'
#' ```r
#' tab_rtf <-
#'   gtcars |>
#'   dplyr::select(mfr, model) |>
#'   dplyr::slice(1:2) |>
#'   gt() |>
#'   tab_header(
#'     title = md("Data listing from **gtcars**"),
#'     subtitle = md("`gtcars` is an R dataset")
#'   ) |>
#'   as_rtf()
#' ```
#'
#' @family table export functions
#' @section Function ID:
#' 13-4
#'
#' @section Function Introduced:
#' `v0.2.0.5` (March 31, 2020)
#'
#' @export
as_rtf <- function(
    data,
    incl_open = TRUE,
    incl_header = TRUE,
    incl_page_info = TRUE,
    incl_body = TRUE,
    incl_close = TRUE
) {

  # Perform input object validation
  stop_if_not_gt_tbl(data = data)

  if (dt_options_get_value(data = data, option = "page_numbering")) {

    data <-
      dt_options_set_value(
        data = data,
        option = "page_header_use_tbl_headings",
        value = TRUE
      )
  }

  page_header_use_tbl_headings <-
    dt_options_get_value(data = data, option = "page_header_use_tbl_headings")

  # Build all table data objects through a common pipeline
  data <- build_data(data = data, context = "rtf")

  # Composition of RTF ------------------------------------------------------

  # Create the heading component
  heading_component <- create_heading_component_rtf(data = data)

  # Create the columns component
  columns_component <- create_columns_component_rtf(data = data)

  # Create the body component
  body_component <- create_body_component_rtf(data = data)

  # Create the footer component
  footer_component <- create_footer_component_rtf(data = data)

  # Create the page footer component
  page_footer_component <- create_page_footer_component_rtf(data = data)

  # Compose the RTF table
  rtf_table <-
    as_rtf_string(
      rtf_file(
        data = data,
        document = {
          rtf_table(
            rows = c(
              if (page_header_use_tbl_headings) rtf_raw("{\\header\n\n") else "",
              heading_component,
              columns_component,
              if (page_header_use_tbl_headings) rtf_raw("}\n\n") else "",
              body_component,
              footer_component,
              page_footer_component
            )
          )
        }
      ),
      incl_open = incl_open,
      incl_header = incl_header,
      incl_page_info = incl_page_info,
      incl_body = incl_body,
      incl_close = incl_close
    )

  if (isTRUE(getOption('knitr.in.progress'))) {
    rtf_table <- knitr::raw_output(rtf_table)
  }

  rtf_table
}

#' Output a **gt** object as Word
#'
#' @description
#'
#' Get the Open Office XML table tag content from a `gt_tbl` object as a
#' single-element character vector.
#'
#' @param data *The gt table data object*
#'
#'   `obj:<gt_tbl>` // **required**
#'
#'   This is the **gt** table object that is commonly created through use of the
#'   [gt()] function.
#'
#' @param align *Table alignment*
#'
#'   `scalar<character>` // *default:* `"center"`
#'
#'   An option for table alignment. Can either be `"center"`, `"left"`, or
#'   `"right"`.
#'
#' @param caption_location *Caption location*
#'
#'   `singl-kw:[top|bottom|embed]` // *default:* `"top"`
#'
#'   Determines where the caption should be positioned. This can either be
#'   `"top"`, `"bottom"`, or `"embed"`.
#'
#' @param caption_align *Caption alignment*
#'
#'   Determines the alignment of the caption. This is
#'   either `"left"` (the default), `"center"`, or `"right"`. This option is
#'   only used when `caption_location` is not set as `"embed"`.
#'
#' @param split *Allow splitting of a table row across pages*
#'
#'   `scalar<logical>` // *default:* `FALSE`
#'
#'   A logical value that indicates whether to activate the Word option
#'   `Allow row to break across pages`.
#'
#' @param keep_with_next *Keeping rows together*
#'
#'   `scalar<logical>` // *default:* `TRUE`
#'
#'   A logical value that indicates whether a table should use Word option
#'   `Keep rows together`.
#'
#' @section Examples:
#'
#' Use a subset of the [`gtcars`] dataset to create a **gt** table. Add a header
#' with [tab_header()] and then export the table as OOXML code for Word using the
#' `as_word()` function.
#'
#' ```r
#' tab_rtf <-
#'   gtcars |>
#'   dplyr::select(mfr, model) |>
#'   dplyr::slice(1:2) |>
#'   gt() |>
#'   tab_header(
#'     title = md("Data listing from **gtcars**"),
#'     subtitle = md("`gtcars` is an R dataset")
#'   ) |>
#'   as_word()
#' ```
#'
#' @family table export functions
#' @section Function ID:
#' 13-5
#'
#' @section Function Introduced:
#' `v0.7.0` (August 25, 2022)
#'
#' @export
as_word <- function(
    data,
    align = "center",
    caption_location = c("top", "bottom", "embed"),
    caption_align = "left",
    split = FALSE,
    keep_with_next = TRUE
) {

  # Perform input object validation
  stop_if_not_gt_tbl(data = data)

  caption_location <- rlang::arg_match(caption_location)

  # Build all table data objects through a common pipeline
  value <- build_data(data = data, context = "word")

  gt_xml <- c()

  #
  # Composition of Word table OOXML
  #

  if (caption_location == "top") {

    header_xml <-
      as_word_tbl_header_caption(
        data = value,
        align = caption_align,
        split = split,
        keep_with_next = keep_with_next
      )

    gt_xml <- c(gt_xml, header_xml)
  }

  tbl_xml <-
    as_word_tbl_body(
      data = value,
      align = align,
      split = split,
      keep_with_next = keep_with_next,
      embedded_heading = identical(caption_location, "embed")
    )

  gt_xml <- c(gt_xml, tbl_xml)

  if (caption_location == "bottom") {

    # Set `keep_with_next` to FALSE here to prevent it trying to keep
    # with non-table content
    header_xml <-
      as_word_tbl_header_caption(
        data = value,
        align = caption_align,
        split = split,
        keep_with_next = FALSE
      )

    gt_xml <- c(gt_xml, header_xml)
  }

  gt_xml <- paste0(gt_xml, collapse = "")

  gt_xml
}

#' Generate ooxml for the table caption
#'
#' @param data A processed table object that is created using the `build_data()` function.
#' @param align left (default), center or right.
#' @param split TRUE or FALSE (default) indicating whether activate Word option 'Allow row to break across pages'.
#' @param keep_with_next  TRUE (default) or FALSE indicating whether a table should use Word option 'keep rows
#' together' is activated when TRUE
#'
#' @noRd
as_word_tbl_header_caption <- function(
    data,
    align = "left",
    split = FALSE,
    keep_with_next = TRUE
) {

  # Perform input object validation
  stop_if_not_gt_tbl(data = data)

  # Composition of caption OOXML -----------------------------------------------

  # Create the table caption
  caption_xml <-
    create_table_caption_component_xml(
      data = data,
      align = align,
      keep_with_next = keep_with_next
    )

  caption_xml
}

#' Generate ooxml for the table body
#'
#' @param data A processed table object that is created using the `build_data()`
#'   function.
#' @param align left, center (default) or right.
#' @param split TRUE or FALSE (default) indicating whether activate Word option
#'   'Allow row to break across pages'.
#' @param keep_with_next  TRUE (default) or FALSE indicating whether a table
#'   should use Word option 'keep rows together' is activated when TRUE
#' @param embedded_heading  TRUE or FALSE (default) indicating whether a table
#'   should add the title and subtitle at the top of the table.
#'
#' @noRd
as_word_tbl_body <- function(
    data,
    align = "center",
    split = FALSE,
    keep_with_next = TRUE,
    embedded_heading = FALSE
) {

  # Perform input object validation
  stop_if_not_gt_tbl(data = data)

  #
  # Composition of table Word OOXML
  #

  # Create the table properties component
  table_props_component <-
    create_table_props_component_xml(data = data, align = align)

  # # Create the heading component
  if (embedded_heading) {

    heading_component <-
      create_heading_component_xml(
        data = data,
        split = split,
        keep_with_next = keep_with_next
      )

  } else {
    heading_component <- NULL
  }

  # Create the columns component
  columns_component <-
    create_columns_component_xml(
      data = data,
      split = split,
      keep_with_next = keep_with_next
    )

  # Create the body component
  body_component <-
    create_body_component_xml(
      data = data,
      split = split,
      keep_with_next = keep_with_next
    )

  # Create the footnotes component
  footnotes_component <-
    create_footnotes_component_xml(
      data = data,
      split = split,
      keep_with_next = keep_with_next
    )

  # Create the source notes component
  source_notes_component <-
    create_source_notes_component_xml(
      data = data,
      split = split,
      keep_with_next = keep_with_next
    )

  # Compose the Word OOXML table
  word_tbl <-
    xml_tbl(
      paste0(
        table_props_component,
        heading_component,
        columns_component,
        body_component,
        footnotes_component,
        source_notes_component,
        collapse = ""
      )
    )

  as.character(word_tbl)
}

#' Extract the table body from a **gt** object
#'
#' @description
#'
#' We can extract the body of a **gt** table, even at various stages of its
#' rendering, from a `gt_tbl` object using the `extract_body()` function. By
#' default, the data frame returned will have gone through all of the build
#' stages but we can intercept the table body after a certain build stage.
#' Here are the eight different build stages and some notes about each:
#'
#' 1. `"init"`: the body table is initialized here, entirely with `NA` values.
#' It's important to note that all columns of the are of the `character` type in
#' this first stage. And all columns remain in the same order as the input data
#' table.
#'
#' 2. `"fmt_applied"`: Any cell values that have had formatting applied to them
#' are migrated to the body table. All other cells remain as `NA` values.
#' Depending on the `output` type, the formatting may also be different.
#'
#' 3. `"sub_applied"`: Any cell values that have had substitution functions
#' applied to them (whether or not they were previously formatted) are migrated
#' to the body table or modified in place (if formatted). All cells that had
#' neither been formatted nor undergone substitution remain as `NA` values.
#'
#' 4. `"unfmt_included"`: All cells that either didn't have any formatting or
#' any substitution operations applied are migrated to the body table. `NA`
#' values now become the string `"NA"`, so, there aren't any true missing values
#' in this body table.
#'
#' 5. `"cols_merged"`: The result of column-merging operations (though
#' [cols_merge()] and related functions) is materialized here. Columns that were
#' asked to be hidden will be present here (i.e., hiding columns doesn't remove
#' them from the body table).
#'
#' 6. `"body_reassembled"`: Though columns do not move positions rows can move
#' to different positions, and this is usually due to migration to different row
#' groups. At this stage, rows will be in the finalized order that is seen in
#' the associated display table.
#'
#' 7. `"text_transformed"`: Various `text_*()` functions in **gt** can operate
#' on body cells (now fully formatted at this stage) and return transformed
#' character values. After this stage, the effects of those functions are
#' apparent.
#'
#' 8. `"footnotes_attached"`: Footnote marks are attached to body cell values
#' (either on the left or right of the content). This stage performs said
#' attachment.
#'
#' @param data *The gt table data object*
#'
#'   `obj:<gt_tbl>` // **required**
#'
#'   This is the **gt** table object that is commonly created through use of the
#'   [gt()] function.
#'
#' @param build_stage *The build stage of the formatted R data frame*
#'
#'   `scalar<character>` // *default:* `NULL` (`optional`)
#'
#'   When a **gt** undergoes rendering, the body of the table proceeds through
#'   several build stages. Providing a single stage name will yield a data frame
#'   that has been extracted after completed that stage. Here are the build
#'   stages in order: (1) `"init"`, (2) `"fmt_applied"`, (3) `"sub_applied"`,
#'   (4) `"unfmt_included"`, (5) `"cols_merged"`, (6) `"body_reassembled"`, (7)
#'   `"text_transformed"`, and (8) `"footnotes_attached"`. If not supplying a
#'   value for `build_stage` then the entire build for the table body (i.e., up
#'   to and including the `"footnotes_attached"` stage) will be performed before
#'   returning the data frame.
#'
#' @param output *Output format*
#'
#'   `singl-kw:[html|latex|rtf|word]` // *default:* `"html"`
#'
#'   The output format of the resulting data frame. This can either be
#'   `"html"` (the default), `"latex"`, `"rtf"`, or `"word"`.
#'
#' @return A data frame or tibble object containing the table body.
#'
#' @family table export functions
#' @section Function ID:
#' 13-6
#'
#' @section Function Introduced:
#' `v0.10.0` (October 7, 2023)
#'
#' @export
extract_body <- function(
    data,
    build_stage = NULL,
    output = c("html", "latex", "rtf", "word")
) {

  # Perform input object validation
  stop_if_not_gt_tbl(data = data)

  # Ensure that `output` is matched correctly to one option
  output <- rlang::arg_match(output)

  data <- dt_body_build(data = data)

  if (identical(build_stage, "init")) {
    return(data[["_body"]])
  }

  data <- render_formats(data = data, context = output)

  if (identical(build_stage, "fmt_applied")) {
    return(data[["_body"]])
  }

  data <- render_substitutions(data = data, context = output)

  if (identical(build_stage, "sub_applied")) {
    return(data[["_body"]])
  }

  data <- migrate_unformatted_to_output(data = data, context = output)

  if (identical(build_stage, "unfmt_included")) {
    return(data[["_body"]])
  }

  data <- perform_col_merge(data = data, context = output)

  if (identical(build_stage, "cols_merged")) {
    return(data[["_body"]])
  }

  data <- dt_body_reassemble(data = data)

  if (identical(build_stage, "body_reassembled")) {
    return(data[["_body"]])
  }

  data <- reorder_stub_df(data = data)
  data <- reorder_footnotes(data = data)
  data <- reorder_styles(data = data)

  data <- perform_text_transforms(data = data)

  if (identical(build_stage, "text_transformed")) {
    return(data[["_body"]])
  }

  data <- dt_boxhead_build(data = data, context = output)
  data <- dt_spanners_build(data = data, context = output)
  data <- dt_heading_build(data = data, context = output)
  data <- dt_stubhead_build(data = data, context = output)
  data <- dt_stub_df_build(data = data, context = output)
  data <- dt_source_notes_build(data = data, context = output)
  data <- dt_summary_build(data = data, context = output)
  data <- dt_groups_rows_build(data = data, context = output)
  data <- resolve_footnotes_styles(data = data, tbl_type = "footnotes")
  data <- apply_footnotes_to_output(data = data, context = output)

  if (is.null(build_stage) || identical(build_stage, "footnotes_attached")) {
    return(data[["_body"]])
  }

  data[["_body"]]
}

#' Extract a summary list from a **gt** object
#'
#' @description
#'
#' Get a list of summary row data frames from a `gt_tbl` object where summary
#' rows were added via the [summary_rows()] function. The output data frames
#' contain the `group_id` and `rowname` columns, whereby `rowname` contains
#' descriptive stub labels for the summary rows.
#'
#' @param data *The gt table data object*
#'
#'   `obj:<gt_tbl>` // **required**
#'
#'   This is the **gt** table object that is commonly created through use of the
#'   [gt()] function.
#'
#' @return A list of data frames containing summary data.
#'
#' @section Examples:
#'
#' Use a modified version of [`sp500`] the dataset to create a **gt** table with
#' row groups and row labels. Create summary rows labeled as `min`, `max`, and
#' `avg` for every row group with [summary_rows()]. Then, extract the summary
#' rows as a list object.
#'
#' ```{r}
#' summary_extracted <-
#'   sp500 |>
#'   dplyr::filter(date >= "2015-01-05" & date <="2015-01-30") |>
#'   dplyr::arrange(date) |>
#'   dplyr::mutate(week = paste0("W", strftime(date, format = "%V"))) |>
#'   dplyr::select(-adj_close, -volume) |>
#'   gt(
#'     rowname_col = "date",
#'     groupname_col = "week"
#'   ) |>
#'   summary_rows(
#'     groups = everything(),
#'     columns = c(open, high, low, close),
#'     fns = list(
#'       min = ~min(.),
#'       max = ~max(.),
#'       avg = ~mean(.)
#'     ),
#      fmt = ~ fmt_number(.)
#'   ) |>
#'   extract_summary()
#'
#' summary_extracted
#' ```
#'
#' Use the summary list to make a new **gt** table. The key thing is to use
#' `dplyr::bind_rows()` and then pass the tibble to [gt()].
#'
#' ```r
#' summary_extracted |>
#'   unlist(recursive = FALSE) |>
#'   dplyr::bind_rows() |>
#'   gt(groupname_col = "group_id") |>
#'   cols_hide(columns = row_id)
#' ```
#'
#' \if{html}{\out{
#' `r man_get_image_tag(file = "man_extract_summary_1.png")`
#' }}
#'
#' @family table export functions
#' @section Function ID:
#' 13-7
#'
#' @section Function Introduced:
#' `v0.2.0.5` (March 31, 2020)
#'
#' @export
extract_summary <- function(data) {

  # Perform input object validation
  stop_if_not_gt_tbl(data = data)

  # Stop function if there are no
  # directives to create summary rows
  if (!dt_summary_exists(data = data)) {

    cli::cli_abort(c(
      "There is no summary list to extract.",
      "*" = "Use the `{.help [summary_rows](gt::summary_rows)}()` / `{.help [grand_summary_rows](gt::grand_summary_rows)}()` functions
      to generate summaries."
    ))
  }

  # Build the `data` using the standard
  # pipeline with the `html` context
  built_data <- build_data(data = data, context = "html")

  # Extract the list of summary data frames
  # that contains tidy, unformatted data
  summary_tbl <-
    lapply(
      dt_summary_df_data_get(data = built_data),
      FUN = function(x) {
        lapply(x, function(y) {

          y <-
            dplyr::rename(
              y,
              group_id = dplyr::all_of(group_id_col_private),
              row_id = dplyr::all_of(row_id_col_private),
              rowname = dplyr::all_of(rowname_col_private)
            )

          flattened_rowname <- unname(unlist(y$rowname))

          y[, ][["rowname"]] <- flattened_rowname

          y
        })
      }
    )

  as.list(summary_tbl)
}

#' Extract a vector of formatted cells from a **gt** object
#'
#' @description
#'
#' Get a vector of cell data from a `gt_tbl` object. The output vector will have
#' cell data formatted in the same way as the table.
#'
#' @param data *The gt table data object*
#'
#'   `obj:<gt_tbl>` // **required**
#'
#'   This is the **gt** table object that is commonly created through use of the
#'   [gt()] function.
#'
#' @param columns *Columns to target*
#'
#'   `<column-targeting expression>` // *default:* `everything()`
#'
#'   Can either be a series of column names provided in [c()], a vector of
#'   column indices, or a select helper function. Examples of select helper
#'   functions include [starts_with()], [ends_with()], [contains()],
#'   [matches()], [one_of()], [num_range()], and [everything()].
#'
#' @param rows *Rows to target*
#'
#'   `<row-targeting expression>` // *default:* `everything()`
#'
#'   In conjunction with `columns`, we can specify which of their rows should
#'   form a constraint for extraction. The default [everything()] results in all
#'   rows in `columns` being formatted. Alternatively, we can supply a vector of
#'   row IDs within [c()], a vector of row indices, or a select helper function.
#'   Examples of select helper functions include [starts_with()], [ends_with()],
#'   [contains()], [matches()], [one_of()], [num_range()], and [everything()].
#'   We can also use expressions to filter down to the rows we need (e.g.,
#'   `[colname_1] > 100 & [colname_2] < 50`).
#'
#' @param output *Output format*
#'
#'   `singl-kw:[auto|plain|html|latex|rtf|word]` // *default:* `"auto"`
#'
#'   The output format of the resulting character vector. This can either be
#'   `"auto"` (the default), `"plain"`, `"html"`, `"latex"`, `"rtf"`, or
#'   `"word"`. In **knitr** rendering (i.e., Quarto or R Markdown), the `"auto"`
#'   option will choose the correct `output` value
#'
#' @return A vector of cell data extracted from a **gt** table.
#'
#' @section Examples:
#'
#' Let's create a **gt** table with the [`exibble`] dataset to use in the next
#' few examples:
#'
#' ```r
#' gt_tbl <- gt(exibble, rowname_col = "row", groupname_col = "group")
#' ```
#'
#' We can extract a cell from the table with the `extract_cells()` function.
#' This is done by providing a column and a row intersection:
#'
#' ```r
#' extract_cells(gt_tbl, columns = num, row = 1)
#' ```
#' ```
#' #> [1] "1.111e-01"
#' ```
#'
#' Multiple cells can be extracted. Let's get the first four cells from the
#' `char` column.
#'
#' ```r
#' extract_cells(gt_tbl, columns = char, rows = 1:4)
#' ```
#' ```
#' #> [1] "apricot" "banana" "coconut" "durian"
#' ```
#'
#' We can format cells and expect that the formatting is fully retained after
#' extraction.
#'
#' ```r
#' gt_tbl |>
#'   fmt_number(columns = num, decimals = 2) |>
#'   extract_cells(columns = num, rows = 1)
#' ```
#' ```
#' #> [1] "0.11"
#' ```
#'
#' @family table export functions
#' @section Function ID:
#' 13-8
#'
#' @section Function Introduced:
#' `v0.8.0` (November 16, 2022)
#'
#' @export
extract_cells <- function(
    data,
    columns,
    rows = everything(),
    output = c("auto", "plain", "html", "latex", "rtf", "word")
) {

  # Perform input object validation
  stop_if_not_gt_tbl(data = data)

  # Ensure that `output` is matched correctly to one option
  output <- rlang::arg_match(output)

  if (output == "auto") {
    output <- determine_output_format()
  }

  #
  # Resolution of columns and rows as character vectors
  #

  resolved_columns <-
    resolve_cols_c(
      expr = {{ columns }},
      data = data,
      excl_stub = FALSE
    )

  resolved_rows_idx <-
    resolve_rows_i(
      expr = {{ rows }},
      data = data
    )

  if (!dt_has_built_get(data)) {
    #
    # Partially build the gt table using the resolved `output` as the
    # rendering context; this formats the body cells and applies merging
    # routines and text transforms (but doesn't attach footnote marks)
    #

    data <- dt_body_build(data = data)
    data <- render_formats(data = data, context = output)
    data <- render_substitutions(data = data, context = output)
    data <- migrate_unformatted_to_output(data = data, context = output)
    data <- perform_col_merge(data = data, context = output)
    data <- dt_body_reassemble(data = data)
    data <- reorder_stub_df(data = data)
    data <- reorder_footnotes(data = data)
    data <- reorder_styles(data = data)
    data <- perform_text_transforms(data = data)
    built_data <- data

    # Extract the `_body` component of the built data
    data_body <- built_data[["_body"]]
  } else {
    data_body <- data[["_body"]]
  }

  #
  # Collect a vector of body cells in a specific order
  #

  out_vec <- c()

  for (column in resolved_columns) {
    out_vec_col <- data_body[resolved_rows_idx, ][[column]]
    out_vec <- c(out_vec, out_vec_col)
  }

  out_vec
}
rstudio/gt documentation built on April 29, 2024, 10:37 p.m.