R/annotate_mf_all.R

Defines functions annotate_mf_all

Documented in annotate_mf_all

#' Annotate meaningful formatting for all cells
#'
#' Turns cell formatting into annotations for all values across all variables.
#'
#' @param xlfilepath Path to a single-sheet spreadsheet file (xls or xlsx).
#'
#' @return A tibble with meaningful formatting embedded as text for all rows and
#'   columns.
#' @details Seven popular approaches for meaningful formatting (bold, colored
#'   text, italic, strikethrough, underline, double underline, and cell
#'   highlighting) are supported in this function. `sheets`, `skip`, and `range`
#'   arguments for spreadsheet input are not supported. The hex8 code of the
#'   fill color used for text color and cell highlighting is also appended in
#'   the output. Ensure the data in the spreadsheet are rectangular before
#'   running; this includes blank but formatted cells beyond the data rectangle.
#'
#' @examples
#' example_spreadsheet <- system.file("extdata/boutiques.xlsx", package = "unheadr")
#' annotate_mf_all(example_spreadsheet)
#' @importFrom rlang :=
#' @importFrom rlang .data
#' @export
annotate_mf_all <- function(xlfilepath) {
  spsheet <- readxl::read_excel(xlfilepath)

  if (any(grepl("^\\.\\.\\.", names(spsheet)))) {
    stop("Check the spreadsheet for empty values in the header row")
  }

  m_formatting <- tidyxl::xlsx_cells(xlfilepath)

  m_formatting <-
    dplyr::ungroup(tidyr::complete(dplyr::group_by(m_formatting, col),
                                   row = tidyr::full_seq(row, 1)
    ))

  if (nrow(spsheet) != max(m_formatting$row) - 1) {
    stop("Check spreadsheet for blank cells in seemingly empty rows")
  }

  if (length(unique(stats::na.omit(m_formatting$sheet))) != 1) {
    stop("Data in spreadsheet does not appear to be rectangular (this includes multisheet files)")
  }

  format_defs <- tidyxl::xlsx_formats(xlfilepath)

  bold <- format_defs$local$font$bold
  italic <- format_defs$local$font$italic
  underlined <- format_defs$local$font$underline
  highlighted <- format_defs$local$fill$patternFill$patternType
  hl_color <- format_defs$local$fill$patternFill$fgColor$rgb
  strikethrough <- format_defs$local$font$strike
  text_clr <- format_defs$local$font$color$rgb
  format_opts <- tibble::lst(
    bold, highlighted, hl_color, italic,
    strikethrough, text_clr, underlined
  )
  formatting_indicators <- dplyr::bind_cols(lapply(
    format_opts,
    function(x) x[m_formatting$local_format_id]
  ))
  format_joined <- dplyr::bind_cols(m_formatting, formatting_indicators)

  cols_spsheet <- match(
    names(spsheet),
    format_joined$character
  )

  # annotate target variable
  target_var_fmt <- function(col_ind) {
    orig_format <- dplyr::filter(format_joined, row >= 2 & col == col_ind)
    orig_format <- dplyr::select(orig_format, bold:underlined)
    formatted <- dplyr::bind_cols(spsheet, orig_format)
    formatted <- dplyr::mutate(
      formatted, dplyr::across(
        bold:underlined,
        \(x) replace(x, is.na(x), FALSE)
      )
    )
    # set up multi-category annotations
    formatted$highlighted <- gsub(
      pattern = "FALSE", replacement = "",
      formatted$highlighted
    )
    formatted$text_clr <- gsub(
      pattern = "FALSE", replacement = "",
      formatted$text_clr
    )
    formatted$underlined <- gsub(
      pattern = "FALSE", replacement = "",
      formatted$underlined
    )
    formatted$hl_color <- gsub(
      pattern = "FALSE", replacement = "",
      formatted$hl_color
    )
    formatted <- dplyr::mutate(
      formatted, dplyr::across(
        c(bold, italic, strikethrough),
        as.logical
      )
    )
    # swap na with variable names
    formatted <- dplyr::mutate(
      formatted, dplyr::across(
        bold:underlined,
        \(x) {
          ifelse(x == TRUE, deparse(substitute(x)), x)
        }
      )
    )
    formatted <- dplyr::mutate(
      formatted,
      dplyr::across(bold:underlined, \(x) replace(x, x == "FALSE", ""))
    )
    # build annotation strings
    formatted$highlighted <- ifelse(formatted$highlighted != "",
                                    paste0("highlight", "-", formatted$hl_color), formatted$highlighted
    )
    formatted$hl_color <- NULL
    formatted$text_clr <- ifelse(formatted$text_clr != "",
                                 paste0("color", "-", formatted$text_clr), formatted$text_clr
    )
    formatted$underlined <- ifelse(formatted$underlined != "",
                                   paste0("underlined", "-", formatted$underlined), formatted$underlined
    )
    formatted$newvar <-
      paste(
        formatted$bold, formatted$highlighted,
        formatted$italic, formatted$strikethrough, formatted$text_clr,
        formatted$underlined
      )
    formatted$newvar <- stringr::str_squish(formatted$newvar)
    formatted$newvar <- gsub(" ", ", ", formatted$newvar)
    formatted <- dplyr::select(formatted, -c(bold:underlined))
    formatted[[names(spsheet)[col_ind]]] <-
      paste0("(", formatted$newvar, ") ", formatted[[names(spsheet)[col_ind]]])
    formatted[[names(spsheet)[col_ind]]]
  }

  for (i in seq_along(cols_spsheet)) {
    spsheet[, names(spsheet)[i]] <- target_var_fmt(i)
  }


  dplyr::mutate(spsheet, dplyr::across(
    dplyr::everything(),
    \(x) stringr::str_remove(x, "^\\(\\) ")
  ))
}

Try the unheadr package in your browser

Any scripts or data that you put into this service are public.

unheadr documentation built on May 29, 2024, 4:02 a.m.