R/utils.R

Defines functions detect_void_name is_named convin mm_to_inches cm_to_inches stop_if_not_rpptx stop_if_not_class df_rename sort_dataframe_by_index sort_vec_by_index get_file_index is_doc_open is_windows check_bookmark_id correct_id is.color set_row_span rbind_match_columns simple_lag between as_xpath_content_sel characterise_df read_theme_colors xfrmize fortify_master_xfrm read_xfrm

#' @importFrom xml2 xml_attr<- xml_name<- xml_text<- as_list as_xml_document read_xml
#' write_xml xml_add_child xml_add_parent xml_add_sibling xml_attr xml_attrs xml_child
#' xml_children xml_find_all xml_find_first xml_length xml_missing xml_name xml_ns
#' xml_path xml_remove xml_replace xml_set_attr xml_set_attrs xml_text
#' @importFrom stats setNames


read_xfrm <- function(nodeset, file, name){

  if( length(nodeset) < 1 ){
    return(data.frame(stringsAsFactors = FALSE, type = character(0),
                   id = character(0),
                   ph_label = character(0),
                   ph = character(0),
                   file = character(0),
                   offx = integer(0),
                   offy = integer(0),
                   cx = integer(0),
                   cy = integer(0),
                   rotation = integer(0),
                   name = character(0),
                   fld_id = character(0),
                   fld_type = character(0)
                   ))
  }

  ph <- xml_child(nodeset, "p:nvSpPr/p:nvPr/p:ph")
  type <- xml_attr(ph, "type")
  type[is.na(type)] <- "body"
  id <- xml_attr(xml_child(nodeset, "/p:cNvPr"), "id")
  label <- xml_attr(xml_child(nodeset, "/p:cNvPr"), "name")

  off <- xml_child(nodeset, "p:spPr/a:xfrm/a:off")
  ext <- xml_child(nodeset, "p:spPr/a:xfrm/a:ext")
  rot <- xml_child(nodeset, "p:spPr/a:xfrm")

  fld_id <- xml_attr(xml_child(nodeset, "/p:txBody/a:p/a:fld"), "id")
  fld_type <- xml_attr(xml_child(nodeset, "/p:txBody/a:p/a:fld"), "type")

  data.frame(stringsAsFactors = FALSE, type = type, id = id,
          ph_label = label,
          ph = as.character(ph),
          file = basename(file),
          offx = as.integer(xml_attr(off, "x")),
          offy = as.integer(xml_attr(off, "y")),
          cx = as.integer(xml_attr(ext, "cx")),
          cy = as.integer(xml_attr(ext, "cy")),
          rotation = as.integer(xml_attr(rot, "rot")),
          fld_id,
          fld_type,
          name = name )
}


fortify_master_xfrm <- function(master_xfrm) {
  master_xfrm <- as.data.frame(master_xfrm)
  has_type <- grepl("type=", master_xfrm$ph)
  master_xfrm <- master_xfrm[has_type, ]
  if (nrow(master_xfrm) > 0) {    # see #597
    list_xfrm <- split(master_xfrm, master_xfrm$file)
    list_xfrm <- lapply(list_xfrm, function(x) {
      x[!duplicated(x$type), , drop = FALSE]
    })
    master_xfrm <- do.call("rbind", list_xfrm)
  }

  tmp_names <- names(master_xfrm)

  old_ <- c("offx", "offy", "cx", "cy", "fld_id", "fld_type", "name")
  new_ <- c("offx_ref", "offy_ref", "cx_ref", "cy_ref", "fld_id_ref", "fld_type_ref", "master_name")
  tmp_names[match(old_, tmp_names)] <- new_
  names(master_xfrm) <- tmp_names
  master_xfrm$id <- NULL
  master_xfrm$ph <- NULL
  master_xfrm$ph_label <- NULL
  master_xfrm$rotation <- NULL

  master_xfrm
}


xfrmize <- function(slide_xfrm, master_xfrm) {
  x <- as.data.frame(slide_xfrm)

  master_ref <- unique(data.frame(
    file = master_xfrm$file,
    master_name = master_xfrm$name,
    stringsAsFactors = FALSE
  ))
  master_xfrm <- fortify_master_xfrm(master_xfrm)

  slide_key_id <- paste0(x$master_file, x$type)
  master_key_id <- paste0(master_xfrm$file, master_xfrm$type)

  slide_xfrm_no_match <- x[!slide_key_id %in% master_key_id, ]
  slide_xfrm_no_match <- merge(slide_xfrm_no_match,
    master_ref,
    by.x = "master_file", by.y = "file",
    all.x = TRUE, all.y = FALSE
  )

  x <- merge(x, master_xfrm,
    by.x = c("master_file", "type"),
    by.y = c("file", "type"),
    all = FALSE
  )
  x$offx <- ifelse(!is.finite(x$offx), x$offx_ref, x$offx)
  x$offy <- ifelse(!is.finite(x$offy), x$offy_ref, x$offy)
  x$cx <- ifelse(!is.finite(x$cx), x$cx_ref, x$cx)
  x$cy <- ifelse(!is.finite(x$cy), x$cy_ref, x$cy)
  x$offx_ref <- NULL
  x$offy_ref <- NULL
  x$cx_ref <- NULL
  x$cy_ref <- NULL
  x$fld_id_ref <- NULL
  x$fld_type_ref <- NULL

  x <- rbind(x, slide_xfrm_no_match, stringsAsFactors = FALSE)

  i_master <- get_file_index(x$master_file)
  i_layout <- get_file_index(x$file)
  x <- x[order(i_master, i_layout, x$offy, x$offx), , drop = FALSE] # intuitive sorting: top -> bottom, left -> right
  x <- x[!(is.na(x$offx) | is.na(x$offy) | is.na(x$cx) | is.na(x$cy)),  ]

  x$type_idx <- stats::ave(x$type, x$master_file, x$file, x$type, FUN = seq_along)
  x$type_idx <- as.numeric(x$type_idx) # NB: ave returns character

  x$id <- as.integer(x$id)

  rownames(x) <- NULL # prevent meaningless rownames
  x
}


read_theme_colors <- function(doc, theme){

  nodes <- xml_find_all(doc, "//a:clrScheme/*")

  names_ <- xml_name(nodes)
  col_types_ <- xml_name(xml_children(nodes) )
  vals <- xml_attr(xml_children(nodes), "val")
  last_colors_ <- xml_attr(xml_children(nodes), "lastClr")
  vals <- ifelse(col_types_ == "srgbClr", paste0("#", vals), paste0("#", last_colors_) )
  data.frame(stringsAsFactors = FALSE, name = names_, type = col_types_, value = vals, theme = theme)
}



characterise_df <- function(x){
  names(x) <- htmlEscapeCopy(names(x))
  x <- lapply(x, function( x ) {
    if( is.character(x) ) x
    else if( is.factor(x) ) as.character(x)
    else gsub("(^ | $)+", "", format(x))
  })
  data.frame(x, stringsAsFactors = FALSE, check.names = FALSE)
}


xpath_content_selector <- "*[self::p:cxnSp or self::p:sp or self::p:graphicFrame or self::p:grpSp or self::p:pic]"

as_xpath_content_sel <- function(prefix){
  paste0(prefix, xpath_content_selector)
}


between <- function(x, left, right ){
  x >= left & x <= right
}



simple_lag <- function( x, default=0 ){
  c(default, x[-length(x)])
}

rbind_match_columns <- function(list_df) {

  col <- unique(unlist(lapply(list_df, colnames)))
  x <- Filter(function(x) nrow(x)>0, list_df)
  x <- lapply(x, function(x, col) {
    x[, setdiff(col, colnames(x))] <- NA
    x
  }, col = col)
  do.call(rbind, x)
}

set_row_span <- function( row_details ){
  row_details$first[!row_details$first & !row_details$row_merge] <- TRUE
  row_details$row_merge <- NULL

  row_details <- split(row_details, row_details$cell_id)

  row_details <- mapply(function(dat){
    rowspan_values_at_breaks <- rle(cumsum(dat$first))$lengths
    rowspan_pos_at_breaks <- which(dat$first)
    dat$row_span <- 0L
    dat$row_span[rowspan_pos_at_breaks] <- rowspan_values_at_breaks
    dat
  }, row_details, SIMPLIFY = FALSE)
  row_details <- rbind_match_columns(row_details)
  row_details$first <- NULL
  row_details
}


#' @importFrom grDevices col2rgb rgb
is.color = function(x) {
  # http://stackoverflow.com/a/13290832/3315962
  out = sapply(x, function( x ) {
    tryCatch( is.matrix( col2rgb( x ) ), error = function( e ) F )
  })

  nout <- names(out)
  if( !is.null(nout) && any( is.na( nout ) ) )
    out[is.na( nout )] = FALSE

  out
}

correct_id <- function(doc, int_id){
  all_uid <- xml_find_all(doc, "//*[@id]")
  for(z in seq_along(all_uid) ){
    if(!grepl("[^0-9]", xml_attr(all_uid[[z]], "id"))){
      xml_attr(all_uid[[z]], "id") <- int_id
      int_id <- int_id + 1
    }
  }
  int_id
}


check_bookmark_id <- function(bkm){
  if(!is.null(bkm)){
    invalid_bkm <- is.character(bkm) &&
      length(bkm) == 1 &&
      nchar(bkm) > 0 &&
      grepl("[^:[:alnum:]_-]+", bkm)
    if(invalid_bkm){
      stop("bkm [", bkm, "] should only contain alphanumeric characters, ':', '-' and '_'.", call. = FALSE)
    }
  }
  bkm
}

is_windows <- function() {
  "windows" %in% .Platform$OS.type
}

is_doc_open <- function(file) {
  # The function checks if the `file` is open (a.k.a. is being edited).
  # This function is valid on Windows operating system only.
  suppressWarnings(file.exists(file) && !file.rename(from = file, to = file))
}


# Extract trailing numeric index in .xml filename
#
# Useful to for slideMaster and slideLayout .xml files.
#
# Examples:
#   files <- c("slideLayout1.xml", "slideLayout2.xml", "slideLayout10.xml")
#   get_file_index(files)
#
get_file_index <- function(file) {
  x <- sub(pattern = ".+?(\\d+).xml$", replacement = "\\1", x = basename(file), ignore.case = TRUE)
  as.numeric(x)
}


# Sort xml filenames by trailing numeric index
#
# Useful to for slideMaster and slideLayout xml files.
#
# Examples:
#   files <- c("slideLayout1.xml", "slideLayout2.xml", "slideLayout12.xml")
#   sort_vec_by_index(files)  # => order corresponding to trailing index
#   sort(files)           # => incorrect lexicographical ordering
#
sort_vec_by_index <- function(x) {
  indexes <- get_file_index(x)
  x[order(indexes)]
}


# Sort dataframe column by trailing index
#
# df: A dataframe
# ...: columsn to sort by, comma separated
#
# Examples:
# df <- data.frame(
#   a = paste0("file_", rep(3:1, each = 2), ".xml"),
#   b = paste0("file_", rep(3:1, 2), ".xml")
# )
# sort_dataframe_by_index(df, "a", "b")
# sort_dataframe_by_index(df, "b", "a")
#
sort_dataframe_by_index <- function(df, ...) {
  sort_columns <- c(...)
  l <- lapply(sort_columns, function(.col) {
    get_file_index(df[[.col]])
  })
  df[do.call(order, l), , drop =FALSE]
}


# rename dataframe columns
#
# Examples:
#   df_rename(mtcars, c("mpg", "cyl"), c("A", "B"))
#
df_rename <- function(df, old, new) {
  .nms <- names(df)
  .nms[match(old, .nms)] <- new
  stats::setNames(df, .nms)
}


# replacement for stopifnot() with nicer user feedback
stop_if_not_class <- function(x, class, arg = NULL) {
  check <- inherits(x, what = class)
  if (!check) {
    msg_arg <- ifelse(is.null(arg), "Incorrect input.", "Incorrect input for {.arg {arg}}")
    cli::cli_abort(c(
      msg_arg,
      "x" = "Expected {.cls {class}} but got {.cls {class(x)[1]}}"
    ), call = NULL)
  }
}


stop_if_not_rpptx <- function(x, arg = NULL) {
  stop_if_not_class(x, "rpptx", arg)
}


# htmlEscapeCopy ----

htmlEscapeCopy <- local({

  .htmlSpecials <- list(
    `&` = '&amp;',
    `<` = '&lt;',
    `>` = '&gt;'
  )
  .htmlSpecialsPattern <- paste(names(.htmlSpecials), collapse='|')
  .htmlSpecialsAttrib <- c(
    .htmlSpecials,
    `'` = '&#39;',
    `"` = '&quot;',
    `\r` = '&#13;',
    `\n` = '&#10;'
  )
  .htmlSpecialsPatternAttrib <- paste(names(.htmlSpecialsAttrib), collapse='|')
  function(text, attribute=FALSE) {
    pattern <- if(attribute)
      .htmlSpecialsPatternAttrib
    else
      .htmlSpecialsPattern
    text <- enc2utf8(as.character(text))
    # Short circuit in the common case that there's nothing to escape
    if (!any(grepl(pattern, text, useBytes = TRUE)))
      return(text)
    specials <- if(attribute)
      .htmlSpecialsAttrib
    else
      .htmlSpecials
    for (chr in names(specials)) {
      text <- gsub(chr, specials[[chr]], text, fixed = TRUE, useBytes = TRUE)
    }
    Encoding(text) <- "UTF-8"
    return(text)
  }
})


# metric units -----------------------------------------------
#
cm_to_inches <- function(x) {
  x / 2.54
}

mm_to_inches <- function(x) {
  x / 25.4
}

convin <- function(unit, x) {
  unit <- match.arg(unit, choices = c("in", "cm", "mm"), several.ok = FALSE)
  if (!identical("in", unit)) {
    x <- do.call(paste0(unit, "_to_inches"), list(x = x))
  }
  x
}


# from rlang pkg  ------------------------------------------

is_named <- function(x) {
  nms <- names(x)
  if (is.null(nms)) {
    return(FALSE)
  }
  if (any(detect_void_name(nms))) {
    return(FALSE)
  }
  TRUE
}


detect_void_name <- function(x) {
  x == "" | is.na(x)
}

Try the officer package in your browser

Any scripts or data that you put into this service are public.

officer documentation built on Oct. 10, 2024, 1:06 a.m.