lexicoR: Doing Research with DeepLEX

Documented in sort_rmd_head

#' Sort an R Markdown file with specified order of headers
#'
#' @details Restructure an input R Markdown file by its
#'   level 2 headers (\code{## }). The order is
#'   determined by specifying the h2 headers in the
#'   argument \code{order}.
#'
#' @param infile String. Path to the input Rmd file.
#' @param outfile String. Path to the output Rmd file.
#'   Defaults to \code{NULL}, which overwrites the
#'   input file.
#' @param order Character vector. A list of level 2
#'   titles in the input file which determines the order
#'   of h2 titles in the output file.
#' @keywords internal
sort_rmd_head <- function(in_file, out_file = NULL,
                          order = c("R Markdown2", "R Markdown")) {

  data <- readLines(in_file, encoding = 'utf-8')
  yml_head <- split_yaml_body(data)$yaml
  data <- split_yaml_body(data)$body

  # Code chunk ranges
  odd <- function(x) seq(from = 1, to = length(x), by = 2)
  code_idx <- grep("^```", data)
  code_range <- list(start = code_idx[odd(code_idx)],
                     end = code_idx[-odd(code_idx)])
  code_ranges <- NULL
  for (i in seq_along(code_range$start)) {
    code_ranges <- c(code_ranges, code_range$start[i]:code_range$end[i])
  }

  # Calc h2 ranges (Indexing lines)
  h_start <- grep("^## ", data)

  ## Don't count h2 in code chunks
  h2_in_code <- h_start %in% code_ranges
  h_start[!h2_in_code]
  ## Cal last lines in h2 sections
  h_end <- c(h_start[-1] - 1, length(data))
  ## Cal range before first h2
  before_h2 <- data[1:(h_start[1] - 1)]

  # h2 range Lookup table
  h2_trim <- substr(data[h_start], start = 4,
                   stop = nchar(data[h_start]))
  h2_rng <- data.frame(header =  h2_trim,
                       start = h_start,
                       end = h_end,
                       stringsAsFactors = F)

  ## Sort rows by given 'order'
  h_order_idx <- match(order, h2_rng$header)
  fixed_h2 <- h2_rng[h_order_idx,]
  custom_h2 <- h2_rng[-h_order_idx, ]
  h2_rng <- rbind(fixed_h2, custom_h2)
  h2_rng$new_start <- cumsum(
    c(1, (h2_rng$end - h2_rng$start + 1)[-length(h2_rng$header)])
    )
  h2_rng$new_end <- h2_rng$new_start + (h2_rng$end - h2_rng$start)

  # Create reordered Rmd
  data2 <- rep(NA, length(data))
  for (i in seq_along(h2_rng$header)) {
    start <- h2_rng$start[i]
    end <- h2_rng$end[i]
    n_start <- h2_rng$new_start[i]
    n_end <- h2_rng$new_end[i]
    data2[n_start:n_end] <- data[start:end]
  }

  # Rewrite rmd
  if (is.null(out_file)) out_file <- in_file
  data2 <- c(yml_head, before_h2, data2[!is.na(data2)])
  writeLines(paste(data2, collapse = "\n"),
             out_file)
}


#' Helper function for \code{split_yaml_body}
#'
#' @details This is a function from
#'   \code{blogdown:::yaml_load}
#'
#' @keywords internal
yaml_load <- function (x) {
  yaml::yaml.load(x, handlers = list(seq = function(x) {
      if (length(x) > 0) {
          x = unlist(x, recursive = FALSE)
          attr(x, "yml_type") = "seq"
      }
      x
  }))
}

#' Extract yaml & body from an R Markdown file
#'
#' @details This is a function from
#'   \code{blogdown:::split_yaml_body}
#'
#' @keywords internal
split_yaml_body <- function (x) {
    i = grep("^---\\s*$", x)
    n = length(x)
    res = if (n < 2 || length(i) < 2 ||
              (i[1] > 1 && !knitr:::is_blank(x[seq(i[1] - 1)]))) {
        list(yaml = character(), body = x)
    } else list(yaml = x[i[1]:i[2]], yaml_range = i[1:2],
                body = if (i[2] == n) character() else x[(i[2] + 1):n])
    res$yaml_list = if ((n <- length(res$yaml)) >= 3) {
        yaml_load(res$yaml[-c(1, n)])
    }
    res
}