R/ggpage_build.R
In ggpage: Creates Page Layout Visualizations

#' Creates a data frame for further analysis and plotting
#'
#' This function can be used in combination with \code{ggpage_plot} to get the
#' same result as \code{ggpage_quick}. However by splitting the data.frame
#' construction and plotting we are able to do intermediate analysis which
#' can be included in the visualization.
#'
#' The text MUST be presented in a column named text.
#'
#' @param book Character or data.frame. Can either have each element be a
#'   separate line or having each element being separate words.
#' @param lpp Numeric. Lines Per Page. Number of lines allocated for each page.
#' @param character_height Numeric. Relative size of the height of each letter
#'   compared to its width.
#' @param vertical_space Numeric. Distance between each lines vertically.
#' @param x_space_pages Numeric. Distance between pages along the x-axis.
#' @param y_space_pages Numeric. Distance between pages along the y-axis.
#' @param nrow Numeric. Number of rows of pages, if omitted defaults to square
#'   layout.
#' @param ncol Numeric. Number of columns of pages, if omitted defaults to
#'   square layout.
#' @param bycol Logical. If TRUE (the default) the matrix is filled by
#'   columns, otherwise the matrix is filled by rows.
#' @param wtl logical. If TRUE will convert single word vector into a vector
#'   with full lines. (defaults to FALSE).
#' @param para.fun Function that generates random numbers to determine number
#'  of word in each paragraph.
#' @param page.col column to split the pages by.
#' @param align Type of line alignment. Must be one of "left", "right" or "both".
#' @param line.max Maximal number of characters per line. Defaults to 80.
#' @param ... Extra arguments.
#' @return `data.frame` containing the following columns:
#' \itemize{
#'   \item `word`: Character. The words of the text.
#'   \item `page`: Integer. Page number.
#'   \item `line`: Integer. Line number within the page.
#'   \item `xmin`: Numeric. Border of rectangle, used by \code{ggpage_plot} do not
#'     alter.
#'   \item `xmax`: Numeric. Border of rectangle, used by \code{ggpage_plot} do not
#'     alter.
#'   \item `ymin`: Numeric. Border of rectangle, used by \code{ggpage_plot} do not
#'     alter.
#'   \item `ymax`: Numeric. Border of rectangle, used by \code{ggpage_plot} do not
#'     alter.
#'     }
#' @examples
#' \donttest{
#' library(dplyr)
#' library(stringr)
#' library(ggplot2)
#' library(tidytext)
#' library(ggpage)
#' # build and plot
#' ## data.frame with full lines
#' ggpage_build(tinderbox) %>%
#'   ggpage_plot()
#' ## vector with full lines
#' ggpage_build(book = tinderbox %>%
#'   pull(text)) %>%
#'   ggpage_plot()
#' ## data.frame with single words
#' ggpage_build(tinderbox) %>%
#'   unnest_tokens(text, word) %>%
#'   ggpage_plot()
#' ## vector with single words
#' ggpage_build(tinderbox %>%
#'   unnest_tokens(text, text) %>%
#'   pull(text)) %>%
#'   ggpage_plot()
#'
#' # nrow and ncol
#' ggpage_build(tinderbox, nrow = 2) %>%
#'   ggpage_plot()
#' ggpage_build(tinderbox, ncol = 2) %>%
#'   ggpage_plot()
#'
#' # Include analysis within
#' ggpage_build(tinderbox) %>%
#'   mutate(word_length = str_length(word)) %>%
#'   ggpage_plot(aes(fill = word_length))
#' }
#' @export
ggpage_build <- function(book, lpp = 25, character_height = 3,
                         vertical_space = 1, x_space_pages = 10,
                         y_space_pages = 10, nrow = NULL, ncol = NULL,
                         bycol = TRUE, wtl = NULL,
                         para.fun = NULL, page.col = NULL, align = "left",
                         line.max = 80,
                         ...) {

  if (!any(class(book) %in% c("character", "data.frame"))) {
    stop("Please supply character string or data.frame.")
  }

  book0 <- book

  # Makes strings to data.frames
  if (inherits(book, "character")) {
    book <- data.frame(text = book, stringsAsFactors = FALSE)
  }

  # Makes single words to lines
  single_word_check <- book %>%
    dplyr::slice(1:25) %>%
    dplyr::pull(.data$text) %>%
    trimws("both") %>%
    stringr::str_detect(" ") %>%
    mean() < 0.9

  if (is.null(wtl)) wtl <- single_word_check

  if (wtl) {

    if (is.null(para.fun)) {
      book <- data.frame(text = word_to_line(book), stringsAsFactors = FALSE)
    } else {
      if (!is.function(para.fun)) stop("para.fun must be a function")

      book <- book %>%
        dplyr::mutate(paragraph_id = para_index(NROW(book), para.fun, ...) %>%
                                       break_help())

      book <- purrr::map_df(dplyr::pull(book, .data$paragraph_id) %>% unique(),
                     ~ book %>%
                         dplyr::filter(paragraph_id == .x) %>%
                         word_to_line() %>%
                         data.frame(text = ., stringsAsFactors = FALSE))

    }
  }

  # Data table with full lines needed here
  if (is.null(page.col)) {
    data <- book %>%
      dplyr::mutate(index_line = 1:NROW(book),
                    page = rep(1:ceiling(NROW(book) / lpp),
                               length.out = NROW(book),
                               each = lpp))
  } else {
    page.col <- rlang::quo_name(rlang::enquo(page.col))

    data <- book %>%
      dplyr::mutate(index_line = 1:NROW(book),
                    page = book0[[page.col]])
  }

  data <- data %>%
    page_liner() %>%
    tidytext::unnest_tokens(output = "word", input = "text") %>%
    dplyr::mutate(word_length = stringr::str_length(.data$word)) %>%
    dplyr::group_by(.data$index_line) %>%
    dplyr::mutate(
      first_word = dplyr::lag(.data$line, default = 0) != .data$line,
      x_space_right = cumsum(.data$word_length + 1) - 1,
      x_space_left = cumsum(dplyr::lag(.data$word_length + 1,
                                       default = 0))) %>%
    dplyr::ungroup()

  # Longest line
  max_line_length <- book %>%
    dplyr::pull(.data$text) %>%
    stringr::str_length() %>%
    max()

  # Add page spacing
  num_pages <- data %>%
    dplyr::pull(.data$page) %>%
    dplyr::n_distinct()

  if (!is.null(nrow) || !is.null(ncol)) {
    if (!is.null(nrow)) {
      n_row_y <- nrow
      n_row_x <- ceiling(num_pages / n_row_y)
    }
    if (!is.null(ncol)) {
      n_row_x <- ncol
      n_row_y <- ceiling(num_pages / n_row_x)
    }
  } else {
    n_row_x <- n_row_y <- ceiling(sqrt(num_pages))
  }

  if (bycol) {
    page_spacing <- data.frame(
      page = 1:num_pages,
      x_page = rep(1:n_row_x, length.out = num_pages, each = n_row_y),
      y_page = rep(1:n_row_y, length.out = num_pages)
    )
  } else {
    page_spacing <- data.frame(
      page = 1:num_pages,
      x_page = rep(1:n_row_x, length.out = num_pages),
      y_page = rep(1:n_row_y, length.out = num_pages, each = n_row_x)
    )
  }

  data <- data %>%
    dplyr::left_join(page_spacing, by = "page") %>%
    dplyr::mutate(
      xmin = .data$x_space_right + .data$x_page * (max_line_length + x_space_pages),
      xmax = .data$x_space_left + .data$x_page * (max_line_length + x_space_pages),
      ymin = - .data$line * (character_height + vertical_space) - .data$y_page *
             (lpp * (character_height + vertical_space) + y_space_pages),
      ymax = - .data$line * (character_height + vertical_space) -
             character_height - .data$y_page *
             (lpp * (character_height + vertical_space) + y_space_pages)) %>%
    dplyr::select(-.data$index_line, -.data$word_length, -.data$first_word,
                  -.data$x_space_right, -.data$x_space_left, -.data$x_page,
                  -.data$y_page) %>%
    dplyr::select(.data$word, dplyr::everything())

  # Text alignment
  data %>%
    dplyr::mutate(index_line = stringr::str_c(.data$page, .data$line, sep = "-")) %>%
    dplyr::group_by(.data$index_line) %>%
    dplyr::do(line_align(., line.max, align)) %>%
    dplyr::ungroup()
}