R/pack.R
In pkglite: Compact Package Representations

Documented in pack

#  Copyright (c) 2024 Merck & Co., Inc., Rahway, NJ, USA and its affiliates. All rights reserved.
#
#  This file is part of the pkglite program.
#
#  pkglite is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program.  If not, see <http://www.gnu.org/licenses/>.

#' Pack packages into a text file
#'
#' @param ... One or more file collection objects
#' generated by [collate()].
#' @param output Path to the output text file.
#' If empty, will create a txt file using the lower-cased package name
#' in the current working directory. For multiple packages,
#' will use `"pkglite.txt"`.
#' @param quiet Suppress printing of progress?
#'
#' @return The output file path.
#'
#' @section Specification:
#' \if{latex}{
#'   \itemize{
#'     \item Get the package metadata, for example, package names, from the
#'     input file collection(s).
#'     \item If unspecified, generate a default output file name by the
#'     number of packages.
#'     \item Read each file in each package as DCF blocks.
#'     \item Add header and write to the output file.
#'   }
#' }
#' \if{html}{The contents of this section are shown in PDF user manual only.}
#'
#' @export pack
#'
#' @examples
#' # pack two packages
#' pkg1 <- system.file("examples/pkg1", package = "pkglite")
#' pkg2 <- system.file("examples/pkg2", package = "pkglite")
#'
#' fc1 <- pkg1 %>% collate(file_default())
#' fc2 <- pkg2 %>% collate(file_default())
#'
#' txt <- tempfile(fileext = ".txt")
#' pack(fc1, fc2, output = txt, quiet = TRUE)
#'
#' txt %>%
#'   readLines() %>%
#'   head() %>%
#'   cat(sep = "\n")
#' txt %>%
#'   readLines() %>%
#'   length()
pack <- function(..., output, quiet = FALSE) {
  # handle inputs ----
  lst_fc <- list(...)
  npkgs <- length(lst_fc)
  pkg_names <- get_fc_pkg(lst_fc)
  if (npkgs < 1L) stop("Must provide at least one file collection as input.", call. = FALSE)
  if (!all(sapply(lst_fc, is_file_collection))) stop("All inputs must be file collection objects.", call. = FALSE)

  # determine output file name ----
  output_default <- if (npkgs > 1L) "pkglite.txt" else tolower(paste0(pkg_names[1L], ".txt"))
  output_file <- if (missing(output)) output_default else output

  # read files in each package ----
  if (!quiet) cli_h1("Packing into pkglite file")
  lst_text <- lapply(seq_len(npkgs), function(i) {
    pkg_name <- pkg_names[i]
    if (!quiet) cli_rule("Reading package: ", cli_pkg(pkg_name))
    fc_to_text(lst_fc[[i]], quiet = quiet)
  })

  # write to a text file ----
  if (!quiet) cli_text("Writing to: ", cli_path_dst(output_file))
  vec_text <- unlist(lst_text, recursive = TRUE)
  vec_text <- add_header(vec_text)
  writeLines(vec_text, con = output_file)

  invisible(output_file)
}

#' Get package names from file collections
#'
#' Get the package names from a list of file collection objects.
#'
#' @param lst A list of file collections.
#'
#' @section Specification:
#' \if{latex}{
#'   \itemize{
#'     \item Return the value of \code{pkg_name} from each file collection.
#'   }
#' }
#' \if{html}{The contents of this section are shown in PDF user manual only.}
#'
#' @noRd
get_fc_pkg <- function(lst) {
  sapply(lst, "[[", "pkg_name")
}

#' Generate output-ready string from a file collection
#'
#' Given a file collection, return a list of output-ready string vectors.
#'
#' @param fc File collection.
#' @param quiet Suppress printing of progress?
#'
#' @section Specification:
#' \if{latex}{
#'   \itemize{
#'     \item Read the files in a file collection and return the output-ready,
#'     DCF formatted character vectors.
#'   }
#' }
#' \if{html}{The contents of this section are shown in PDF user manual only.}
#'
#' @noRd
fc_to_text <- function(fc, quiet = FALSE) {
  pkg_name <- fc[["pkg_name"]]
  df <- fc[["df"]]
  nfiles <- nrow(df)
  lapply(seq_len(nfiles), function(i) {
    path_rel <- df[i, "path_rel"]
    if (!quiet) cli_text("Reading ", cli_path_src(path_rel))
    file_to_vec(df[i, "path_abs"], format = df[i, "format"], pkg_name = pkg_name, path_rel = path_rel)
  })
}

#' Read a file and format as a DCF block
#'
#' Read a file and convert to a DCF formatted character vector.
#'
#' @param path File path.
#' @param format File format.
#' @param pkg_name Package name.
#' @param path_rel File path relative to package root.
#' @param indent How many whitespaces to add before each line.
#'
#' @section Specification:
#' \if{latex}{
#'   \itemize{
#'     \item Read a file and return the DCF formatted lines of text
#'     as a character vector.
#'   }
#' }
#' \if{html}{The contents of this section are shown in PDF user manual only.}
#'
#' @noRd
file_to_vec <- function(path, format = c("binary", "text"),
                        pkg_name = NULL, path_rel = NULL, indent = 2L) {
  format <- match.arg(format)

  vec_pkg <- paste0("Package: ", pkg_name)
  vec_file <- paste0("File: ", path_rel)
  vec_format <- paste0("Format: ", format)
  if (format == "text") vec_content <- read_file_text(path)
  if (format == "binary") vec_content <- read_file_binary(path)
  vec_content <- add_indent(vec_content, n = indent)
  vec_content <- c("Content:", vec_content)

  c(vec_pkg, vec_file, vec_format, vec_content, "")
}

#' Read a text file as a character vector
#'
#' @param path Text file path.
#'
#' @section Specification:
#' \if{latex}{
#'   \itemize{
#'     \item Read the text file in \code{path} using \code{readLines()} and
#'     return a character vector.
#'   }
#' }
#' \if{html}{The contents of this section are shown in PDF user manual only.}
#'
#' @noRd
read_file_text <- function(path) {
  readLines(path, warn = FALSE)
}

#' Read a binary file as a formatted vector
#'
#' Read a binary file and return it as a character vector
#' with maximum 128 characters per element by default.
#'
#' @param path Binary file path.
#' @param nmax The maximum length of each string,
#' divided by 2 due to the hex representation.
#'
#' @section Specification:
#' \if{latex}{
#'   \itemize{
#'     \item Read the binary file as a hex string.
#'     \item Segment the string into character vectors with maximum
#'     \code{nmax} characters in each element.
#'     \item Return the vector.
#'   }
#' }
#' \if{html}{The contents of this section are shown in PDF user manual only.}
#'
#' @noRd
read_file_binary <- function(path, nmax = 64) {
  vec_char <- readbin_to_char(path)
  lst_char <- seg_char(vec_char, nmax = nmax)
  sapply(lst_char, paste0, collapse = "")
}

#' Get file size
#'
#' @param file File path.
#'
#' @section Specification:
#' \if{latex}{
#'   \itemize{
#'     \item Get the (binary) file size using \code{file.info()}.
#'   }
#' }
#' \if{html}{The contents of this section are shown in PDF user manual only.}
#'
#' @noRd
get_file_size <- function(file) {
  file.info(file)$size
}

#' Read a binary file as string
#'
#' Read a binary file and covert to a character string.
#'
#' @param path The binary file path.
#'
#' @section Specification:
#' \if{latex}{
#'   \itemize{
#'     \item Read the binary file and return a character string of hex code.
#'   }
#' }
#' \if{html}{The contents of this section are shown in PDF user manual only.}
#'
#' @noRd
readbin_to_char <- function(path) {
  as.character(readBin(path, what = "raw", n = get_file_size(path)))
}

#' Segment string by maximum length
#'
#' Segment a character string into a list of strings of the same maximum length.
#'
#' @param x A character string.
#' @param nmax The maximum length of each segmented string.
#'
#' @section Specification:
#' \if{latex}{
#'   \itemize{
#'     \item Use \code{nmax} and the number of characters in the string to
#'     decide the starting character index and the ending character index
#'     of each segment.
#'     \item Handle the edge case where the number of characters in the
#'     string is smaller than \code{nmax}.
#'     \item Return the segmented strings as a vector.
#'   }
#' }
#' \if{html}{The contents of this section are shown in PDF user manual only.}
#'
#' @noRd
seg_char <- function(x, nmax) {
  nchars <- length(x)
  pos <- seq(from = 1L, to = nchars, by = nmax)
  short <- nchars <= nmax
  nlines <- if (short) 1L else length(pos)
  pos_start <- if (short) 1L else pos[seq_len(nlines)]
  pos_end <- if (short) nchars else c(pos[2L:nlines] - 1L, nchars)
  lapply(seq_len(nlines), function(i) x[pos_start[i]:pos_end[i]])
}

#' Add indentation to a file content vector
#'
#' @param x A character vector of the file content.
#' @param n How many whitespaces to add before each line.
#'
#' @section Specification:
#' \if{latex}{
#'   \itemize{
#'     \item Add \code{n} whitespaces to the start of each element in \code{x}.
#'   }
#' }
#' \if{html}{The contents of this section are shown in PDF user manual only.}
#'
#' @noRd
add_indent <- function(x, n) {
  y <- paste0(rep(" ", n), collapse = "")
  paste0(y, x)
}

#' Add header to vector
#'
#' Add header to the text vector before writing.
#'
#' @param x A character vector of the original text.
#'
#' @section Specification:
#' \if{latex}{
#'   \itemize{
#'     \item Add header text lines to the text vector.
#'   }
#' }
#' \if{html}{The contents of this section are shown in PDF user manual only.}
#'
#' @noRd
add_header <- function(x) {
  y <- c(
    "# Generated by pkglite: do not edit by hand",
    "# Use pkglite::unpack() to restore the packages",
    ""
  )
  c(y, x)
}