## R/getFileAsText.R

#' read a file as text, converting if necessary
#'
#' @param f full path to file.
#'
#' @return a character vector of the lines in the file, after converting
#' to text format
#'
#' @details The type of the file is determined using the linux \code{file}
#' utility.  These type conversions are known:
#'
#' \itemize{
#' \item Rich Text Format: the file is converted using \code{unrtf --text},
#' and the leading comment lines (those starting with \code{###}) are
#' stripped.  Lines starting with \code{###} that appear after the first
#' non-comment line are kept.
#' \item ASCII text or UTF-8 Unicode text: the file is read as-is.
#' \item empty: a vector of length 0 is returned.
#' }
#'
#' Any other file type raises an error.
#'
#' @note If the file \code{f} is empty, this returns a vector of length 0.
#'
#' @export
#'
#' @author John Brzustowski \email{jbrzusto@@REMOVE_THIS_PART_fastmail.fm}

getFileAsText = function(f) {
    ## safeSys is a package helper defined elsewhere; presumably it runs the
    ## command with the given arguments and returns its output (as a vector
    ## of lines when splitOutput=TRUE) -- confirm against its definition.
    type = safeSys("file", "--brief", f)
    if (grepl("^empty\\s*$", type, perl=TRUE)) {
        ## `file` reports a zero-length file as "empty"; return the documented
        ## zero-length result rather than failing as an unknown format
        rv = character(0)
    } else if (grepl("^Rich Text Format", type, perl=TRUE)) {
        rv = safeSys("unrtf", "--text", f, splitOutput=TRUE)
        ## strip leading comment lines generated by unrtf; the --quiet option
        ## is supposed to prevent unrtf from generating these, but doesn't work:
        ## http://savannah.gnu.org/bugs/?48895
        cmt = grep("^###", rv, perl=TRUE)
        ## grep() returns increasing indexes, so cmt[i] == i holds exactly
        ## for the unbroken run of comment lines starting at line 1
        lead = cmt[cmt == seq_along(cmt)]
        ## only index when there is something to drop: rv[-integer(0)]
        ## selects *nothing*, which would discard the entire file
        if (length(lead) > 0) {
            rv = rv[- lead]
        }
    } else if (grepl("^(ASCII text|UTF-8 Unicode text)", type, perl=TRUE)) {
        rv = readLines(f)
    } else {
        stop("unknown text file format for ",f)
    }
    return(rv)
}
## jbrzusto/motus-R-package documentation built on May 18, 2019, 7:03 p.m.