R/fwf2csv.R

Defines functions fwf2csv

Documented in fwf2csv

#' Creates a CSV Representation of Data According to Stacks of Whitespace
#' 
#' Uses awk to convert a fixed-width file to a CSV based on stacks of whitespace.
#' 
#' @param infile The input file. Can also be `"clipboard"` to read directly
#' from the clipboard.
#' @param toDF Logical. Should the file be read in while we are at it? Defaults
#' to `FALSE`.
#' @param \dots Other arguments to be passed to `read.csv` (and through it to
#' `read.table`). Only used when `toDF = TRUE`.
#' @return A vector or a `data.frame`, depending on the value in `toDF`.
#' @author Ananda Mahto and [Ed Morton](http://stackoverflow.com/users/1745001/ed-morton).
#' @references <http://stackoverflow.com/q/30868600/1270695>
#' @note Only tested on Linux.
#' @examples
#' 
#' myfile <- tempfile(fileext = ".txt")
#' Lines <- c("aaa  b b ccc      345", "ddd  fgt f u      3456", "e r  der der      5 674")
#' cat(Lines, sep = "\n")
#' cat(Lines, sep = "\n", file = myfile)
#' fwf2csv(myfile)
#' fwf2csv(myfile, TRUE, header = FALSE)
#'  
#' @export fwf2csv
fwf2csv <- function(infile, toDF = FALSE, ...) {
  # Convert a whitespace-aligned fixed-width file to CSV lines by running an
  # awk program over it. Columns that contain a space in *every* line are
  # treated as separators; runs of such columns collapse to a single comma.
  #
  # infile: path to the input file, or "clipboard" to use the clipboard text.
  # toDF:   if TRUE, parse the CSV lines with read.csv() and return the result;
  #         otherwise return the CSV lines as a character vector.
  # ...:    forwarded to read.csv() when toDF is TRUE.
  #
  # Errors if `infile` is not a single string, the file does not exist, awk is
  # not available, or awk exits with a non-zero status.

  if (!is.character(infile) || length(infile) != 1L || is.na(infile)) {
    stop("'infile' must be a single file path or \"clipboard\"", call. = FALSE)
  }
  if (!nzchar(Sys.which("awk"))) {
    stop("'awk' was not found on the system PATH", call. = FALSE)
  }

  if (infile == "clipboard") {
    # Dump the clipboard to a temporary file so awk can read it (twice).
    clip_file <- tempfile()
    on.exit(unlink(clip_file), add = TRUE)
    writeLines(readClip(), clip_file)
    infile <- clip_file
  }

  # The path is shell-quoted below, which disables the shell's own `~`
  # expansion, so expand it here to keep "~/file.txt" working.
  infile <- path.expand(infile)
  if (!file.exists(infile)) {
    stop("input file does not exist: ", infile, call. = FALSE)
  }

  # Two-pass awk program: BEGIN duplicates the file argument so the file is
  # read twice. Pass 1 (NR==FNR) records which character positions hold a
  # space and which hold a non-space. At the start of pass 2, any position
  # that was ever non-space is dropped from the separator set. Pass 2 then
  # replaces separator positions with commas and squeezes repeated commas.
  awk_script <- 'BEGIN{ FS=OFS=""; ARGV[ARGC]=ARGV[ARGC-1]; ARGC++ }
NR==FNR {
    for (i=1;i<=NF;i++) {
        if ($i == " ") {
            space[i]
        }
        else {
            nonSpace[i]
        }
    }
    next
}
FNR==1 {
    for (i in nonSpace) {
        delete space[i]
    }
}
{
    for (i in space) {
        $i = ","
    }
    gsub(/,+/,",")
    print
}'
  script_file <- tempfile(fileext = ".awk")
  on.exit(unlink(script_file), add = TRUE)
  writeLines(awk_script, script_file)

  # Quote both paths so spaces and shell metacharacters are passed literally.
  csv_lines <- system2(
    "awk",
    args = c("-f", shQuote(script_file), shQuote(infile)),
    stdout = TRUE
  )
  status <- attr(csv_lines, "status")
  if (!is.null(status) && status != 0L) {
    stop("awk exited with status ", status, call. = FALSE)
  }

  if (isTRUE(toDF)) {
    utils::read.csv(text = csv_lines, ...)
  } else {
    csv_lines
  }
}
NULL
mrdwab/SOfun documentation built on June 20, 2020, 6:15 p.m.