R/das_read.R

Defines functions .das_read das_read

Documented in das_read

#' Read DAS file(s)
#'
#' Read one or more fixed-width DAS text file(s) generated by WinCruz into a data frame,
#'   where each line is data for a specific event.
#' @param file filename(s) of one or more DAS files
#' @param skip integer; see \code{\link[readr]{read_fwf}}. Default is 0
#' @param ... ignored
#'
#' @details Reads/parses DAS data into columns of a data frame.
#'   If \code{file} contains multiple filenames, then the individual
#'   data frames will be concatenated.
#'
#'   The provided DAS file must adhere to the following column number and format specifications:
#'   \tabular{lrr}{
#'     \emph{Item}  \tab \emph{Columns} \tab \emph{Format}\cr
#'     Event number \tab 1-3   \tab \cr
#'     Event        \tab 4     \tab \cr
#'     Effort dot   \tab 5     \tab \cr
#'     Time         \tab 6-11  \tab HHMMSS or HHMM\cr
#'     Date         \tab 13-18 \tab MMDDYY\cr
#'     Latitude     \tab 20-28 \tab NDD:MM.MM\cr
#'     Longitude    \tab 30-39 \tab WDDD:MM.MM\cr
#'     Data1        \tab 40-44 \tab \cr
#'     Data2        \tab 45-49 \tab \cr
#'     Data3        \tab 50-54 \tab \cr
#'     Data4        \tab 55-59 \tab \cr
#'     Data5        \tab 60-64 \tab \cr
#'     Data6        \tab 65-69 \tab \cr
#'     Data7        \tab 70-74 \tab \cr
#'     Data8        \tab 75-79 \tab \cr
#'     Data9        \tab 80-84 \tab \cr
#'     Data10       \tab 85-89 \tab \cr
#'     Data11       \tab 90-94 \tab \cr
#'     Data12       \tab 95+   \tab \cr
#'   }
#'
#'   See \code{\link{das_format_pdf}} for more information about DAS format requirements, and
#'   note that 'Data#' columns may be referred to as 'Field#' columns in other documentation.
#'
#' @return A \code{das_dfr} object, which is also a data frame, with DAS data read into columns.
#'   The data are read into the data frame as characters as described in 'Details',
#'   with the following exceptions:
#'   \tabular{lll}{
#'     \emph{Name} \tab \emph{Class} \tab \emph{Details}\cr
#'     EffortDot \tab logical   \tab \code{TRUE} if "." was present, and \code{FALSE} otherwise\cr
#'     DateTime  \tab POSIXct   \tab combination of 'Date' and 'Time' columns\cr
#'     Lat       \tab numeric   \tab 'Latitude' column converted to decimal degrees in range [-90, 90]\cr
#'     Lon       \tab numeric   \tab 'Longitude' column converted to decimal degrees in range [-180, 180]\cr
#'     Data#     \tab character \tab leading/trailing whitespace trimmed for non-comment events (i.e. where 'Event' is not "C")\cr
#'     EventNum  \tab character \tab leading/trailing whitespace trimmed; left as character for some project-specific codes\cr
#'     file_das  \tab character \tab base filename, extracted from the \code{file} argument\cr
#'     line_num  \tab integer   \tab line number of each data row\cr
#'   }
#'
#'   DateTime values have a (meaningless) time zone value of "UTC".
#'   See the OffsetGMT column from \code{\link{das_process}}
#'   for relevant time zone information.
#'
#'   Warnings are printed if any unexpected events have \code{NA} DateTime/Lat/Lon values,
#'   or if any Lat/Lon values cannot be converted to numeric values.
#'   Events that are 'expected' to have \code{NA} DateTime/Lat/Lon values are:
#'   C, ?, 1, 2, 3, 4, 5, 6, 7, 8
#'
#' @examples
#' y <- system.file("das_sample.das", package = "swfscDAS")
#' das_read(y)
#'
#' @export
das_read <- function(file, skip = 0, ...) {
  # Filenames must be supplied as a character vector
  stopifnot(inherits(file, "character"))

  # Guard: an empty vector names no files at all
  if (length(file) == 0) {
    stop("file must be a vector of one or more filename(s)")
  }

  # Guard: every supplied path has to point at an existing file
  file.missing <- !file.exists(file)
  if (any(file.missing)) {
    stop("The supplied character string(s) does (do) not (all) name ",
         "an existing file(s), ",
         "aka file.exists(file) is FALSE")
  }

  # Read each file on its own (same 'skip' for all of them), then stack the
  #   per-file results row-wise in the order the filenames were given
  das.list <- lapply(file, .das_read, skip = skip)
  do.call(rbind, das.list)
}


# Read a single fixed-width DAS file and parse it into a das_dfr object.
#   Internal workhorse called by das_read() once per file.
#
# Arguments:
#   file: character; name of the DAS file to read
#   skip: number of lines to skip at the top of the file. Passed to
#     read_fwf(), and added to the row index so that line_num refers to
#     line numbers in the file rather than rows of the output
#
# Value: the result of as_das_dfr() applied to a data frame with columns
#   Event, EffortDot, DateTime, Lat, Lon, Data1-Data12,
#   EventNum, file_das, and line_num
#
# Warnings are issued if: row 1 is mostly blank (hinting that 'skip' is needed),
#   .numeric_na() flags any Lat/Lon pieces, or Lat/Lon/DateTime are NA for
#   events other than the 'expected' ones listed below
.das_read <- function(file, skip) {
  #--------------------------------------------------------
  # Input checks
  stopifnot(inherits(file, "character"))

  # Start and end (inclusive) column indices of the 23 fixed-width fields:
  #   EventNum, Event, EffortDot, Time, Date,
  #   Lat hemisphere/degrees/minutes, Lon hemisphere/degrees/minutes,
  #   Data1-Data12 (5 columns each; Data12 runs to the end of the line)
  # Data9 spans columns 80-84; an end of 94 would make it overlap
  #   (and duplicate the contents of) Data10 and Data11
  fwf.start <- c(1,4,5, 6,13, 20,21,24, 30,31,35, 40,45,50,55,60,65,70,75,80,85,90,95)
  fwf.end   <- c(3,4,5, 11,18, 20,22,28, 30,33,39, 44,49,54,59,64,69,74,79,84,89,94,NA)

  # Everything is read as character, without trimming, so that comment text
  #   is preserved exactly; blank fields of up to 6 spaces become NA.
  # suppressWarnings() is for lines that do not have data in all columns
  x <- suppressWarnings(read_fwf(
    file, col_positions = fwf_positions(start = fwf.start, end = fwf.end),
    na = c("", " ", "  ", "   ", "    ", "     ", "      "),
    col_types = cols(.default = col_character()),
    trim_ws = FALSE, skip = skip, skip_empty_rows = FALSE
  ))

  names(x) <- c(
    "EventNum", "Event", "EffortDot", "Time", "Date",
    "Lat1", "Lat2", "Lat3", "Lon1", "Lon2", "Lon3",
    "Data1", "Data2", "Data3", "Data4", "Data5", "Data6", "Data7",
    "Data8", "Data9", "Data10", "Data11", "Data12"
  )

  # Check for if lines should be skipped: warn if more than 7 of these
  #   9 key fields are blank in the first row that was read
  if (sum(is.na(c(x$Event[1], x$Time[1], x$Date[1],
                  x$Lon1[1], x$Lon2[1], x$Lon3[1],
                  x$Lat1[1], x$Lat2[1], x$Lat3[1]))) > 7)
    warning("There are a lot of blank columns in row 1; should you use ",
            "the skip argument? See `?das_read`",
            immediate. = TRUE)

  # Process some data, and add file and line number columns
  # Any non-blank effort dot field counts as TRUE
  x$EffortDot <- !is.na(x$EffortDot)
  EventNum <- trimws(x$EventNum)
  file_das  <- basename(file)
  # Offset by 'skip' so that line_num matches the line's position in the file
  line_num  <- as.integer(seq_len(nrow(x)) + skip)

  #--------------------------------------------------------
  # Convert lat and lon values to decimal degrees (degrees + minutes/60).
  #   Anything other than "N"/"E" is treated as negative (south/west);
  #   a missing hemisphere gives NA
  Lat <- ifelse(x$Lat1 == "N", 1, -1) * (as.numeric(x$Lat2) + as.numeric(x$Lat3)/60)
  Lon <- ifelse(x$Lon1 == "E", 1, -1) * (as.numeric(x$Lon2) + as.numeric(x$Lon3)/60)

  # Print warning for the row indices flagged by .numeric_na() in any of
  #   the degree/minute pieces
  ll.num.na <- unique(
    c(.numeric_na(x$Lat2), .numeric_na(x$Lat3),
      .numeric_na(x$Lon2), .numeric_na(x$Lon3))
  )
  if (length(ll.num.na) > 0)
    warning("The following line number(s) have values in the ",
            "Latitude and/or Longitude ",
            "columns that could not be converted to a numeric value:\n",
            .print_file_line(file_das, line_num, ll.num.na))

  # Lat/lon NA check: NA positions are only 'expected' for these events
  ll.na <- is.na(Lat) | is.na(Lon)
  ll.na.event <- c("C", "?", 1:8)
  ll.na.which <- which((!(x$Event %in% ll.na.event)) & ll.na)
  if (length(ll.na.which) > 0) {
    warning("There are unexpected Lat and/or Lon NAs (i.e. for events other than ",
            paste(ll.na.event, collapse = ", "),
            ") in the following:\n",
            .print_file_line(file_das, line_num, ll.na.which))
  }

  #--------------------------------------------------------
  # Extract date/time: try HHMMSS first, then fall back to HHMM for
  #   the values that failed to parse
  DateTime <- strptime(paste(x$Date, x$Time), "%m%d%y %H%M%S", "UTC")
  dt.na <- is.na(DateTime)
  DateTime[dt.na] <- strptime(paste(x$Date, x$Time), "%m%d%y %H%M", "UTC")[dt.na]
  dt.na <- is.na(DateTime)

  # Datetime NA check
  # NOTE(review): unlike the Lat/Lon check, this list also treats "*" and "#"
  #   events as expected, while the das_read documentation lists only
  #   C, ?, 1-8 - confirm which is intended
  dt.na.event <- c("*", "#", "?", "C", 1:8)
  dt.na.which <- which((!(x$Event %in% dt.na.event) & dt.na))
  if (length(dt.na.which) > 0) {
    warning("There are unexpected DateTime NAs (i.e. for events other than ",
            paste(dt.na.event, collapse = ", "),
            ") in the following:\n",
            .print_file_line(file_das, line_num, dt.na.which))
  }

  #--------------------------------------------------------
  # Extract Data# values: comment ("C") events keep their text verbatim,
  #   all other events have leading/trailing whitespace trimmed.
  # Note that rows with an NA Event get NA for every Data# column, because
  #   ifelse() returns NA where its test is NA
  is.comment <- x$Event == "C"
  data.names <- paste0("Data", 1:12)
  data.list <- lapply(data.names, function(i) {
    ifelse(is.comment, x[[i]], trimws(x[[i]]))
  })
  names(data.list) <- data.names

  # Data12 is open-ended, so an all-blank entry can be longer than the
  #   blank strings in 'na' above (eg "       "); map those to NA as well
  data.list$Data12 <- ifelse(
    is.comment,
    ifelse(trimws(x$Data12) == "", NA, x$Data12),
    trimws(x$Data12)
  )

  data.df <- do.call(data.frame, c(data.list, list(stringsAsFactors = FALSE)))
  data.df[data.df == ""] <- NA

  # Coerce data frame to das_dfr object and return
  as_das_dfr(data.frame(
    Event = x$Event, EffortDot = x$EffortDot, DateTime, Lat, Lon, data.df,
    EventNum, file_das, line_num,
    stringsAsFactors = FALSE
  ))
}

Try the swfscDAS package in your browser

Any scripts or data that you put into this service are public.

swfscDAS documentation built on Aug. 10, 2023, 9:06 a.m.