# R/airdas_read.R
#
# Defines functions: .airdas_read_general, .airdas_read_turtle,
#   .airdas_read_survey, .airdas_read_phocoena, airdas_read
#
# Documented in: airdas_read

#' Read AirDAS file(s)
#'
#' Read one or more fixed-width aerial survey DAS text file(s) 
#'   generated by TURTLEP, or another AirDAS program, into a data frame, 
#'   where each line is data for a specific event
#'
#' @param file filename(s) of one or more AirDAS files
#' @param file.type character; indicates the program used to create \code{file}.
#'   Must be one of: "turtle", "caretta", "survey", or "phocoena" (case sensitive). 
#'   Default is "turtle". 
#'   Note that "survey" files are not yet supported, and result in an error
#' @param skip integer; see \code{\link[readr]{read_fwf}}. Default is 0.
#'   The same value is used for every file in \code{file}
#' @param tz character; see \code{\link[base]{strptime}}. Default is "UTC"
#' @param ... ignored
#'
#' @details Reads/parses aerial survey DAS data into columns of a data frame.
#'   Each file is read and processed on its own. 
#'   If \code{file} contains multiple filenames, then the individual 
#'   data frames will be combined using \code{\link[base:cbind]{rbind}}
#'   
#'   See \code{\link{airdas_format_pdf}} for information about 
#'   AirDAS format requirements for the specific file types (programs)
#'   
#' @return An \code{airdas_dfr} object, which is also a data frame, 
#'   with AirDAS data read into columns.
#'   The data are read into the data frame as characters,
#'   with the following exceptions:
#'   \tabular{lll}{
#'     \emph{Name} \tab \emph{Class} \tab \emph{Details}\cr
#'     EffortDot \tab logical   \tab \code{TRUE} if "." was present, and \code{FALSE} otherwise\cr
#'     DateTime  \tab POSIXct   \tab combination of 'Date' and 'Time' columns, with time zone \code{tz}\cr
#'     Lat       \tab numeric   \tab 'Latitude' columns converted to decimal degrees in range [-90, 90]\cr
#'     Lon       \tab numeric   \tab 'Longitude' columns converted to decimal degrees in range [-180, 180]\cr
#'     Data#     \tab character \tab leading/trailing whitespace trimmed for non-comment events (i.e. where 'Event' is not "C" )\cr
#'     file_das  \tab character \tab base filename, extracted from the \code{file} argument\cr
#'     line_num  \tab integer   \tab line number of each data row\cr
#'     file_type \tab character \tab \code{file.type} argument
#'   }
#'
#' @examples
#' y <- system.file("airdas_sample.das", package = "swfscAirDAS")
#' airdas_read(y, file.type = "turtle")
#'
#' @export
airdas_read <- function(file, file.type = c("turtle", "caretta", "survey", "phocoena"), 
                        skip = 0, tz = "UTC", ...) {
  # Argument classes are validated before anything else is done
  stopifnot(
    inherits(file, "character"),
    inherits(file.type, "character")
  )
  
  # Resolve file.type to exactly one of the choices in the signature
  file.type <- match.arg(file.type)
  
  # all(file.exists(character(0))) is TRUE, 
  #   so empty input must be caught before the existence check
  if (length(file) < 1) {
    stop("file must be a vector of one or more filename(s)")
  }
  
  files.found <- file.exists(file)
  if (!all(files.found)) {
    stop("The supplied character string does not all name an existing file(s), ",
         "meaning file.exists(file) is FALSE")
  }
  
  # Program-specific reader; "caretta" falls through to the "turtle" reader
  read.specific <- switch(
    file.type, 
    caretta = , 
    turtle = .airdas_read_turtle, 
    survey = .airdas_read_survey, 
    phocoena = .airdas_read_phocoena
  )
  
  # Read one file with the program-specific reader, 
  #   then apply the processing that is shared by all file types
  read.one <- function(f) {
    das.raw <- read.specific(file = f, skip = skip, tz = tz)
    .airdas_read_general(file = f, x = das.raw, file.type = file.type, skip = skip)
  }
  per.file <- lapply(file, read.one)
  
  # Stack the per-file data frames by row
  do.call(rbind, per.file)
}


#------------------------------------------------------------------------------
#------------------------------------------------------------------------------
# Read data from the PHOCOENA program
#   file: filename of a single file; passed to read_fwf()
#   skip: number of lines to skip; passed to read_fwf()
#   tz: time zone passed to strptime() when creating DateTime
#   Returns the read_fwf() output with all columns read as character, 
#     plus DateTime and EffortDot columns
.airdas_read_phocoena <- function(file, skip, tz, ...) {
  # Fixed-width layout: name, start, and end (inclusive) index of each column.
  #   The NA end is for the last column, which may be ragged
  col.names <- c(
    "Event", "EventNum", "Date", "Time",
    "Lat1", "Lat2", "Lat3", "Lon1", "Lon2", "Lon3", "Alt", 
    "Data1", "Data2", "Data3", "Data4", "Data5", "Data6", "Data7"
  )
  col.start <- c(1,3, 08,17, 26,27,30, 36,37,41, 47, 51,56,61,66,71,76,81)
  col.end   <- c(1,5, 15,24, 26,28,34, 36,39,45, 50, 55,60,65,70,75,80,NA)
  
  # Fields made up of zero to six spaces are read as NA
  blank.strings <- strrep(" ", 0:6)
  
  # suppressWarnings() is for lines that do not have data in all columns
  das.raw <- suppressWarnings(read_fwf(
    file, 
    col_positions = fwf_positions(start = col.start, end = col.end),
    na = blank.strings,
    col_types = cols(.default = col_character()),
    trim_ws = FALSE, skip = skip, skip_empty_rows = FALSE
  )) 
  names(das.raw) <- col.names
  
  # Process specifics: dates and times are read as mm/dd/yy and HH:MM:SS, 
  #   and there is no effort dot column in this layout, so EffortDot is NA
  date.time.chr <- paste(das.raw$Date, das.raw$Time)
  das.raw$DateTime <- strptime(date.time.chr, "%m/%d/%y %H:%M:%S", tz = tz)
  das.raw$EffortDot <- NA
  
  das.raw
}


#------------------------------------------------------------------------------
# Read data from the SURVEY program
#   Placeholder: has the same arguments as the other program-specific readers, 
#   but none of them are used and the function always throws an error
.airdas_read_survey <- function(file, skip, tz, ...) {
  not.supported <- "This package does not yet support the SURVEY method"
  stop(not.supported)
}


#------------------------------------------------------------------------------
# Read data from the CARETTA or TURTLE* program
#   file: filename of a single file; passed to read_fwf()
#   skip: number of lines to skip; passed to read_fwf()
#   tz: time zone passed to strptime() when creating DateTime
#   Returns the read_fwf() output with all columns read as character, 
#     plus a DateTime column and with EffortDot converted to a logical
.airdas_read_turtle <- function(file, skip, tz, ...) {
  # Fixed-width layout: name, start, and end (inclusive) index of each column.
  #   The NA end is for the last column, which may be ragged
  col.names <- c(
    "EventNum", "Event", "EffortDot", "Time", "Date",
    "Lat1", "Lat2", "Lat3", "Lon1", "Lon2", "Lon3", 
    "Data1", "Data2", "Data3", "Data4", "Data5", "Data6", "Data7"
  )
  col.start <- c(1,4,5, 06,13, 20,21,24, 30,31,35, 40,45,50,55,60,65,70)
  col.end   <- c(3,4,5, 11,18, 20,22,28, 30,33,39, 44,49,54,59,64,69,NA)
  
  # Fields made up of zero to six spaces are read as NA
  blank.strings <- strrep(" ", 0:6)
  
  # suppressWarnings() is for lines that do not have data in all columns
  das.raw <- suppressWarnings(read_fwf(
    file, 
    col_positions = fwf_positions(start = col.start, end = col.end),
    na = blank.strings,
    col_types = cols(.default = col_character()),
    trim_ws = FALSE, skip = skip, skip_empty_rows = FALSE
  )) 
  names(das.raw) <- col.names
  
  # Process specifics: dates and times are read as mmddyy and HHMMSS. 
  #   A blank effort dot field was read as NA above, 
  #   so EffortDot is TRUE wherever anything was present in that field
  date.time.chr <- paste(das.raw$Date, das.raw$Time)
  das.raw$DateTime <- strptime(date.time.chr, "%m%d%y %H%M%S", tz = tz)
  das.raw$EffortDot <- !is.na(das.raw$EffortDot)
  
  das.raw
}


#------------------------------------------------------------------------------
# Portion of internal read function consistent across read methods
.airdas_read_general <- function(file, x, file.type, skip) {
  # file: filename of the file that was read; only the basename is kept
  # x: data frame from method-specific airdas_read function
  # file.type: value stored in the file_type column
  # skip: number of lines skipped when reading; used to offset line_num
  # Note: do not need to do NA checks for Lat/Lon/DateTime because NA values
  #   are not allowed in these columns for an airdas_df object
  
  # Process 1: decimal degrees are (degrees + minutes/60), 
  #   made negative when the hemisphere is anything other than N (Lat) or E (Lon)
  lat.sign <- ifelse(x$Lat1 == "N", 1, -1)
  x$Lat <- lat.sign * (as.numeric(x$Lat2) + as.numeric(x$Lat3) / 60)
  
  lon.sign <- ifelse(x$Lon1 == "E", 1, -1)
  x$Lon <- lon.sign * (as.numeric(x$Lon2) + as.numeric(x$Lon3) / 60)
  
  # Check for if lines should be skipped. The warning is only given when
  #   all four of the row 1 checks below are TRUE
  first.event <- x$Event[1]
  bad.event    <- is.na(first.event) | first.event == "C"
  bad.datetime <- is.na(x$DateTime[1])
  bad.lon      <- is.na(x$Lon[1])
  bad.lat      <- is.na(x$Lat[1])
  
  if (bad.event && bad.datetime && bad.lon && bad.lat) {
    warning("The data in row 1 appears to be improperly formatted; ", 
            "should you use the skip argument? See `?airdas_read`",
            immediate. = TRUE)
  }
  
  # Process 2: file-level info
  x$file_type <- file.type
  das.name <- basename(file)
  line.nums <- as.integer(seq_along(x$Event) + skip)
  
  # Data columns: comment events keep their whitespace, all others are trimmed
  is.comment <- toupper(x$Event) == "C"
  trim.noncomment <- function(v) ifelse(is.comment, v, trimws(v))
  
  # Data7 of comment events gets an extra step: 
  #   entries with >6 spaces (eg "       ") are set to NA
  data7.comment <- ifelse(trimws(x$Data7) == "", NA, x$Data7)
  
  data.df <- data.frame(
    Data1 = trim.noncomment(x$Data1), 
    Data2 = trim.noncomment(x$Data2), 
    Data3 = trim.noncomment(x$Data3), 
    Data4 = trim.noncomment(x$Data4), 
    Data5 = trim.noncomment(x$Data5), 
    Data6 = trim.noncomment(x$Data6), 
    Data7 = ifelse(is.comment, data7.comment, trimws(x$Data7)), 
    stringsAsFactors = FALSE
  )
  # Anything that is an empty string after trimming is set to NA
  data.df[data.df == ""] <- NA
  
  # Data frame to return
  out.df <- data.frame(
    Event = x$Event, EffortDot = x$EffortDot, DateTime = x$DateTime, 
    Lat = x$Lat, Lon = x$Lon, data.df, EventNum = x$EventNum, 
    file_das = das.name, line_num = line.nums, file_type = x$file_type, 
    stringsAsFactors = FALSE
  )
  
  as_airdas_dfr(out.df)
}

# Try the swfscAirDAS package in your browser
#
# Any scripts or data that you put into this service are public.
#
# swfscAirDAS documentation built on Aug. 9, 2023, 1:06 a.m.