R/wrcc_parseData.R

Defines functions wrcc_parseData

Documented in wrcc_parseData

#' @keywords WRCC
#' @export
#' @importFrom MazamaCoreUtils logger.trace logger.debug logger.warn logger.error
#'
#' @title Parse WRCC data string
#'
#' @param fileString character string containing WRCC data
#' @description Raw character data from WRCC are parsed into a tibble.
#' The incoming \code{fileString}
#' can be read in directly from WRCC using \code{wrcc_downloadData()} or from a local
#' file using \code{readr::read_file()}.
#'
#' The type of monitor represented by this fileString is inferred from the column names
#' using \code{wrcc_identifyMonitorType()} and appropriate column types are assigned.
#' The character data are then processed, read into a tibble and augmented in the following ways:
#' \enumerate{
#' \item{Spaces at the beginning and end of each line are removed.}
#' \item{All header lines beginning with ':' are removed.}
#' \item{A \code{monitorName} column is added, taken from the first line of the data.}
#' \item{A \code{monitorType} column is added (\code{'EBAM'} or \code{'ESAM'}),
#' determined from the most common non-negative value in the \code{Type} column.}
#' }
#'
#' An error is raised when the data contain no data records, when no valid
#' (non-missing, non-negative) monitor type code is found, or when the monitor
#' type code is not one of the supported PM2.5 types.
#' @return Dataframe of WRCC raw monitor data.
#' @references \href{https://wrcc.dri.edu/cgi-bin/smoke.pl}{Fire Cache Smoke Monitoring Archive}
#' @examples
#' \dontrun{
#' fileString <- wrcc_downloadData(20150701, 20150930, unitID = 'SM16')
#' tbl <- wrcc_parseData(fileString)
#' }

wrcc_parseData <- function(fileString) {

  logger.debug(" ----- wrcc_parseData() ----- ")

  # Identify monitor type; only the column names and types are needed here
  monitorTypeList <- wrcc_identifyMonitorType(fileString)

  columnNames <- monitorTypeList$columnNames
  columnTypes <- monitorTypeList$columnTypes

  # Convert the fileString into individual lines
  lines <- readr::read_lines(fileString)

  if ( length(lines) <= 4 ) {
    logger.warn("No valid PM2.5 data")
    stop("No valid PM2.5 data")
  }

  # NOTE:  Here is an example header from WRCC ASCII output:
  # NOTE:
  # NOTE:  [1] " Smoke #11 "
  # NOTE:  [2] ":       GMT\t Deg \t Deg \t     \tser #\tug/m3\t Unk \t l/m \tDeg C\t  %  \t Unk \tdeg C\t  %  \t m/s \t Deg \tvolts\t     "
  # NOTE:  [3] ": Date/Time\t  GPS  \t  GPS  \tType   \tSerial \tConc   \t Misc  \t Ave.  \t Av Air\t  Rel  \t Misc  \tSensor \tSensor \t  Wind \t Wind  \tBattery\tAlarm  "
  # NOTE:  [4] ":YYMMDDhhmm\t  Lat. \t  Lon. \t       \tNumber \t RT    \t  #1   \tAir Flw\t  Temp \tHumidty\t  #2   \tInt AT \tInt RH \t  Speed\t Direc \tVoltage\t       "
  # NOTE:
  # NOTE:  It appears that, after 1024 lines, the 3 header lines are repeated.
  # NOTE:  Sometimes (always?) an empty string appears in the last line.

  # Strip spaces from the beginning and end but retain "\t" (This is why we can't use stringr::str_trim)
  lines <- stringr::str_replace(lines, '^ *', '')
  lines <- stringr::str_replace(lines, ' *$', '')

  # Get monitorName from first line and then remove that line
  monitorName <- lines[1]
  lines <- lines[-1]

  # Remove header lines beginning with ":" and empty lines, leaving only data
  goodLines <- !is.na(lines) & nzchar(lines) & !stringr::str_detect(lines, '^:')

  if ( !any(goodLines) ) {
    logger.warn("No valid PM2.5 data")
    stop("No valid PM2.5 data")
  }

  # Read the data into a tibble
  fakeFile <- paste0(lines[goodLines], collapse = '\n')
  # NOTE:  readr only treats a string as literal data when it contains a newline.
  # NOTE:  A single data record would otherwise be interpreted as a file path.
  if ( !grepl('\n', fakeFile, fixed = TRUE) ) {
    fakeFile <- paste0(fakeFile, '\n')
  }
  tbl <- readr::read_tsv(fakeFile, col_names = columnNames, col_types = columnTypes)

  # Add monitor name
  tbl$monitorName <- monitorName

  # Add monitor type (determined from the 'Type' column after reading in the data)
  # NOTE:  Drop all negative values to get rid of -9999 or other missing value flags.
  # NOTE:  Conversion of -9999 to NA happens in the ~QualityControl scripts so that
  # NOTE:  all raw data modifications can be found in one place.
  # NOTE:  NA values are dropped as well so they cannot leak into the comparisons below.
  validTypes <- tbl$Type[!is.na(tbl$Type) & tbl$Type >= 0]
  monitorTypeCode <- unique(validTypes)

  if ( length(monitorTypeCode) == 0 ) {
    logger.error("No valid monitor type code found in the 'Type' column")
    stop("No valid monitor type code found in the 'Type' column")
  }

  # Sanity check
  if ( length(monitorTypeCode) > 1 ) {
    logger.warn("More than one monitor type detected: %s", paste(monitorTypeCode, collapse = ", "))
    # Pick the most common valid Type. Counting only valid values keeps missing
    # value flags from winning, and which.max() guarantees a single code on ties.
    typeCounts <- tabulate(match(validTypes, monitorTypeCode), nbins = length(monitorTypeCode))
    monitorTypeCode <- monitorTypeCode[which.max(typeCounts)]
  }

  # 0=E-BAM PM2.5, 1=E-BAM PM10, 9=E-Sampler. We only want PM2.5 measurements
  if ( monitorTypeCode == 0 ) {
    tbl$monitorType <- 'EBAM'
  } else if ( monitorTypeCode == 9 ) {
    tbl$monitorType <- 'ESAM'
  } else if ( monitorTypeCode == 1 ) {
    logger.error("EBAM PM10 data parsing is not supported")
    stop("EBAM PM10 data parsing is not supported")
  } else {
    # NOTE:  "%s" is used because it formats any atomic type safely
    logger.error("Unsupported monitor type code: %s", monitorTypeCode)
    stop(sprintf("Unsupported monitor type code: %s", monitorTypeCode))
  }

  return(tbl)

}

Try the PWFSLSmoke package in your browser

Any scripts or data that you put into this service are public.

PWFSLSmoke documentation built on July 8, 2020, 7:19 p.m.