R/airsis_createMetaDataframe.R

Defines functions airsis_createMetaDataframe

Documented in airsis_createMetaDataframe

#' @keywords AIRSIS
#' @export
#' @import MazamaCoreUtils
#' @title Create AIRSIS site location metadata dataframe
#' @param tbl single site AIRSIS tibble after metadata enhancement; must
#' contain the columns \code{monitorType}, \code{deploymentID},
#' \code{medoidLon}, \code{medoidLat} and \code{monitorName}
#' @param provider identifier used to modify baseURL \code{['APCD'|'USFS']}
#' @param unitID character or numeric AIRSIS unit identifier
#' @param pwfslDataIngestSource identifier for the source of monitoring data, e.g. \code{'AIRSIS'}
#' @param existingMeta existing 'meta' dataframe from which to obtain metadata for known monitor deployments
#' @param addGoogleMeta logical specifying whether to use Google elevation and reverse geocoding services
#' @param addEsriMeta logical specifying whether to use ESRI elevation and reverse geocoding services
#' @return A \code{meta} dataframe for use in a \emph{ws_monitor} object.
#' @description After an AIRSIS tibble has been enhanced with
#' additional columns generated by \code{addClustering} we are ready to
#' pull out site information associated with unique deployments.
#'
#' These will be rearranged into a dataframe organized as deployment-by-property
#' with one row for each monitor deployment.
#'
#' The site information found in \code{tbl} is augmented so that we end up with a uniform
#' set of properties associated with each monitor deployment. The list of
#' columns in the returned \code{meta} dataframe is:
#'
#' \preformatted{
#' > names(p$meta)
#'  [1] "monitorID"             "longitude"             "latitude"
#'  [4] "elevation"             "timezone"              "countryCode"
#'  [7] "stateCode"             "siteName"              "agencyName"
#' [10] "countyName"            "msaName"               "monitorType"
#' [13] "siteID"                "instrumentID"          "aqsID"
#' [16] "pwfslID"               "pwfslDataIngestSource" "telemetryAggregator"
#' [19] "telemetryUnitID"
#' }
#'
#' For \code{"EBAM"} monitors, \code{agencyName} is filled in for deployments
#' whose \code{Alias} begins with \code{"NPS "} or \code{"USFS"}.
#'
#' Failures of the optional Google or ESRI services are logged as warnings and
#' do not stop execution.
#'
#' An error is raised if \code{tbl} lacks a \code{monitorType} or
#' \code{deploymentID} column or contains more than one monitor type.
#'
#' @seealso \code{\link{addMazamaMetadata}}

airsis_createMetaDataframe <- function(
  tbl,
  provider = as.character(NA),
  unitID = as.character(NA),
  pwfslDataIngestSource = 'AIRSIS',
  existingMeta = NULL,
  addGoogleMeta = FALSE,
  addEsriMeta = FALSE
) {

  logger.debug(" ----- airsis_createMetaDataframe() ----- ")

  # ----- Validate input -------------------------------------------------------

  # Sanity check -- tbl must have a monitorType
  if ( !'monitorType' %in% names(tbl) ) {
    logger.error(
      "No 'monitorType' column found in 'tbl' tibble with columns: %s",
      paste0(names(tbl), collapse=", ")
    )
    stop("No 'monitorType' column found in 'tbl' tibble.")
  }

  monitorType <- unique(tbl$monitorType)

  # Sanity check -- tbl must have only one monitorType
  if ( length(monitorType) > 1 ) {
    logger.error(
      "Multiple monitor types found in 'tbl' tibble: %s",
      paste0(monitorType, collapse=", ")
    )
    stop("Multiple monitor types found in 'tbl' tibble.")
  }

  monitorType <- monitorType[1]

  # Sanity check -- deploymentID must exist
  if ( !'deploymentID' %in% names(tbl) ) {
    logger.error(
      "No 'deploymentID' column found in 'tbl' tibble with columns %s",
      paste0(names(tbl), collapse=", ")
    )
    stop("No 'deploymentID' column found in 'tbl' tibble.  Have you run addClustering()?")
  }

  # ----- Reduce to one row per deployment -------------------------------------

  # Keep only the first record seen for each deploymentID
  tbl <- tbl[!duplicated(tbl$deploymentID),]

  logger.trace("Dataframe contains %d unique deployment(s)", nrow(tbl))

  # Our tibble now contains the following columns:
  #
  # > names(tbl)
  #  [1] "MasterTable_ID"        "UnitID"                "Alias"                 "Latitude"
  #  [5] "Longitude"             "Date.Time.GMT"         "Start.Date.Time..GMT." "COncRT"
  #  [9] "ConcHr"                "Flow"                  "W.S"                   "W.D"
  # [13] "AT"                    "RHx"                   "RHi"                   "BV"
  # [17] "FT"                    "Alarm"                 "Type"                  "Serial.Number"
  # [21] "Version"               "Sys..Volts"            "TimeStamp"             "PDate"
  # [25] "monitorName"           "monitorType"           "datetime"              "medoidLon"
  # [29] "medoidLat"             "deploymentID"
  #
  # The PWFSLSmoke v1.0 data model contains the following parameters
  #
  # > names(meta)
  #  [1] "monitorID"             "longitude"             "latitude"              "elevation"
  #  [5] "timezone"              "countryCode"           "stateCode"             "siteName"
  #  [9] "agencyName"            "countyName"            "msaName"               "monitorType"
  # [13] "siteID"                "instrumentID"          "aqsID"                 "pwfslID"
  # [17] "pwfslDataIngestSource" "telemetryAggregator"   "telemetryUnitID"

  # ----- Build the meta dataframe ---------------------------------------------

  meta <- createEmptyMetaDataframe(nrow(tbl))

  # Assign data where we have it; everything else starts out as a typed NA
  meta$longitude <- as.numeric(tbl$medoidLon)
  meta$latitude <- as.numeric(tbl$medoidLat)
  meta$elevation <- as.numeric(NA)
  meta$timezone <- as.character(NA)
  meta$countryCode <- as.character(NA)
  meta$stateCode <- as.character(NA)
  meta$siteName <- as.character(tbl$monitorName)
  meta$countyName <- as.character(NA)
  meta$msaName <- as.character(NA)
  meta$agencyName <- as.character(NA)
  meta$monitorType <- as.character(tbl$monitorType)
  meta$siteID <- as.character(tbl$deploymentID) # TODO:  This will be obtained from the "known_location" service
  meta$instrumentID <- paste0(tolower(provider),'.',unitID)
  meta$aqsID <- as.character(NA)
  meta$pwfslID <- as.character(tbl$deploymentID) # TODO:  This will be obtained from the "known_location" service
  meta$pwfslDataIngestSource <- as.character(pwfslDataIngestSource)
  meta$telemetryAggregator <- paste0(tolower(provider), '.airsis')
  meta$telemetryUnitID <- as.character(unitID)

  # monitorID is "<siteID>_<instrumentID>"
  meta$monitorID <- paste(meta$siteID, meta$instrumentID, sep='_')

  # Add timezones, state and country codes
  meta <- addMazamaMetadata(meta, existingMeta=existingMeta)

  # TODO:  Could assign other spatial identifiers like EPARegion, etc.

  # ----- agencyName -----------------------------------------------------------

  # isTRUE() guards against an NA monitorType, which would otherwise make the
  # scalar if() fail with "missing value where TRUE/FALSE needed".
  if ( isTRUE(monitorType == "EBAM") ) {
    # which() drops NA matches (missing Alias values) so that only rows with a
    # positive match are assigned.
    npsRows <- which(stringr::str_detect(tbl$Alias,'^NPS '))
    usfsRows <- which(stringr::str_detect(tbl$Alias,'^USFS'))
    meta$agencyName[npsRows] <- 'National Park Service'
    meta$agencyName[usfsRows] <- 'United States Forest Service'
  }

  # ----- Optional web service metadata ----------------------------------------

  # Each lookup is best-effort: on failure 'meta' is left unchanged and a
  # warning is logged.

  if ( addGoogleMeta ) {
    # Add elevation
    result <- try( meta <- addGoogleElevation(meta, existingMeta=existingMeta),
                   silent=TRUE )
    if ( inherits(result, "try-error") ) {
      logger.warn("Unable to add Google elevations: %s", geterrmessage())
    }
    # # Add siteName and countyName
    # result <- try( meta <- addGoogleAddress(meta, existingMeta=existingMeta),
    #                silent=TRUE )
    # if ( "try-error" %in% class(result) ) {
    #   logger.warn("Unable to add Google addresses: %s", geterrmessage())
    # }
  }

  if ( addEsriMeta ) {
    # Add siteName and countyName
    result <- try( meta <- addEsriAddress(meta, existingMeta=existingMeta),
                   silent=TRUE )
    if ( inherits(result, "try-error") ) {
      logger.warn("Unable to add ESRI addresses: %s", geterrmessage())
    }
  }

  # ----- Return ---------------------------------------------------------------

  # Assign rownames
  rownames(meta) <- meta$monitorID

  logger.trace("Created 'meta' dataframe with %d rows and %d columns", nrow(meta), ncol(meta))

  return(meta)

}

Try the PWFSLSmoke package in your browser

Any scripts or data that you put into this service are public.

PWFSLSmoke documentation built on July 8, 2020, 7:19 p.m.