R/wrcc_createMetaDataframe.R

Defines functions wrcc_createMetaDataframe

Documented in wrcc_createMetaDataframe

#' @keywords WRCC
#' @export
#' @import MazamaCoreUtils
#'
#' @title Create WRCC site location metadata dataframe
#'
#' @param tbl single site WRCC tibble after metadata enhancement; must contain
#' \code{monitorType} and \code{deploymentID} columns
#' @param unitID character or numeric WRCC unit identifier
#' @param pwfslDataIngestSource identifier for the source of monitoring data, e.g. \code{'WRCC'}
#' @param existingMeta existing 'meta' dataframe from which to obtain metadata for known monitor deployments
#' @param addGoogleMeta logical specifying whether to use the Google elevation
#' service (the Google reverse geocoding step is currently disabled)
#' @param addEsriMeta logical specifying whether to use ESRI reverse geocoding services
#' @return A \code{meta} dataframe for use in a \emph{ws_monitor} object, with
#' one row per unique deployment and rownames set to \code{monitorID}.
#' @description After a WRCC tibble has been enhanced with
#' additional columns generated by \code{addClustering} we are ready to
#' pull out site information associated with unique deployments.
#'
#' These will be rearranged into a dataframe organized as deployment-by-property
#' with one row for each monitor deployment.
#'
#' The site information found in \code{tbl} is augmented so that we end up with a uniform
#' set of properties associated with each monitor deployment. The list of
#' columns populated in the returned \code{meta} dataframe is:
#'
#' \preformatted{
#' > names(meta)
#'  [1] "monitorID"             "longitude"             "latitude"              "elevation"
#'  [5] "timezone"              "countryCode"           "stateCode"             "siteName"
#'  [9] "agencyName"            "countyName"            "msaName"               "monitorType"
#' [13] "siteID"                "instrumentID"          "aqsID"                 "pwfslID"
#' [17] "pwfslDataIngestSource" "telemetryAggregator"   "telemetryUnitID"
#' }
#'
#' The function stops with an error if \code{tbl} has no \code{monitorType}
#' column, contains more than one monitor type, or has no \code{deploymentID}
#' column. Failures of the optional Google/ESRI lookups are logged as warnings
#' and do not stop processing.
#'
#' @seealso \code{\link{addMazamaMetadata}}


wrcc_createMetaDataframe <- function(
  tbl,
  unitID = as.character(NA),
  pwfslDataIngestSource = 'WRCC',
  existingMeta = NULL,
  addGoogleMeta = FALSE,
  addEsriMeta = FALSE
) {

  logger.debug(" ----- wrcc_createMetaDataframe() ----- ")

  # ----- Validate input -------------------------------------------------------

  # Sanity check -- tbl must have a monitorType
  if ( !'monitorType' %in% names(tbl) ) {
    logger.error("No 'monitorType' column found in 'tbl' tibble with columns: %s", paste0(names(tbl), collapse=", "))
    stop("No 'monitorType' column found in 'tbl' tibble.")
  }

  # Sanity check -- tbl must have only one monitorType
  monitorType <- unique(tbl$monitorType)
  if ( length(monitorType) > 1 ) {
    logger.error("Multiple monitor types found in 'tbl' tibble: %s", paste0(monitorType, collapse=", "))
    stop("Multiple monitor types found in 'tbl' tibble.")
  }

  # Sanity check -- deploymentID must exist
  if ( !'deploymentID' %in% names(tbl) ) {
    logger.error("No 'deploymentID' column found in 'tbl' tibble with columns: %s", paste0(names(tbl), collapse=", "))
    stop("No 'deploymentID' column found in 'tbl' tibble.  Have you run addClustering()?")
  }

  # ----- Reduce to one record per deployment ----------------------------------

  # Keep the first record encountered for each deploymentID
  tbl <- tbl[!duplicated(tbl$deploymentID),]

  logger.trace("Tibble contains %d unique deployment(s)", nrow(tbl))

  # Our tibble now contains the following columns:
  #
  # > names(tbl)
  #  [1] "DateTime"       "GPSLat"         "GPSLon"         "Type"           "SerialNumber"
  #  [6] "ConcRT"         "Misc1"          "AvAirFlw"       "AvAirTemp"      "RelHumidity"
  # [11] "BaromPress"     "SensorIntAT"    "SensorIntRH"    "WindSpeed"      "WindDir"
  # [16] "BatteryVoltage" "Alarm"          "monitorName"    "monitorType"    "datetime"
  # [21] "medoidLon"      "medoidLat"      "deploymentID"
  #
  # The 'meta' dataframe populated below contains the following parameters:
  #
  # > names(meta)
  #  [1] "monitorID"             "longitude"             "latitude"              "elevation"
  #  [5] "timezone"              "countryCode"           "stateCode"             "siteName"
  #  [9] "agencyName"            "countyName"            "msaName"               "monitorType"
  # [13] "siteID"                "instrumentID"          "aqsID"                 "pwfslID"
  # [17] "pwfslDataIngestSource" "telemetryAggregator"   "telemetryUnitID"

  # ----- Build the 'meta' dataframe -------------------------------------------

  meta <- createEmptyMetaDataframe(nrow(tbl))

  # Assign data where we have it; everything else starts out as a typed NA
  meta$longitude <- as.numeric(tbl$medoidLon)
  meta$latitude <- as.numeric(tbl$medoidLat)
  meta$elevation <- as.numeric(NA)
  meta$timezone <- as.character(NA)
  meta$countryCode <- as.character(NA)
  meta$stateCode <- as.character(NA)
  meta$siteName <- as.character(NA)
  meta$countyName <- as.character(NA)
  meta$msaName <- as.character(NA)
  meta$agencyName <- as.character(NA)
  meta$monitorType <- as.character(tbl$monitorType)
  meta$siteID <- as.character(tbl$deploymentID) # TODO:  This will be obtained from the "known_location" service
  meta$instrumentID <- paste0('wrcc.', unitID)
  meta$aqsID <- as.character(NA)
  meta$pwfslID <- as.character(tbl$deploymentID) # TODO:  This will be obtained from the "known_location" service
  meta$pwfslDataIngestSource <- as.character(pwfslDataIngestSource)
  meta$telemetryAggregator <- 'wrcc'
  meta$telemetryUnitID <- as.character(unitID)

  # monitorID combines the deployment location with the instrument
  meta$monitorID <- paste(meta$siteID, meta$instrumentID, sep='_')

  # Add timezones, state and country codes
  meta <- addMazamaMetadata(meta, existingMeta=existingMeta)

  # TODO:  Could assign other spatial identifiers like EPARegion, etc.

  # agencyName is inferred from the prefix of the WRCC monitor name
  NPSMask <- stringr::str_detect(tbl$monitorName, '^Smoke NPS')
  USFSMask <- stringr::str_detect(tbl$monitorName, '^Smoke USFS')
  meta$agencyName[NPSMask] <- 'National Park Service'
  meta$agencyName[USFSMask] <- 'United States Forest Service'

  # ----- Optional web service lookups (best effort) ---------------------------

  if ( addGoogleMeta ) {
    # Add elevation; on failure 'meta' is left unchanged and a warning is logged
    result <- try( meta <- addGoogleElevation(meta, existingMeta=existingMeta),
                   silent=TRUE )
    if ( inherits(result, "try-error") ) {
      logger.warn("Unable to add Google elevations: %s", geterrmessage())
    }
    # # Add siteName and countyName
    # result <- try( meta <- addGoogleAddress(meta, existingMeta=existingMeta),
    #                silent=TRUE )
    # if ( "try-error" %in% class(result) ) {
    #   logger.warn("Unable to add Google addresses: %s", geterrmessage())
    # }
  }

  if ( addEsriMeta ) {
    # Add siteName and countyName; on failure 'meta' is left unchanged
    result <- try( meta <- addEsriAddress(meta, existingMeta=existingMeta),
                   silent=TRUE )
    if ( inherits(result, "try-error") ) {
      logger.warn("Unable to add ESRI addresses: %s", geterrmessage())
    }
  }

  # ----- Finalize -------------------------------------------------------------

  # Assign rownames
  rownames(meta) <- meta$monitorID

  logger.trace("Created 'meta' dataframe with %d rows and %d columns", nrow(meta), ncol(meta))

  return(meta)

}
MazamaScience/PWFSLSmoke documentation built on July 3, 2023, 11:03 a.m.