R/echo.R

## Retrieves WWTP discharge data, hopefully

## Helpful: https://echo.epa.gov/help/reports/effluent-charts-help
##          https://echo.epa.gov/tools/web-services/rest-documentation#/
## Some Parameter codes: 00300 = DO; 50050 = Flow


# functions ---------------------------------------------------------------

# makeDates ===============================================================

#' makeDates
#'
#' Internal functions used by \code{echoGetEffluent()} to determine if
#' "0" should be placed at the front of a character vector
#'
#' @param x character representing month
#' @param n integer typically 2
makeDates <- function(x, n) {
  if(nchar(as.character(x)) <n) {
    paste0(c("0"),as.character(x))
  } else {
    paste0(as.character(x))
  }
}



# echoGetEffluent =========================================================

#' Downloads EPA ECHO data
#'
#' \code{echoGetEffluent()} downloads specified wastewater discharge montitoring
#' report data using ECHO API, \code{\link[httr]{GET}}, and \code{jsonlite}.
#'
#' @param permit character. The NPDES permit number of the wastewater treatment
#'   plant. Typically in format \code{"st0123456"} where `st` is the state
#'   abreviation, followed by the permit number
#' @param parameter character. The reporting parameter number, see
#'   https://echo.epa.gov/help/reports/effluent-charts-help
#' @param start character. The start date in format \code{"yyyy-mm-dd"}
#' @param end character the end date in format \code{"yyyy-mm-dd"}
#' @return The output will be a tibble of reported parameter statistics
#' @import httr
#' @import jsonlite
#' @import tibble
#' @export
#' @examples \dontrun{echoGetEffluent("tx0119407", "50050", "2010-01-01",
#'   "2011-01-01")}

echoGetEffluent <- function(permit, parameter, start, end) {

  ## format the dates to paste as characters into the GET string
  start <- lubridate::ymd(start)
  end <- lubridate::ymd(end)

  ## use the make date function to format dates into appropriate strings
  startMonth <- makeDates(lubridate::month(start),2)

  startDay <- makeDates(lubridate::day(start), 2)

  startYear <- as.character(lubridate::year(start))

  endMonth <- makeDates(lubridate::month(end), 2)

  endDay <- makeDates(lubridate::day(end), 2)

  endYear <- as.character(lubridate::year(end))

  ## request URL statement
  request <- GET(paste0('https://ofmpub.epa.gov/echo/eff_rest_services.get_effluent_chart?p_id=',
                        permit,'&parameter_code=', parameter,'&start_date=',startMonth,
                        '%2F',startDay,'%2F',startYear,'&end_date=',
                        endMonth,'%2F',endDay,'%2F',endYear,
                        '&output=json'), accept_json())
  print(paste("# Status message:", http_status(request)))

  contentJSON <- content(request, as = "text") #Download JSON as text

  info <- fromJSON(contentJSON,simplifyDataFrame = FALSE) #read as a JSON

  ## Obtain permit information used to make the dataframe
  CWPName <- info[["Results"]][["CWPName"]] #Grabs the permitted name
  SourceID <- info[["Results"]][["SourceId"]] #Grabs the permitted id
  RegistryID <- info[["Results"]][["RegistryId"]] #Grabs the registry id
  Location <- info[["Results"]][["CWPStreet"]] #Location Descriptor
  City <- info[["Results"]][["CWPCity"]] #Grabs the city on the permit
  State <- info[["Results"]][["CWPState"]] #Grabs the state on the permit
  Zip <- info[["Results"]][["CWPZip"]] #Grab the zipcode on the permit
  Status <- info[["Results"]][["CWPZip"]] #Grab the current permit status
  nOutfalls <- seq_along(info[["Results"]][["PermFeatures"]]) #grab number of outfall features

  output <- data_frame()

  ## can I do this with purr::map? ##
  for (i in nOutfalls){

    DMR <- info[["Results"]][["PermFeatures"]][[i]][["Parameters"]][[1]][["DischargeMonitoringReports"]] #Specify the DMRs for the intended outfall
    outfallNumber <- info[["Results"]][["PermFeatures"]][[i]][["PermFeatureNmbr"]] #Grab the outfall if number

    buildOutput <- tibble(
      Name = CWPName,
      Outfall = outfallNumber,
      ID = SourceID,
      RegistryID = RegistryID,
      Location = Location,
      City = City,
      State = State,
      Zip = Zip,
      Status = Status,
      LimitBeginDate = lubridate::dmy(purrr::map_chr(DMR, "LimitBeginDate", .default = NA)),
      LimitEndDate = lubridate::dmy(purrr::map_chr(DMR, "LimitEndDate", .default = NA)),
      LimitValueNmbr = as.numeric(purrr::map_chr(DMR, "LimitValueNmbr", .default = NA)),
      LimitUnitCode = purrr::map_chr(DMR, "LimitUnitCode", .default = NA),
      LimitUnitDesc = purrr::map_chr(DMR, "LimitUnitDesc", .default = NA),
      StdUnitCode = purrr::map_chr(DMR, "StdUnitDesc", .default = NA),
      StdUnitDesc = purrr::map_chr(DMR, "StdUnitDesc", .default = NA),
      LimitValueStdUnit = purrr::map_chr(DMR, "LimitValueStdUnit", .default = NA),
      StatisticalBaseCode = purrr::map_chr(DMR, "StatisticalBaseCode", .default = NA),
      StatisticalBaseDesc = purrr::map_chr(DMR, "StatisticalBaseDesc", .default = NA),
      StatisticalBaseTypeCode = purrr::map(DMR, "StatisticalBaseTypeCode", .default = NA),
      StatisticalBaseTypeDesc = purrr::map(DMR, "StatisticalBaseTypeDesc", .default = NA),
      DMREventId = purrr::map_chr(DMR, "DMREventId", .default = NA),
      MonitoringPeriodEndDate = lubridate::dmy(purrr::map_chr(DMR, "MonitoringPeriodEndDate", .default = NA)),
      DMRFormValueId = purrr::map_chr(DMR, "DMRFormValueId", .default = NA),
      ValueTypeCode = purrr::map(DMR, "ValueTypeCode", .default = NA),
      ValueTypeDesc = purrr::map(DMR, "ValueTypeDesc", .default = NA),
      DMRValueId = purrr::map_chr(DMR, "DMRValueId", .default = NA),
      DMRValueNmbr = as.numeric(purrr::map(DMR, "DMRValueNmbr", .default = NA)),
      DMRUnitCode = purrr::map(DMR, "DMRUnitCode", .default = NA),
      DMRUnitDesc = purrr::map(DMR, "DMRUnitDesc",.default = NA),
      DMRValueStdUnits = as.numeric(purrr::map(DMR, "DMRValueStdUnits", .default = NA)),
      DMRQualifierCode = purrr::map(DMR, "DMRQualifierCode", .default = NA),
      ValueReceivedDate = lubridate::dmy(purrr::map_chr(DMR, "ValueReceivedDate", .default = NA)),
      DaysLate = as.integer(purrr::map(DMR, "DaysLate", .default = NA)),
      NODICode = purrr::map(DMR, "NODICode", .default = NA),
      NODEDesc = purrr::map(DMR, "NODEDesc", .default = NA),
      ExceedancePct = purrr::map(DMR, "ExceedancePct", .default = NA),
      NPDESViolations = purrr::map(DMR, "NPDESViolations", .default = NA)
    )

    output <- rbind(output, buildOutput)
  }

  output
}



# echoGetFacilities =========================================================

#' Downloads EPA ECHO facility data
#'
#' \code{echoGetEffluent()} downloads specified wastewater discharge montitoring
#' report data using ECHO API, \code{\link[httr]{GET}}, and \code{jsonlite}.
#'
#' @param \dots see \url{https://echo.epa.gov/tools/web-services/facility-search-water#!/Facility_Information/get_cwa_rest_services_get_facility_info} for a complete list of parameter options. Examples provided below.
#'
#' @return The output will be a tibble of facility details
#' @import httr
#' @import jsonlite
#' @import tibble
#' @return A data frame, the number of variables will depend on the reporting requirements of the retrived plants
#'
#' @export
#' @examples \dontrun{
#' ## Not run:
#' ## Retrieve facilities by bounding box
#' echoGetFacilities(xmin = "-96.407563", ymin = "30.554395", xmax = "-96.25947", ymax = "30.751984")
#' }
#'
echoGetFacilities <- function(...) {
  #dots <- list(...)
  #print(dots)
  valuesList <- readEchoGetFacilitiesDots(...)

  ## request URL statement
  baseURL <- "https://ofmpub.epa.gov/echo/cwa_rest_services.get_facility_info?"
  appendURL <- paste(paste(names(valuesList),valuesList,sep="="),collapse="&")
  getURL <- paste0(baseURL,appendURL)

  request <- GET(getURL, accept_json())
  print(paste("# Status message:", http_status(request)))

  contentJSON <- content(request, as = "text") #Download JSON as text

  info <- fromJSON(contentJSON,simplifyDataFrame = FALSE) #read as a JSON

  ### build the output.
  ### Output will depend on what the state and individual permit report requires.
  ### A better method would be to map the variable names from retrieved plant
  ### and build output from that with map

  len <- purrr::map(info[["Results"]][["Facilities"]], length) # return a list of lengths
  maxIndex <- which.max(len) # if a different number of columns is returned per plant, we want to map values to the longest
                             # this might fail if a entirely different columns are returned. Need to find out if there is some
                             # consisteny in the returned columns
  cNames <- names(info[["Results"]][["Facilities"]][[maxIndex]])

  buildOutput <- purrr::map_df(info[["Results"]][["Facilities"]], safe_extract, cNames)

  ## Commented out, I'm not certain if I should be explicit about what columns are returned
  ## or generalize it. Below is explicit, above is generalized
  # buildOutput <- tibble(
  #   SourceID = purrr::map(info[["Results"]][["Facilities"]], "SourceID"),
  #   EPASystem = purrr::map(info[["Results"]][["Facilities"]], "EPASystem"),
  #   RegistryID = purrr::map(info[["Results"]][["Facilities"]], "RegistryID"),
  #   Statute = purrr::map(info[["Results"]][["Facilities"]], "Statute"),
  #   CWPName = purrr::map(info[["Results"]][["Facilities"]], "CWPName"),
  #   CWPStreet = purrr::map(info[["Results"]][["Facilities"]], "CWPStreet"),
  #   CWPState = purrr::map(info[["Results"]][["Facilities"]], "CWPState"),
  #   CWPStateDistrict = purrr::map(info[["Results"]][["Facilities"]], "CWPStateDistrict"),
  #   CWPZip = purrr::map(info[["Results"]][["Facilities"]], "CWPZip"),
  #   CWPCounty = purrr::map(info[["Results"]][["Facilities"]], "CWPCounty"),
  #   CWPEPARegion = purrr::map(info[["Results"]][["Facilities"]], "CWPEPARegion"),
  #   CWStatus = purrr::map(info[["Results"]][["Facilities"]], "CWStatus"),
  #   CWPIndianCntryFlg = purrr::map(info[["Results"]][["Facilities"]], "CWPIndianCntryFlg"),
  #   CWPTRIbalLandCode = purrr::map(info[["Results"]][["Facilities"]], "CWPTRIbalLandCode"),
  #   CWPVersionNmbr = purrr::map(info[["Results"]][["Facilities"]], "CWPVersionNmbr"),
  #   CWPPermitStatusCode = purrr::map(info[["Results"]][["Facilities"]], "CWPPermitStatusCode"),
  #   CWPPermitStatusDesc = purrr::map(info[["Results"]][["Facilities"]], "CWPPermitStatusDesc"),
  #   CWPPermitTypeCode = purrr::map(info[["Results"]][["Facilities"]], "CWPPermitTypeCode"),
  #   CWPPermitTypeDesc = purrr::map(info[["Results"]][["Facilities"]], "CWPPermitTypeDesc"),
  #   CWPExpirationDate = purrr::map(info[["Results"]][["Facilities"]], "CWPExpirationDate"),
  #   CWPFacilityTypeIndicator = purrr::map(info[["Results"]][["Facilities"]], "CWPFacilityTypeIndicator"),
  #   CWPMajorMinorStatusFlag = purrr::map(info[["Results"]][["Facilities"]], "CWPMajorMinorStatusFlag"),
  #   FacPercentMinority = purrr::map(info[["Results"]][["Facilities"]], "FacPercentMinority"),
  #   FacPopDen = purrr::map(info[["Results"]][["Facilities"]], "FacPopDen"),
  #   FacFederalAgencyCode = purrr::map(info[["Results"]][["Facilities"]], "FacFederalAgencyCode"),
  #   CWPCurrentSNCStatus = purrr::map(info[["Results"]][["Facilities"]], "CWPCurrentSNCStatus"),
  #   CWPQtrsInNC = purrr::map(info[["Results"]][["Facilities"]], "CWPQtrsInNC"),
  #   CWPCurrentViol = purrr::map(info[["Results"]][["Facilities"]], "CWPCurrentViol"),
  #   CWPQtrsInSNC = purrr::map(info[["Results"]][["Facilities"]], "CWPQtrsInSNC"),
  #   CWPInspectionCount = purrr::map(info[["Results"]][["Facilities"]], "CWPInspectionCount"),
  #   CWPDateLastInspection = purrr::map(info[["Results"]][["Facilities"]], "CWPDateLastInspection"),
  #   CWPDateLastInformalEa = purrr::map(info[["Results"]][["Facilities"]], "CWPDateLastInformalEa"),
  #   CWPInformalActCount = purrr::map(info[["Results"]][["Facilities"]], "CWPInformalActCount"),
  #   FacLat = purrr::map(info[["Results"]][["Facilities"]], "FacLat"),
  #   FacLong = purrr::map(info[["Results"]][["Facilities"]], "FacLong"),
  #   FacUsMexBorderFlg = purrr::map(info[["Results"]][["Facilities"]], "FacUsMexBorderFlg"),
  #   CWPImpWaterFlg = purrr::map(info[["Results"]][["Facilities"]], "CWPImpWaterFlg"),
  #   FacDerivedHuc = purrr::map(info[["Results"]][["Facilities"]], "FacDerivedHuc"),
  #   CWPCaseIDs = purrr::map(info[["Results"]][["Facilities"]], "CWPCaseIDs"),
  #   CWPTotalDesignFlowNmbr = purrr::map(info[["Results"]][["Facilities"]], "CWPTotalDesignFlowNmbr"),
  #   CWPActualAverageFlowNmbr = purrr::map(info[["Results"]][["Facilities"]], "CWPActualAverageFlowNmbr"),
  #   CWPStateWaterBodyCode = purrr::map(info[["Results"]][["Facilities"]], "CWPStateWaterBodyCode"),
  #   CWPStateWaterBodyName = purrr::map(info[["Results"]][["Facilities"]], "CWPStateWaterBodyName"),
  #   RadGnisName = purrr::map(info[["Results"]][["Facilities"]], "RadGnisName"),
  #   RadReachcode = purrr::map(info[["Results"]][["Facilities"]], "RadReachcode"),
  #   RadWBDHuc12 = purrr::map(info[["Results"]][["Facilities"]], "RadWBDHuc12"),
  #   AttainsSource = purrr::map(info[["Results"]][["Facilities"]], "AttainsSource"),
  #   NPDESIDs = purrr::map(info[["Results"]][["Facilities"]], "NPDESIDs"),
  #   CWPTotalPenalties = purrr::map(info[["Results"]][["Facilities"]], "CWPTotalPenalties")
  #
  # )
  return(buildOutput)

}


readEchoGetFacilitiesDots <- function(...){

  if(length(list(...)) == 0){
    stop("No arguments supplied")
  }

  matchReturn <- convertLists(...)

  if(anyNA(unlist(matchReturn))){
    stop("NA's are not allowed in query")
  }

  values <- sapply(matchReturn, function(x) as.character(paste(eval(x),collapse=",",sep="")))
  values
}


convertLists <- function(...){
  matchReturn <- c(do.call("c",list(...)[sapply(list(...), class) == "list"]), #get the list parts
                   list(...)[sapply(list(...), class) != "list"]) # get the non-list parts
  return(matchReturn)

}


safe_extract <- function(l, wut) {
  res <- l[wut]
  null_here <- purrr::map_lgl(res, is.null)
  res[null_here] <- NA
  res
}
mps9506/watertools documentation built on May 20, 2019, 3:32 p.m.