R/newave-data.R

Defines functions confhd_data read_confhd nw_down path_confhd_file nw_urls url_exists

Documented in confhd_data nw_down nw_urls path_confhd_file read_confhd url_exists

#' Check if a URL exists
#' @param x a single URL
#' @param non_2xx_return_value what to do if the site exists but the
#'        HTTP status code is not in the `2xx` range. Default is to return `FALSE`.
#' @param quiet if not `FALSE`, then every time the `non_2xx_return_value` condition
#'        arises a warning message will be displayed. Default is `FALSE`.
#' @param ... other params (`timeout()` would be a good one) passed directly
#'        to `httr::HEAD()` and/or `httr::GET()`
#' @keywords internal
#' @source \url{https://stackoverflow.com/questions/52911812/check-if-url-exists-in-r}
url_exists <- function(x, non_2xx_return_value = FALSE, quiet = FALSE,...) {

  # you don't need thse two functions if you're alread using `purrr`
  # but `purrr` is a heavyweight compiled pacakge that introduces
  # many other "tidyverse" dependencies and this doesnt.

  capture_error <- function(code, otherwise = NULL, quiet = TRUE) {
    tryCatch(
      list(result = code, error = NULL),
      error = function(e) {
        if (!quiet)
          message("Error: ", e$message)

        list(result = otherwise, error = e)
      },
      interrupt = function(e) {
        stop("Terminated by user", call. = FALSE)
      }
    )
  }

  safely <- function(.f, otherwise = NULL, quiet = TRUE) {
    function(...) capture_error(.f(...), otherwise, quiet)
  }

  sHEAD <- safely(httr::HEAD)
  sGET <- safely(httr::GET)

  # Try HEAD first since it's lightweight
  res <- sHEAD(x, ...)

  if (is.null(res$result) ||
      ((httr::status_code(res$result) %/% 200) != 1)) {

    res <- sGET(x, ...)

    if (is.null(res$result)) return(NA) # or whatever you want to return on "hard" errors

    if (((httr::status_code(res$result) %/% 200) != 1)) {
      if (!quiet) warning(sprintf("Requests for [%s] responded but without an HTTP status code in the 200-299 range", x))
      return(non_2xx_return_value)
    }

    return(TRUE)

  } else {
    return(TRUE)
  }

}



#' Build the link to the monthly file of NEWAVE model CCEE deck of prices.
#'
#' @param YYYYMM character vector with year (YYYY) and month (MM). Accepted
#' formats are for example "201809", "2018", "2018.02", "2018-03",
#' "2018-04-01", "2018/05".
#'
#' @return A character string naming the URL of the zip file to be downloaded.
#' @keywords internal
#' @seealso \code{\link[HEgis]{confhd_data}}
# examples
# yyyymm = c("201809", "2018", "2018.02", "2018-03", "2018-04-01", "2018/05")
# nw_urls(yyyymm)
nw_urls <- function(YYYYMM = "201809"){

  YYYYMM <- as.character(YYYYMM)
  checkmate::assert_character(
    YYYYMM,
    min.chars = 4,
    pattern = "[0-9]{4,}",
    all.missing = FALSE,
    min.len = 1
  )

  dates <- lubridate::ymd(YYYYMM, truncated = 2)
  YYYYMM <- format.Date(dates, "%Y%m")

  nw_zip_urls <- glue::glue(
    "https://www.ccee.org.br/ccee/documentos/NW{YYYYMM}"
  )
  nw_zip_urls
}

# Get the file path to CONFHD.DAT
# @keywords internal
path_confhd_file <- function(path) {
  # path = dir_ext
  path_confhd_file <- grep(pattern = "CONFHD.DAT",
       x = fs::dir_ls(path),
       value = TRUE,
       ignore.case = TRUE
  )
  if(length(path_confhd_file) == 0){
    message("CONFHD.DAT file not found.")
    message("The folder expected to contain this file
            has the folowing content:")
    print(fs::dir_ls(path))

    .find_deep <- function(path){
      zips <- fs::dir_ls(path, glob = "*.zip")
      last_zip <- sort(zips)[length(zips)]
      dir_ext <- fs::path_ext_remove(last_zip)
      unzip(zip_dest, exdir = dir_ext)
      checkmate::assert_directory_exists(dir_ext)
      path_confhd_file(dir_ext)
    }

  }
  path_confhd_file
}

#' Download zip file from link provided by `nw_urls()`
#'
#' @param link url to download data
#' @param confhd_path logical. Default: TRUE, will return the path
#' to `CONFHD.DAT` file, otherwise the path to the temporary directory of
#' extracted data.
#' @keywords internal
#'
#'@return Default is the path to `CONFHD.DAT` file, otherwise the
#'temporary directory of extracted data.
nw_down <- function(link, confhd_path = TRUE, quiet = TRUE){
  #link = "https://www.ccee.org.br/ccee/documentos/NW201809" # 500 MB!?
  # link = "https://www.ccee.org.br/ccee/documentos/NW202012"
  checkmate::assert_character(link)
  checkmate::assert_true(curl::has_internet())
  #checkmate::assert_true(RCurl::url.exists(link))

  ## taking to much time
  #checkmate::assert_true(url_exists(link, quiet = quiet))

  zip_dest <- fs::file_temp(ext = "zip")

  utils::download.file(link, destfile = zip_dest, mode = "wb")
  #unzip(zip_dest, list = TRUE)
  dir_ext <- fs::path_ext_remove(zip_dest)
  unzip(zip_dest, exdir = dir_ext)
  checkmate::assert_directory_exists(dir_ext)

  if(confhd_path) {
    confhd_file <- path_confhd_file(dir_ext)
    checkmate::assert_file_exists(confhd_file)
    return(confhd_file)
  }

  dir_ext
}

#' Read data from text file `CONFHD.DAT`
#'
#' Read data file that contains the names from Hydro Power plants and ID from
#' the streamflow gauge ('posto').
#'
#' @param confhd_file character, file path.
#'
#' @return a [tibble][tibble::tibble-package].
#' @export
#'
read_confhd <- function(confhd_file) {

  confhd_header <- readr::read_fwf(
    file = confhd_file,
    col_positions = readr::fwf_empty(confhd_file, n = 1),
    col_types = readr::cols(),
    n_max = 1
  ) %>%
    t() %>%
    c() %>%
    janitor::make_clean_names()
    #rattle::normVarNames()

  confhd_data <- readr::read_fwf(
    file = confhd_file,
    col_positions = readr::fwf_empty(
      confhd_file,
      skip = 2,
      col_names = confhd_header),
    col_types = readr::cols(),
    #locate = locale(encoding = "latin1"),
    skip = 2
  )

  confhd_data <- dplyr::mutate(confhd_data,
                               num = as.integer(num),
                               posto = as.integer(posto),
                               jus = as.integer(jus),
                               #ree = as.integer(ree),
                               v_inic = as.numeric(v_inic),
                               modif = as.integer(modif),
                               inic_hist = as.integer(inic_hist),
                               fim_hist = as.integer(fim_hist)
                               )
  # some times "REE" is not present in the data
  if("ree" %in% names(confhd_data)){
    confhd_data <- dplyr::mutate(confhd_data, ree = as.integer(ree))
  }
  if("ssis" %in% names(confhd_data)){
    confhd_data <- dplyr::mutate(confhd_data, ssis = as.integer(ssis))
  }
  confhd_data
}

#' Get data from `CONFHD.DAT` file for the a given year and month
#'
#' @inheritParams nw_urls
#' @inheritParams nw_down
#' @source Data are available in \url{https://www.ccee.org.br/ccee/documentos}
#' since July/2012.
#' @return a [tibble][tibble::tibble-package] with tidy data.
#' @export
#' @note A data request for the current month may not yet be available for UHEs
#' configuration data. That is why it is recommended to use the date in the
#' previous 40 days to ensure that the data will be available.
#' @examples
#' if(FALSE){
#' # Get the table for the previous month to ensure that the data already exists
#' uhes_info <- confhd_data(format.Date(Sys.Date()-40, "%Y%m"))
#' }
#'
confhd_data <- function(
                        YYYYMM = format.Date(Sys.Date() - 40, "%Y%m"),
                        confhd_path = TRUE) {
  read_confhd(nw_down(nw_urls(YYYYMM), confhd_path))
}
lhmet-ped/HEgis documentation built on July 23, 2022, 2:59 a.m.