# R/adapter_CL_DGA.R
#
# Defines functions: timeseries.hydro_service_CL_DGA, .cl_fetch_q_daily,
# .cl_cr2_download_q_daily, stations.hydro_service_CL_DGA,
# .cl_dga_split_station_name, .cl_dga_dms_to_dd, .cl_dga_utm_to_wgs84,
# .cl_dga_read_stationlist, .cl_dga_stationlist_url,
# .cl_dga_stationlist_page, timeseries_parameters.hydro_service_CL_DGA,
# register_CL_DGA

# ==== Chile (DGA via CR2 Explorador) adapter =================================
# Stations:
#   - Official DGA station list (Excel) linked from:
#       https://dga.mop.gob.cl/estadisticas-estaciones-dga/
#     "Listado de estaciones activas de la Direcci\u00F3n General de Aguas"
#     Columns (per site description): c\u00F3digo, nombre, tipo de estaci\u00F3n, cuenca,
#     coordenadas.  We auto-detect columns by name and coerce the code column
#     to text to preserve leading zeros.
#
# Time series:
#   - Daily mean discharge compiled by CR2 (Caudales Medios Diarios - CR2),
#     exposed via Explorador Clim\u00E1tico:
#       https://explorador.cr2.cl
#     We use the same "request.php?options=..." pattern as the existing
#     `chile()` helper you showed, for `variable.id = "qflxDaily"`.
#   - We always download the full series for a gauge and then filter by
#     date range in R (like your `chile()` function).
#
# Notes:
#   - Adapter exposes only parameter = "water_discharge".
#   - Station metadata comes from DGA; time series from CR2 (which compiles
#     DGA + other official data).
#   - Daily data are returned with a `timestamp` at midnight UTC.

# -----------------------------------------------------------------------------
# Registration
# -----------------------------------------------------------------------------

#' @keywords internal
#' @noRd
register_CL_DGA <- function() {
  # Provider metadata for the Chilean DGA/CR2 adapter: throttled to a
  # single request per second against the CR2 Explorador endpoint and
  # unauthenticated.
  cfg <- list(
    provider_id   = "CL_DGA",
    provider_name = "Direcci\u00F3n General de Aguas (via CR2 Explorador)",
    country       = "Chile",
    base_url      = "https://explorador.cr2.cl",
    rate_cfg      = list(n = 1L, period = 1),  # 1 request / second
    auth          = list(type = "none")
  )
  do.call(register_service_usage, cfg)
}

#' @export
timeseries_parameters.hydro_service_CL_DGA <- function(x, ...) {
  # The adapter currently exposes a single parameter: daily mean discharge.
  "water_discharge"
}

# -----------------------------------------------------------------------------
# Station list (from DGA Excel)
# -----------------------------------------------------------------------------

.cl_dga_stationlist_page <- function() {
  # Single source of truth for the DGA "Estadisticas" landing page, which
  # links to the official station-list workbook.
  page_url <- "https://dga.mop.gob.cl/estadisticas-estaciones-dga/"
  page_url
}

.cl_dga_stationlist_url <- function() {
  # Scrape the DGA "Estadisticas" page and return the absolute URL of the
  # current station-list workbook (.xlsx). The page layout is not stable,
  # so we collect every .xlsx link and prefer ones whose path mentions the
  # station listing.
  #
  # Raises (rlang::abort) when the page cannot be fetched, cannot be
  # parsed, or contains no .xlsx link at all.
  page_url <- .cl_dga_stationlist_page()

  req <- httr2::request(page_url) |>
    httr2::req_user_agent(
      "hydrodownloadR (+https://github.com/your-org/hydrodownloadR)"
    )

  resp <- try(perform_request(req), silent = TRUE)
  if (inherits(resp, "try-error")) {
    rlang::abort("CL_DGA: failed to fetch Estadisticas page for station list.")
  }

  html <- try(
    xml2::read_html(httr2::resp_body_string(resp)),
    silent = TRUE
  )
  if (inherits(html, "try-error")) {
    rlang::abort("CL_DGA: could not parse Estadisticas HTML for station list.")
  }

  links <- rvest::html_elements(html, "a")
  hrefs <- trimws(rvest::html_attr(links, "href"))
  hrefs <- hrefs[!is.na(hrefs)]

  # Candidates: any link ending in .xlsx (optionally with a query string).
  xlsx <- hrefs[grepl("\\.xlsx($|\\?)", hrefs, ignore.case = TRUE)]

  if (!length(xlsx)) {
    rlang::abort(
      "CL_DGA: could not find any .xlsx link on Estadisticas page."
    )
  }

  # Prefer links that look like the national station listing. One
  # case-insensitive alternation replaces the previous chain of grepl()
  # calls, where "Estaciones" was already subsumed by "Estacion".
  preferred <- xlsx[
    grepl("Listado|Estacion|Nacional|Informe", xlsx, ignore.case = TRUE)
  ]

  cand <- if (length(preferred)) preferred else xlsx
  url  <- cand[1]

  # Resolve relative links against the page we scraped.
  if (!grepl("^https?://", url)) {
    url <- xml2::url_absolute(url, page_url)
  }
  url
}

.cl_dga_read_stationlist <- function() {
  # Download the DGA station-list workbook and read its first sheet.
  # The sheet starts with a report header, hence skip = 15; the station
  # code column is re-read as text so codes keep their leading zeros.
  #
  # Returns a data frame (as produced by readxl::read_xlsx).
  url <- .cl_dga_stationlist_url()

  tmp <- tempfile(fileext = ".xlsx")
  on.exit(unlink(tmp), add = TRUE)  # fix: don't leave the download behind

  req <- httr2::request(url) |>
    httr2::req_user_agent(
      "hydrodownloadR (+https://github.com/your-org/hydrodownloadR)"
    )

  resp <- try(perform_request(req), silent = TRUE)
  if (inherits(resp, "try-error")) {
    rlang::abort(paste("CL_DGA: failed to download station list from", url))
  }

  bin <- httr2::resp_body_raw(resp)
  writeBin(bin, tmp)

  # First pass with guessed types, just to locate the station code column
  # by name (e.g. "Estaci\u00F3n" or "C\u00F3digo").
  raw0 <- readxl::read_xlsx(tmp, sheet = 1, skip = 15)
  nm0  <- names(raw0)

  id_idx <- which(
    grepl("estaci", nm0, ignore.case = TRUE) |
      grepl("c[o\u00F3]digo", nm0, ignore.case = TRUE)
  )[1]

  if (is.na(id_idx)) {
    # Fallback: no recognizable id column, use the guessed-type read as-is.
    return(raw0)
  }

  # Second pass: force the id column to text to preserve leading zeros.
  col_types <- rep("guess", length(nm0))
  col_types[id_idx] <- "text"

  readxl::read_xlsx(
    tmp,
    sheet     = 1,
    skip      = 15,
    col_types = col_types
  )
}

# Datum = UTM zone (18, 19, ...); we map to EPSG:327xx (southern hemisphere).
.cl_dga_utm_to_wgs84 <- function(east, north, datum) {
  # Vectorised UTM -> WGS84 conversion. `datum` carries the UTM zone
  # number; all Chilean stations sit in the southern hemisphere, so the
  # EPSG code is 32700 + zone. Rows with unparseable or out-of-range
  # inputs come back as NA in both outputs.
  e <- suppressWarnings(as.numeric(east))
  n <- suppressWarnings(as.numeric(north))
  z <- suppressWarnings(as.integer(datum))

  lon <- rep(NA_real_, length(e))
  lat <- rep(NA_real_, length(e))

  valid <- !is.na(e) & !is.na(n) & !is.na(z) &
    z >= 1 & z <= 60

  if (any(valid)) {
    # Transform zone by zone: each UTM zone is a distinct CRS.
    for (zn in sort(unique(z[valid]))) {
      sel <- valid & z == zn
      if (!any(sel)) next

      sf_pts <- sf::st_as_sf(
        data.frame(E = e[sel], N = n[sel]),
        coords = c("E", "N"),
        crs    = 32700 + zn,  # WGS84 / UTM zone <zn>S
        remove = FALSE
      )
      xy <- sf::st_coordinates(sf::st_transform(sf_pts, 4326))

      lon[sel] <- xy[, "X"]
      lat[sel] <- xy[, "Y"]
    }
  }

  list(lon = lon, lat = lat)
}

.cl_dga_dms_to_dd <- function(x, is_lon = FALSE) {
  # Parse "degrees minutes seconds" strings (optionally suffixed with a
  # hemisphere letter: S, W, or O for "Oeste") into decimal degrees.
  # Because Chile lies entirely in the S/W hemispheres, any value still
  # positive after parsing is flipped negative.
  #
  # `is_lon` is kept for interface compatibility; the forced S/W sign
  # makes it unnecessary at the moment.
  if (is.null(x)) return(NA_real_)

  x <- trimws(as.character(x))
  x[x == ""] <- NA_character_

  res  <- rep(NA_real_, length(x))
  sign <- rep(1, length(x))

  # explicit hemisphere letters (if present)
  s_idx <- grepl("[Ss]$", x)
  w_idx <- grepl("[WwOo]$", x)  # O = Oeste
  sign[s_idx | w_idx] <- -1

  # Extract D / M / S groups
  m <- regexec("(-?\\d+)\\D+(\\d+)\\D+(\\d+(?:\\.\\d*)?)", x)
  parts <- regmatches(x, m)

  for (i in seq_along(parts)) {
    p <- parts[[i]]
    if (length(p) != 4) next

    d  <- suppressWarnings(as.numeric(p[2]))
    m_ <- suppressWarnings(as.numeric(p[3]))
    s  <- suppressWarnings(as.numeric(p[4]))
    if (is.na(d) || is.na(m_) || is.na(s)) next

    # Fix: with a negative degree field the minutes/seconds must be
    # subtracted, not added ("-33 27 0" is -33.45, not -32.55). Build the
    # magnitude first, then apply the sign from either the leading minus
    # or the hemisphere letter.
    mag <- abs(d) + m_ / 60 + s / 3600
    res[i] <- if (d < 0) -mag else mag * sign[i]
  }

  # Chile is S/W; if still positive, flip sign
  pos <- !is.na(res) & res > 0
  if (any(pos)) {
    res[pos] <- -res[pos]
  }

  res
}

.cl_dga_split_station_name <- function(estacion_col) {
  # Split full DGA station labels like "RIO CAMARONES EN CONANOXA" into a
  # river part ("RIO CAMARONES") and a location part ("CONANOXA"). Labels
  # that do not follow the "RIO ... EN ..." pattern keep the full label as
  # the location and get an NA river.
  full_name <- normalize_utf8(trimws(as.character(estacion_col)))
  river     <- rep(NA_character_, length(full_name))
  location  <- full_name

  # The split pattern captures river (lazy, up to the first " EN ") and
  # location; ÍI covers accented and plain spellings of "RIO".
  split_pat <- "^(R[\u00cdI]O\\s+.+?)\\s+EN\\s+(.+)$"
  hit <- grepl("R[\u00cdI]O\\s+.+\\s+EN\\s+.+", full_name, ignore.case = TRUE)

  if (any(hit)) {
    river[hit]    <- sub(split_pat, "\\1", full_name[hit], ignore.case = TRUE)
    location[hit] <- sub(split_pat, "\\2", full_name[hit], ignore.case = TRUE)
  }

  list(
    original = full_name,
    river    = river,
    location = location
  )
}


#' @export
stations.hydro_service_CL_DGA <- function(x, ...) {
  # Build the station table from the official DGA Excel listing.
  # Only fluviometric (river gauge) stations are kept; coordinates come
  # from the UTM columns when available, with the DMS Latitud/Longitud
  # columns as a fallback. Returns a tibble with one row per station.
  raw_tbl <- .cl_dga_read_stationlist()

  if (!NROW(raw_tbl)) {
    rlang::abort("CL_DGA: station list .xlsx appears to be empty.")
  }

  # --- filter by Tipo de Estaci\u00F3n -------------------------------------------
  tipo_col <- col_or_null(raw_tbl, "Tipo de Estaci\u00F3n")
  station_type <- NULL

  if (!is.null(tipo_col)) {
    tipo_chr <- normalize_utf8(trimws(as.character(tipo_col)))

    # only keep stations where the type string contains FLUVIOMETRICA
    has_fl <- grepl("FLUVIOMETRICA", tipo_chr, ignore.case = TRUE)

    keep_type <- has_fl

    raw_tbl      <- raw_tbl[keep_type, , drop = FALSE]
    station_type <- tipo_chr[keep_type]
  }

  if (!NROW(raw_tbl)) {
    # No stations match -> return an empty tibble. Fix: the column order
    # now matches the populated result below, so callers always see the
    # same schema regardless of how many stations matched.
    return(tibble::tibble(
      country               = x$country,
      provider_id           = x$provider_id,
      provider_name         = x$provider_name,
      station_id            = character(),
      station_name_original = character(),
      station_name          = character(),
      river                 = character(),
      lat                   = numeric(),
      lon                   = numeric(),
      area                  = numeric(),
      altitude              = numeric(),
      station_type          = character()
    ))
  }

  # if there was no tipo_col at all, but we still have rows:
  if (is.null(station_type)) {
    station_type <- rep(NA_character_, NROW(raw_tbl))
  }

  # --- columns via col_or_null() ---------------------------------------------

  # Station code (ID)
  station_id_col <- col_or_null(raw_tbl, "C\u00F3digo")

  # Station name (full DGA label, e.g. "RIO CAMARONES EN CONANOXA")
  estacion_col <- col_or_null(raw_tbl, "Estaci\u00F3n")

  # Coordinates (UTM and Datum)
  utm_n_col <- col_or_null(raw_tbl, "UTM WGS84 Norte(m)")
  utm_e_col <- col_or_null(raw_tbl, "UTM WGS84 Este(m)")
  datum_col <- col_or_null(raw_tbl, "Datum")

  # DMS coordinates
  lat_dms_col <- col_or_null(raw_tbl, "Latitud")
  lon_dms_col <- col_or_null(raw_tbl, "Longitud")

  # Altitude
  alt_col <- col_or_null(raw_tbl, "Altitud m.s.n.m")

  # --- basic cleaning --------------------------------------------------------

  station_id <- trimws(as.character(station_id_col))

  # Split station name into river + location, but keep the original label
  name_split <- .cl_dga_split_station_name(estacion_col)
  station_name_original <- name_split$original
  river                 <- name_split$river
  station_name          <- name_split$location

  utm_n   <- utm_n_col
  utm_e   <- utm_e_col
  datum   <- datum_col
  lat_dms <- lat_dms_col
  lon_dms <- lon_dms_col
  alt_raw <- alt_col

  # --- 1) primary: UTM to WGS84 ----------------------------------------------

  utm_coords <- .cl_dga_utm_to_wgs84(
    east  = utm_e,
    north = utm_n,
    datum = datum
  )
  lon_num <- utm_coords$lon
  lat_num <- utm_coords$lat

  # --- 2) fallback: DMS Latitud/Longitud ------------------------------------

  lat_dms_dd <- .cl_dga_dms_to_dd(lat_dms, is_lon = FALSE)
  lon_dms_dd <- .cl_dga_dms_to_dd(lon_dms, is_lon = TRUE)

  # Only fill coordinates that UTM conversion could not provide.
  missing_lat <- is.na(lat_num) & !is.na(lat_dms_dd)
  missing_lon <- is.na(lon_num) & !is.na(lon_dms_dd)

  lat_num[missing_lat] <- lat_dms_dd[missing_lat]
  lon_num[missing_lon] <- lon_dms_dd[missing_lon]

  altitude <- suppressWarnings(as.numeric(as.character(alt_raw)))

  # --- final tibble ----------------------------------------------------------

  out <- tibble::tibble(
    country               = x$country,
    provider_id           = x$provider_id,
    provider_name         = x$provider_name,
    station_id            = station_id,
    station_name_original = station_name_original, # full DGA label
    station_name          = station_name,          # location only
    river                 = river,                 # e.g. "RIO CAMARONES"
    lat                   = lat_num,
    lon                   = lon_num,
    area                  = NA_real_,              # not provided by the listing
    altitude              = altitude,
    station_type          = station_type           # only types with FLUVIOMETRICA
  )

  # Drop rows without a usable ID
  out <- out[!is.na(out$station_id) & nzchar(out$station_id), ]

  out
}

# -----------------------------------------------------------------------------
# Internal helpers for time series (CR2 Explorador)
# -----------------------------------------------------------------------------

.cl_cr2_download_q_daily <- function(site) {
  # Download the complete CR2 daily-discharge series for one gauge.
  # site: 8-digit DGA code as character, e.g. "01001002".
  #
  # The Explorador "request.php?options=..." endpoint takes a large
  # URL-encoded JSON blob; we splice the site code into a fixed request
  # asking for the full qflxDaily series exported as CSV, then fetch the
  # CSV whose URL is embedded in the response body.

  original <- paste0(
    "https://explorador.cr2.cl/request.php?options=",
    "{%22variable%22:{%22id%22:%22qflxDaily%22,%22var%22:%22caudal%22,",
    "%22intv%22:%22daily%22,%22season%22:%22year%22,%22stat%22:%22mean%22,",
    "%22minFrac%22:80},%22time%22:{%22start%22:-946771200,%22end%22:",
    "1631664000,%22months%22:%22A%C3%B1o%20completo%22},",
    "%22anomaly%22:{%22enabled%22:false,%22type%22:%22dif%22,",
    "%22rank%22:%22no%22,%22start_year%22:1980,%22end_year%22:2010,",
    "%22minFrac%22:70},%22map%22:{%22stat%22:%22mean%22,%22minFrac%22:10,",
    "%22borderColor%22:%227F7F7F%22,%22colorRamp%22:%22Jet%22,",
    "%22showNaN%22:false,%22limits%22:{%22range%22:[5,95],",
    "%22size%22:[4,12],%22type%22:%22prc%22}},%22series%22:{%22sites%22:[%22"
  )

  ending <- paste0(
    "%22],%22start%22:null,%22end%22:null},%22export%22:{%22map%22:",
    "%22Shapefile%22,%22series%22:%22CSV%22,%22view%22:{%22frame%22:",
    "%22Vista%20Actual%22,%22map%22:%22roadmap%22,%22clat%22:-18.0036,",
    "%22clon%22:-69.6331,%22zoom%22:5,%22width%22:461,%22height%22:2207}},",
    "%22action%22:[%22export_series%22]}"
  )

  website <- paste0(original, site, ending)

  # be polite with CR2 server
  Sys.sleep(0.25)

  s <- rvest::session(website)
  body_txt <- s |>
    rvest::html_element("body") |>
    rvest::html_text()

  # Extract the generated CSV's URL from the HTML-ish response.
  csv_url <- gsub("(.*)(https://.*)(\"}}})", "\\2", body_txt)

  outpath <- tempfile(fileext = ".csv")
  on.exit(unlink(outpath), add = TRUE)  # fix: remove the temp CSV when done
  utils::download.file(csv_url, outpath, quiet = TRUE)

  original_data <- readr::read_delim(outpath, show_col_types = FALSE)
  # Fix: strip *all* whitespace from header names; sub() only removed the
  # first run.
  names(original_data) <- gsub("\\s+", "", names(original_data))

  tibble::as_tibble(original_data)
}

.cl_fetch_q_daily <- function(x,
                              station_id,
                              rng,
                              mode) {
  # Fetch the CR2 daily-discharge series for one station and normalise it
  # into the package's long time-series layout. Returns an empty tibble
  # (with a warning) when the download fails or the CSV looks unexpected.
  site <- trimws(as.character(station_id))

  raw <- try(.cl_cr2_download_q_daily(site), silent = TRUE)
  if (inherits(raw, "try-error") || !NROW(raw)) {
    rlang::warn(paste0("CL_DGA/CR2: download failed for station ", site))
    return(tibble::tibble())
  }

  # CR2 CSVs carry year/month/day/value under Spanish column names.
  required_cols <- c("agno", "mes", "dia", "valor")
  if (!all(required_cols %in% names(raw))) {
    rlang::warn(paste0(
      "CL_DGA/CR2: unexpected columns for station ", site,
      " (missing one of: ", paste(required_cols, collapse = ", "), ")."
    ))
    return(tibble::tibble())
  }

  yy <- suppressWarnings(as.integer(raw$agno))
  mm <- suppressWarnings(as.integer(raw$mes))
  dd <- suppressWarnings(as.integer(raw$dia))

  obs_date  <- suppressWarnings(as.Date(sprintf("%04d-%02d-%02d", yy, mm, dd)))
  obs_value <- suppressWarnings(as.numeric(raw$valor))

  # Keep only rows with a parseable date and numeric value; in "range"
  # mode, additionally clip to the requested window.
  keep <- !is.na(obs_date) & !is.na(obs_value)
  if (identical(mode, "range")) {
    keep <- keep &
      obs_date >= rng$start_date &
      obs_date <= rng$end_date
  }

  if (!any(keep)) {
    return(tibble::tibble())
  }

  tibble::tibble(
    country       = x$country,
    provider_id   = x$provider_id,
    provider_name = x$provider_name,
    station_id    = site,
    parameter     = "water_discharge",
    # Daily values are stamped at midnight UTC.
    timestamp     = as.POSIXct(obs_date[keep], tz = "UTC"),
    value         = obs_value[keep],
    unit          = "m^3/s",
    quality_code  = NA_character_,
    source_url    = x$base_url
  )
}



#' @export
timeseries.hydro_service_CL_DGA <- function(x,
                                            parameter = c("water_discharge"),
                                            stations    = NULL,
                                            start_date  = NULL,
                                            end_date    = NULL,
                                            mode        = c("complete", "range"),
                                            exclude_quality = NULL,
                                            ...) {
  # Download daily mean discharge for the requested stations and return a
  # long tibble (one row per station/day).
  #
  # Fix: match.arg() with a single choice already guarantees
  # parameter == "water_discharge" (anything else errors), so the previous
  # explicit abort() for other values was unreachable and is removed.
  parameter <- match.arg(parameter)
  mode      <- match.arg(mode)

  # NOTE(review): exclude_quality is accepted for interface consistency but
  # unused here -- this adapter sets quality_code to NA for all rows.

  rng <- resolve_dates(mode, start_date, end_date)

  # --------------------------------------------------------------------------
  # station_id vector
  # - default: only stations with station_type containing "FLUVIOMETRICA"
  # - if stations are given explicitly, use them as-is
  # --------------------------------------------------------------------------
  if (is.null(stations)) {
    st <- stations.hydro_service_CL_DGA(x)

    if ("station_type" %in% names(st)) {
      is_fl <- grepl("FLUVIOMETRICA", st$station_type, ignore.case = TRUE)
      st    <- st[is_fl & !is.na(st$station_id), , drop = FALSE]
    }

    station_vec <- st$station_id
  } else {
    station_vec <- stations
  }

  station_vec <- unique(trimws(as.character(station_vec)))
  if (!length(station_vec)) {
    return(tibble::tibble())
  }

  # batching + rate limit -----------------------------------------------------
  # Batches only drive progress-bar granularity; downloads happen one
  # station at a time through the rate-limited fetcher below.

  batches <- chunk_vec(station_vec, 20L)

  pb <- progress::progress_bar$new(
    total  = length(batches),
    format = "CL_DGA [:bar] :current/:total (:percent) eta: :eta"
  )

  fetch_one <- function(st_id) {
    .cl_fetch_q_daily(
      x          = x,
      station_id = st_id,
      rng        = rng,
      mode       = mode
    )
  }

  # Throttle per the registered rate config (default: 1 request / second).
  limited <- ratelimitr::limit_rate(
    fetch_one,
    rate = ratelimitr::rate(
      n      = x$rate_cfg$n %||% 1L,
      period = x$rate_cfg$period %||% 1
    )
  )

  out <- lapply(batches, function(batch) {
    pb$tick()
    dplyr::bind_rows(lapply(batch, limited))
  })

  dplyr::bind_rows(out)
}

# Try the hydrodownloadR package in your browser.
#
# Any scripts or data that you put into this service are public.
#
# hydrodownloadR documentation built on Feb. 25, 2026, 5:08 p.m.