test_waterdata_stats.R
In dataRetrieval: Retrieval Functions for USGS and EPA Hydrology and Water Quality Data

# ------------------------------------------------------------------------------
# cleanup_cols_stats()
# ------------------------------------------------------------------------------

test_that("values column is unnested and converted to numeric", {
  DT <- data.table(
    values = c("1.2", "3.4"),
    percentiles = c("10", "90"),
    computation = "percentile",
    start_date = "2000-01-01",
    end_date = "2000-12-31",
    interval_type = "month",
    ts_id = "1",
    sample_count = 10,
    approval_status = "approved",
    computation_id = "abc"
  )

  out <- cleanup_cols_stats(DT)

  expect_equal(out$value, c(1.2, 3.4))
  expect_equal(out$percentile, c(10, 90))
})

test_that("value column is used when values is absent", {
  DT <- data.table(
    value = "2.5",
    computation = "arithmetic_mean",
    start_date = "2000-01-01",
    end_date = "2000-12-31",
    interval_type = "year",
    sample_count = 1,
    ts_id = "1",
    approval_status = "approved",
    computation_id = "xyz"
  )

  out <- cleanup_cols_stats(DT)

  expect_equal(out$value, 2.5)
  expect_true(is.na(out$percentile))
})

test_that("'nan' values are converted to NA_real_", {
  DT <- data.table(
    values = "nan",
    computation = "arithmetic_mean",
    start_date = "2000-01-01",
    end_date = "2000-12-31",
    interval_type = "month",
    ts_id = "1",
    sample_count = 1,
    approval_status = "approved",
    computation_id = "nan-test"
  )

  out <- cleanup_cols_stats(DT)

  expect_true(is.na(out$value))
})

test_that("percentile is inferred from computation type", {
  DT <- data.table(
    value = "5",
    computation = c("minimum", "median", "maximum"),
    start_date = "2000-01-01",
    end_date = "2000-12-31",
    interval_type = "year",
    ts_id = "1",
    sample_count = 1,
    approval_status = "approved",
    computation_id = 1:3
  )

  out <- cleanup_cols_stats(DT)

  expect_equal(
    out[order(computation)]$percentile,
    c(100, 50, 0) # maximum, median, minimum
  )
})

test_that("scalar percentile is recycled to match values length", {
  DT <- data.table(
    values = c("1", "2", "3"),
    percentiles = "50",
    computation = "percentile",
    start_date = "2000-01-01",
    end_date = "2000-12-31",
    interval_type = "month",
    ts_id = "1",
    sample_count = 3,
    approval_status = "approved",
    computation_id = "recycle"
  )

  out <- cleanup_cols_stats(DT)

  expect_equal(out$percentile, c(50, 50, 50))
})

test_that("missing value and values yields NA value", {
  DT <- data.table(
    computation = "arithmetic_mean",
    start_date = "2000-01-01",
    end_date = "2000-12-31",
    interval_type = "year",
    ts_id = "1",
    sample_count = 1,
    approval_status = "approved",
    computation_id = "missing"
  )

  out <- cleanup_cols_stats(DT)

  expect_true(is.na(out$value))
})

# ------------------------------------------------------------------------------
# construct_statistics_request()
# ------------------------------------------------------------------------------

test_that("construct_statistics_request builds correct URL path", {
  req <- construct_statistics_request(service = "Normals")

  expect_true(grepl("statistics/v0/observationNormals", req$url))
})

test_that("Intervals service path is correct", {
  req <- construct_statistics_request(service = "Intervals")

  expect_true(grepl("statistics/v0/observationIntervals", req$url))
})

# ------------------------------------------------------------------------------
# explode_query integration (no HTTP)
# ------------------------------------------------------------------------------

test_that("arguments are passed as query parameters", {
  args <- list(
    state_code = "WI",
    computation_type = "median",
    page_size = 10
  )

  req <- construct_statistics_request("Normals")
  full <- explode_query(req, POST = FALSE, x = args)

  qs <- httr2::req_get_url(full)

  expect_match(qs, "state_code=WI")
  expect_match(qs, "computation_type=median")
  expect_match(qs, "page_size=10")
})

# ------------------------------------------------------------------------------
# get_statistics_data() (mocked)
# ------------------------------------------------------------------------------

test_that("get_statistics_data returns sf object with attributes", {
  fake_page <-
    sf::st_sf(
      monitoring_location_id = c("USGS-01432160", "USGS-01432110"),
      monitoring_location_name = c(
        "DELAWARE RIVER AT BARRYVILLE NY",
        "Lackawaxen River at Rowland, PA"
      ),
      site_type = c("Stream", "Stream"),
      site_type_code = c("ST", "ST"),
      country_code = c("US", "US"),
      state_code = c("42", "42"),
      county_code = c("103", "103"),
      geometry = sf::st_sfc(
        sf::st_point(c(0, 0)),
        sf::st_point(c(1, 1))
      ),
      data = c(
        # First site
        jsonlite::toJSON(
          list(
            list(
              parameter_code = "00060",
              unit_of_measure = "ft^3/s",
              parent_time_series_id = "1692e9a29c8c4276add4497c5da872fa",
              values = list(
                list(
                  start_date = "2018-10-01",
                  end_date = "2018-10-31",
                  interval_type = "month",
                  value = "8032.903",
                  sample_count = 31,
                  approval_status = "approved",
                  computation_id = "d98ebe80-d476-4144-b7ce-c6ca32f015de",
                  computation = "arithmetic_mean"
                ),
                list(
                  start_date = "2018-11-01",
                  end_date = "2018-11-30",
                  interval_type = "month",
                  value = "12521.667",
                  sample_count = 30,
                  approval_status = "approved",
                  computation_id = "93f45097-93ad-4b48-83d4-b27b5fc137dc",
                  computation = "arithmetic_mean"
                )
              )
            )
          ),
          auto_unbox = TRUE
        ),
        # Second site
        jsonlite::toJSON(
          list(
            list(
              parameter_code = "00060",
              unit_of_measure = "ft^3/s",
              parent_time_series_id = "7dda388ea270420dbe7324e56b6f907f",
              values = list(
                list(
                  start_date = "2007-07-27",
                  end_date = "2007-07-31",
                  interval_type = "month",
                  values = c("1", "2"),
                  percentiles = c("50", "100"),
                  sample_count = 5,
                  approval_status = "approved",
                  computation_id = "4579653d-f26f-4e9f-99ea-c3fa3fc68686",
                  computation = "percentile"
                )
              )
            )
          ),
          auto_unbox = TRUE
        )
      ),
      stringsAsFactors = FALSE
    )

  with_mocked_bindings(
    walk_pages = function(...) list(fake_page),
    {
      out <- get_statistics_data(list(), "Normals")

      expect_s3_class(out, "sf")
      expect_true("request" %in% names(attributes(out)))
      expect_true("queryTime" %in% names(attributes(out)))

      expect_equal(nrow(out), 4)
      expect_equal(out$value, c(8032.903, 12521.667, 1, 2))
      expect_equal(out$percentile, c(NA, NA, 50, 100))
      expect_equal(out$sample_count, c(31, 30, 5, 5))
    }
  )
})

test_that("get_statistics_data handles empty response", {
  with_mocked_bindings(
    walk_pages = function(...) list(),
    {
      out <- get_statistics_data(list(), "Normals")

      expect_s3_class(out, "sf")
      expect_equal(nrow(out), 0)
    }
  )
})

# ------------------------------------------------------------------------------
# High-level API tests (skipped on CRAN)
# ------------------------------------------------------------------------------

test_that("read_waterdata_stats_por returns data", {
  skip_on_cran()
  skip_if_offline()

  out <- read_waterdata_stats_por(
    monitoring_location_id = "USGS-01646500",
    parameter_code = "00060",
    computation_type = "median",
    page_size = 5
  )

  expect_s3_class(out, "sf")
  expect_true(nrow(out) > 0)
})

test_that("read_waterdata_stats_daterange returns data", {
  skip_on_cran()
  skip_if_offline()

  out <- read_waterdata_stats_daterange(
    monitoring_location_id = "USGS-01646500",
    parameter_code = "00060",
    computation_type = "maximum",
    page_size = 5
  )

  expect_s3_class(out, "sf")
  expect_true(nrow(out) > 0)
})

Any scripts or data that you put into this service are public.

dataRetrieval documentation built on March 31, 2026, 5:07 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

dataRetrieval
Retrieval Functions for USGS and EPA Hydrology and Water Quality Data

tests/testthat/test_waterdata_stats.R
In dataRetrieval: Retrieval Functions for USGS and EPA Hydrology and Water Quality Data

Try the dataRetrieval package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

dataRetrieval Retrieval Functions for USGS and EPA Hydrology and Water Quality Data

tests/testthat/test_waterdata_stats.R In dataRetrieval: Retrieval Functions for USGS and EPA Hydrology and Water Quality Data

Try the dataRetrieval package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

dataRetrieval
Retrieval Functions for USGS and EPA Hydrology and Water Quality Data

tests/testthat/test_waterdata_stats.R
In dataRetrieval: Retrieval Functions for USGS and EPA Hydrology and Water Quality Data