test-calcPercentile.R
In openair: Tools for the Analysis of Air Pollution Data

if (!identical(Sys.getenv("NOT_CRAN"), "true")) {
  return()
}

# calcPercentile is a thin wrapper over timeAverage — fast even on a full year
dat <- selectByDate(mydata, year = 2003)

# Run the most-used configurations once and reuse
pct_single <- calcPercentile(
  dat,
  pollutant = "o3",
  avg.time = "month",
  percentile = 95
)
pct_multi <- calcPercentile(
  dat,
  pollutant = "o3",
  avg.time = "month",
  percentile = c(5, 50, 95)
)

# --- Output structure --------------------------------------------------------

test_that("calcPercentile returns a data frame with a date column", {
  expect_s3_class(pct_single, "data.frame")
  expect_true("date" %in% names(pct_single))
})

test_that("single percentile produces exactly one percentile column", {
  pct_cols <- grep("^percentile\\.", names(pct_single), value = TRUE)
  expect_length(pct_cols, 1L)
  expect_true("percentile.95" %in% names(pct_single))
})

test_that("multiple percentiles produce one column per value with correct names", {
  expect_true(all(
    c("percentile.5", "percentile.50", "percentile.95") %in% names(pct_multi)
  ))
  pct_cols <- grep("^percentile\\.", names(pct_multi), value = TRUE)
  expect_length(pct_cols, 3L)
})

test_that("avg.time = 'month' returns 12 rows for a full year", {
  expect_equal(nrow(pct_single), 12L)
})

# --- Numerical correctness ---------------------------------------------------

test_that("percentile values are non-decreasing across quantile levels", {
  # p5 <= p50 <= p95 at every row
  ok <- !is.na(pct_multi$percentile.5) &
    !is.na(pct_multi$percentile.50) &
    !is.na(pct_multi$percentile.95)
  expect_true(all(pct_multi$percentile.5[ok] <= pct_multi$percentile.50[ok]))
  expect_true(all(pct_multi$percentile.50[ok] <= pct_multi$percentile.95[ok]))
})

test_that("percentile values stay within the observed range of the pollutant", {
  obs_range <- range(dat$o3, na.rm = TRUE)
  vals <- pct_single$percentile.95
  expect_true(all(vals >= obs_range[1], na.rm = TRUE))
  expect_true(all(vals <= obs_range[2], na.rm = TRUE))
})

test_that("percentile.0 equals the monthly minimum and percentile.100 the maximum", {
  pct_bounds <- calcPercentile(
    dat,
    pollutant = "o3",
    avg.time = "month",
    percentile = c(0, 100),
    data.thresh = 0
  )
  daily <- timeAverage(
    dat,
    avg.time = "month",
    statistic = "min",
    progress = FALSE
  )
  expect_equal(pct_bounds$percentile.0, daily$o3, tolerance = 1e-6)
})

# --- avg.time ----------------------------------------------------------------

test_that("avg.time = 'year' returns a single row", {
  annual <- calcPercentile(
    dat,
    pollutant = "o3",
    avg.time = "year",
    percentile = 50
  )
  expect_equal(nrow(annual), 1L)
})

test_that("avg.time = 'day' returns one row per day", {
  daily <- calcPercentile(
    dat,
    pollutant = "o3",
    avg.time = "day",
    percentile = 50
  )
  expect_equal(nrow(daily), 365L)
})

# --- prefix ------------------------------------------------------------------

test_that("custom prefix is reflected in output column names", {
  result <- calcPercentile(
    dat,
    pollutant = "o3",
    avg.time = "month",
    percentile = 75,
    prefix = "p"
  )
  expect_true("p75" %in% names(result))
  expect_false("percentile.75" %in% names(result))
})

# --- data.thresh -------------------------------------------------------------

test_that("data.thresh = 100 produces more NAs than data.thresh = 0", {
  dat_gaps <- dat
  dat_gaps$o3[sample(nrow(dat_gaps), 500)] <- NA

  res_low <- calcPercentile(
    dat_gaps,
    pollutant = "o3",
    avg.time = "month",
    percentile = 50,
    data.thresh = 0
  )
  res_high <- calcPercentile(
    dat_gaps,
    pollutant = "o3",
    avg.time = "month",
    percentile = 50,
    data.thresh = 100
  )

  expect_gte(
    sum(is.na(res_high$percentile.50)),
    sum(is.na(res_low$percentile.50))
  )
})

# --- Input validation --------------------------------------------------------

test_that("calcPercentile errors when pollutant is not in mydata", {
  expect_error(
    calcPercentile(
      dat,
      pollutant = "no_such_col",
      avg.time = "month",
      percentile = 50
    ),
    regexp = "not present"
  )
})

Any scripts or data that you put into this service are public.

openair documentation built on April 2, 2026, 9:07 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

openair
Tools for the Analysis of Air Pollution Data

tests/testthat/test-calcPercentile.R
In openair: Tools for the Analysis of Air Pollution Data

Try the openair package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

openair Tools for the Analysis of Air Pollution Data

tests/testthat/test-calcPercentile.R In openair: Tools for the Analysis of Air Pollution Data

Try the openair package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

openair
Tools for the Analysis of Air Pollution Data

tests/testthat/test-calcPercentile.R
In openair: Tools for the Analysis of Air Pollution Data