test-timeAverage.R
In openair: Tools for the Analysis of Air Pollution Data

# timeAverage tests
# Use a small slice for speed; one full year gives clean monthly/annual rows
dat <- selectByDate(mydata, year = 2003)
dat1 <- selectByDate(mydata, year = 2003, month = 1)

# Shared results — most tests reuse these
ta_day <- timeAverage(dat, avg.time = "day", progress = FALSE)
ta_month <- timeAverage(dat, avg.time = "month", progress = FALSE)
ta_year <- timeAverage(dat, avg.time = "year", progress = FALSE)

# --- Output structure --------------------------------------------------------

test_that("timeAverage returns a data frame (tibble) with a date column", {
  expect_s3_class(ta_day, "data.frame")
  expect_true("date" %in% names(ta_day))
  expect_s3_class(ta_day$date, "POSIXct")
})

test_that("timeAverage preserves numeric pollutant columns", {
  expect_true(all(c("no2", "o3", "nox", "ws", "wd") %in% names(ta_day)))
})

# --- Row counts --------------------------------------------------------------

test_that("avg.time = 'day' returns one row per day", {
  expect_equal(nrow(ta_day), 365L)
})

test_that("avg.time = 'month' returns one row per month", {
  expect_equal(nrow(ta_month), 12L)
})

test_that("avg.time = 'year' returns one row", {
  expect_equal(nrow(ta_year), 1L)
})

test_that("avg.time = '2 month' returns 6 rows for a full year", {
  result <- timeAverage(dat, avg.time = "2 month", progress = FALSE)
  expect_equal(nrow(result), 6L)
})

# --- Aggregation correctness -------------------------------------------------

test_that("daily mean is between hourly min and max", {
  safe_stat <- function(fn, x) {
    if (all(is.na(x))) {
      NA_real_
    } else {
      fn(x, na.rm = TRUE)
    }
  }
  daily_min <- tapply(
    dat$no2,
    lubridate::date(dat$date),
    \(x) safe_stat(min, x)
  )
  daily_max <- tapply(
    dat$no2,
    lubridate::date(dat$date),
    \(x) safe_stat(max, x)
  )
  non_na <- !is.na(ta_day$no2)

  expect_true(all(ta_day$no2[non_na] >= daily_min[non_na] - 1e-9))
  expect_true(all(ta_day$no2[non_na] <= daily_max[non_na] + 1e-9))
})

test_that("annual mean matches a direct calculation", {
  expected <- mean(dat$no2, na.rm = TRUE)
  expect_equal(ta_year$no2, expected, tolerance = 1e-6)
})

# --- statistic argument ------------------------------------------------------

test_that("statistic = 'max' returns values >= statistic = 'mean'", {
  ta_max <- timeAverage(
    dat,
    avg.time = "month",
    statistic = "max",
    progress = FALSE
  )
  non_na <- !is.na(ta_month$no2) & !is.na(ta_max$no2)
  expect_true(all(ta_max$no2[non_na] >= ta_month$no2[non_na]))
})

test_that("statistic = 'min' returns values <= statistic = 'mean'", {
  ta_min <- timeAverage(
    dat,
    avg.time = "month",
    statistic = "min",
    progress = FALSE
  )
  non_na <- !is.na(ta_month$no2) & !is.na(ta_min$no2)
  expect_true(all(ta_min$no2[non_na] <= ta_month$no2[non_na]))
})

test_that("statistic = 'sum' exceeds statistic = 'mean' for positive values", {
  ta_sum <- timeAverage(
    dat,
    avg.time = "month",
    statistic = "sum",
    progress = FALSE
  )
  non_na <- !is.na(ta_month$no2) & !is.na(ta_sum$no2)
  expect_true(all(ta_sum$no2[non_na] >= ta_month$no2[non_na]))
})

test_that("statistic = 'frequency' returns integer counts <= hours in period", {
  ta_freq <- timeAverage(
    dat1,
    avg.time = "day",
    statistic = "frequency",
    progress = FALSE
  )
  expect_true(all(ta_freq$no2 <= 24, na.rm = TRUE))
  expect_true(all(ta_freq$no2 >= 0, na.rm = TRUE))
})

test_that("statistic = 'percentile' with percentile = 95 is >= mean", {
  ta_p95 <- timeAverage(
    dat,
    avg.time = "month",
    statistic = "percentile",
    percentile = 95,
    progress = FALSE
  )
  non_na <- !is.na(ta_month$no2) & !is.na(ta_p95$no2)
  expect_true(all(ta_p95$no2[non_na] >= ta_month$no2[non_na]))
})

# --- Wind direction ----------------------------------------------------------

test_that("wind direction stays in [0, 360]", {
  non_na <- !is.na(ta_day$wd)
  expect_true(all(ta_day$wd[non_na] >= 0))
  expect_true(all(ta_day$wd[non_na] <= 360))
})

test_that("vector-averaged wd handles the 350/10 degree case correctly", {
  # Exact 350 + 10 average should be 0 (or 360), not 180
  two_hours <- data.frame(
    date = as.POSIXct(c("2023-01-01 00:00", "2023-01-01 01:00"), tz = "GMT"),
    wd = c(350, 10),
    ws = c(2, 2)
  )
  result <- timeAverage(two_hours, avg.time = "day", progress = FALSE)
  # The result should be close to 0 or 360, not near 180
  avg_wd <- result$wd
  expect_true(avg_wd <= 10 || avg_wd >= 350)
})

# --- data.thresh -------------------------------------------------------------

test_that("data.thresh = 100 produces more NAs than data.thresh = 0", {
  dat_gaps <- dat1
  dat_gaps$no2[sample(nrow(dat_gaps), 300)] <- NA

  res_low <- timeAverage(
    dat_gaps,
    avg.time = "day",
    data.thresh = 0,
    progress = FALSE
  )
  res_high <- timeAverage(
    dat_gaps,
    avg.time = "day",
    data.thresh = 100,
    progress = FALSE
  )

  expect_gte(sum(is.na(res_high$no2)), sum(is.na(res_low$no2)))
})

# --- Expansion (avg.time < data interval) ------------------------------------

test_that("expanding hourly to 15-min produces 4x as many rows", {
  result <- timeAverage(
    dat1,
    avg.time = "15 min",
    fill = TRUE,
    progress = FALSE
  )
  expect_equal(nrow(result), (nrow(dat1) * 4L) - 3)
})

# --- avg.time = 'season' -----------------------------------------------------

test_that("avg.time = 'season' returns 5 rows for a full year", {
  result <- timeAverage(dat, avg.time = "season", progress = FALSE)
  expect_equal(nrow(result), 5L)
})

# --- Input validation --------------------------------------------------------

test_that("invalid statistic raises an error", {
  expect_error(
    timeAverage(
      dat,
      avg.time = "day",
      statistic = "geometric_mean",
      progress = FALSE
    ),
    regexp = "statistic"
  )
})

test_that("data.thresh outside [0, 100] raises an error", {
  expect_error(
    timeAverage(dat, avg.time = "day", data.thresh = 101, progress = FALSE),
    regexp = "capture"
  )
})

test_that("percentile outside [0, 100] raises an error", {
  expect_error(
    timeAverage(
      dat,
      avg.time = "day",
      statistic = "percentile",
      percentile = 110,
      progress = FALSE
    ),
    regexp = "Percentile"
  )
})

test_that("invalid avg.time unit raises an error", {
  expect_error(
    timeAverage(dat, avg.time = "fortnight", progress = FALSE),
    regexp = "not recognised"
  )
})

Any scripts or data that you put into this service are public.

openair documentation built on April 2, 2026, 9:07 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

openair
Tools for the Analysis of Air Pollution Data

tests/testthat/test-timeAverage.R
In openair: Tools for the Analysis of Air Pollution Data

Try the openair package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

openair Tools for the Analysis of Air Pollution Data

tests/testthat/test-timeAverage.R In openair: Tools for the Analysis of Air Pollution Data

Try the openair package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

openair
Tools for the Analysis of Air Pollution Data

tests/testthat/test-timeAverage.R
In openair: Tools for the Analysis of Air Pollution Data