#' Calculate rolling mean pollutant values
#'
#' This is a utility function mostly designed to calculate rolling mean
#' statistics relevant to some pollutant limits, e.g., 8 hour rolling means for
#' ozone and 24 hour rolling means for PM10. However, the function has a more
#' general use in helping to display rolling mean values in flexible ways with
#' the rolling window width left, right or centre aligned. The function will try
#' and fill in missing time gaps to get a full time sequence but return a data
#' frame with the same number of rows supplied.
#'
#' @param mydata A data frame containing a `date` field. `mydata` must contain a
#' `date` field in `Date` or `POSIXct` format. The input time series must be
#' regular, e.g., hourly, daily.
#' @param pollutant The name of a pollutant, e.g., `pollutant = "o3"`.
#' @param width The averaging period (rolling window width) to use, e.g., `width
#' = 8` will generate 8-hour rolling mean values when hourly data are
#' analysed.
#' @param type Used for splitting the data further. Passed to [cutData()].
#' @param data.thresh The % data capture threshold. No values are calculated if
#' data capture over the period of interest is less than this value. For
#' example, with `width = 8` and `data.thresh = 75` at least 6 hours are
#' required to calculate the mean, else `NA` is returned.
#' @param align Specifies how the moving window should be aligned. `"right"`
#' means that the previous hours (including the current) are averaged.
#' `"left"` means that the forward hours are averaged. `"centre"` (or
#' `"center"` - the default) centres the current hour in the window.
#' @param new.name The name given to the new column. If not supplied it will
#' create a name based on the name of the pollutant and the averaging period
#' used.
#' @param ... Additional parameters passed to [cutData()]. For use with `type`.
#' @export
#' @author David Carslaw
#' @examples
#' # rolling 8-hour mean for ozone
#' mydata <- rollingMean(mydata,
#' pollutant = "o3", width = 8, new.name =
#' "rollingo3", data.thresh = 75, align = "right"
#' )
rollingMean <- function(
mydata,
pollutant = "o3",
width = 8L,
type = "default",
data.thresh = 75,
align = c("centre", "center", "left", "right"),
new.name = NULL,
...
) {
# check inputs
align <- rlang::arg_match(align, multiple = FALSE)
# data.thresh must be between 0 & 100
if (data.thresh < 0 || data.thresh > 100) {
cli::cli_abort(
"{.field data.thresh} must be between {.val {0L}} and {.val {100L}}."
)
}
# pollutant should be numeric
if (!is.numeric(mydata[[pollutant]])) {
cli::cli_abort(
"mydata{.field ${pollutant}} is not numeric - it is {class(mydata[[pollutant]])}."
)
}
# create new name if not provided
if (is.null(new.name)) {
new.name <- paste("rolling", width, pollutant, sep = "")
}
# setting width < 1 crashes R
if (width < 1L) {
width <- 1L
}
# deal with type
if (type == "default" & "site" %in% names(mydata)) {
# back compatibility to avoid breaking change
cli::cli_inform(
'{.field type} is "default" but {.code site} column identified - setting {.field type} to "site".'
)
type <- "site"
}
mydata <- cutData(mydata, type = type, ...)
# function to perform rolling average
calc.rolling <- function(mydata) {
# need to know whether dates added
dates <- mydata$date
# pad missing hours
mydata <- date.pad(mydata)
# make sure function is not called with window width longer than data
if (width > nrow(mydata)) {
return(mydata)
}
# call C code
mydata[[new.name]] <- .Call(
"rollMean",
mydata[[pollutant]],
width,
data.thresh,
align,
PACKAGE = "openair"
)
# return what was put in; avoids adding missing data e.g. for factors
if (length(dates) != nrow(mydata)) {
mydata <- mydata[mydata$date %in% dates, ]
}
# return
return(mydata)
}
# split if several sites
mydata <-
purrr::map(split(mydata, mydata[[type]], drop = TRUE), calc.rolling) %>%
purrr::list_rbind()
# drop default column
if (type == "default") {
mydata$default <- NULL
}
# return
return(mydata)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.