#' @export
#' @importFrom rlang .data
#'
#' @title Apply quality control on PurpleAir Timeseries object
#'
#' @param pat PurpleAir Timeseries \emph{pat} object
#' @param removeOutOfSpec Single logical value (\code{TRUE} or \code{FALSE})
#' determining whether measurements that are out of instrument specs should be
#' invalidated.
#' @param max_humidity Single numeric humidity threshold above which pm25
#' measurements are invalidated. Disabled unless explicitly set.
#'
#' @description Optionally applies QC thresholds to a \emph{pat} object
#' based on the documented specs of the PurpleAir sensor.
#'
#' The \code{pat_load()} function returns raw "engineering" data for a
#' PurpleAir Sensor. The very first level of QC that should always be applied
#' is the removal of out-of-spec values that should never be generated by the
#' sensor components. Out-of-spec values imply an electrical or software problem
#' and can never be considered valid measurements.
#'
#' Setting a \code{max_humidity} threshold is less fundamental. There are many
#' cases where PM2.5 readings during periods of high humidity should be called
#' into question which is why this QC option is provided. However, this type of
#' filtering is dependent upon a properly functioning humidity sensor. Humidity
#' filtering is disabled by default because it can result in the invalidation
#' of many potentially valid PM2.5 measurements.
#'
#' @details Out of spec thresholds are set so that anything outside of
#' the given range should represent a value that is not physically possible
#' in an ambient setting on planet Earth.
#'
#' \itemize{
#' \item{\code{humidity} -- [0:100]}
#' \item{\code{temperature} -- [-40:185]}
#' \item{\code{pm25} -- [0:2000]}
#' }
#'
#' Invalidated values are replaced with \code{NA}. Values that are already
#' missing are left untouched. Humidity filtering only affects records whose
#' humidity is non-missing, so when \code{removeOutOfSpec = TRUE} records with
#' out-of-spec humidity keep their pm25 values.
#'
#' @return A cleaned up \emph{pat} object.
#'
#' @references \href{https://www2.purpleair.com/products/purpleair-pa-ii}{PA-II specs}
#'
#' @examples
#' \donttest{
#' library(AirSensor)
#'
#' # Use a sensor with problems
#' pat <- example_pat
#'
#' # Basic plot shows out-of-spec values for humidity
#' pat %>% pat_multiPlot(sampleSize = NULL)
#'
#' # Applying QC invalidates these values
#' pat %>% pat_qc() %>% pat_multiPlot(sampleSize = NULL)
#'
#' # We can also invalidate PM2.5 data at high humidities
#' pat %>% pat_qc(max_humidity = 80) %>% pat_multiPlot(sampleSize = NULL)
#' }
pat_qc <- function(
  pat = NULL,
  removeOutOfSpec = TRUE,
  max_humidity = NULL
) {

  # ----- Validate parameters --------------------------------------------------

  MazamaCoreUtils::stopIfNull(pat)

  if ( !pat_isPat(pat) ) {
    stop("Parameter 'pat' is not a valid 'pa_timeseries' object.")
  }

  if ( !is.logical(removeOutOfSpec) ) {
    stop("Parameter 'removeOutOfSpec' must be logical.")
  }

  # NA or a logical vector would pass the check above and then fail inside
  # `if ( removeOutOfSpec )` with a generic R error, so reject them explicitly.
  if ( length(removeOutOfSpec) != 1 || is.na(removeOutOfSpec) ) {
    stop("Parameter 'removeOutOfSpec' must be a single TRUE or FALSE value.")
  }

  if ( !is.null(max_humidity) ) {

    if ( !is.numeric(max_humidity) ) {
      stop("Parameter 'max_humidity' must be numeric")
    }

    # A vector would be silently recycled against the humidity column and an
    # NA threshold would silently disable the filter.
    if ( length(max_humidity) != 1 || is.na(max_humidity) ) {
      stop("Parameter 'max_humidity' must be a single non-missing numeric value.")
    }

  }

  # ----- Prepare data ---------------------------------------------------------

  # Remove any duplicate data records
  pat <- pat_distinct(pat)

  # Replace values outside of [lower, upper] with NA. which() drops the NA
  # results of comparisons against missing values so only genuinely
  # out-of-range positions are touched.
  invalidateOutsideRange <- function(x, lower, upper) {
    replace(x, which(x < lower | x > upper), NA)
  }

  # ----- Invalidate out-of-spec values ----------------------------------------

  if ( removeOutOfSpec ) {

    # PA sensor specs: https://www2.purpleair.com/products/purpleair-pa-ii
    # https://stackoverflow.com/questions/27909000/set-certain-values-to-na-with-dplyr

    # NOTE: The 2000 upper limit for pm25 is based on Jon's 2020-05-14
    # NOTE: conversation with Adrian Dybwad, founder of PurpleAir.

    # Each column is checked only against its own limits so all four can be
    # handled in a single mutate().
    pat$data <-
      pat$data %>%
      dplyr::mutate(
        # 0 <= humidity <= 100
        humidity = invalidateOutsideRange(.data$humidity, 0, 100),
        # -40 <= temperature <= 185
        temperature = invalidateOutsideRange(.data$temperature, -40, 185),
        # 0 <= pm25 <= 2000
        pm25_A = invalidateOutsideRange(.data$pm25_A, 0, 2000),
        pm25_B = invalidateOutsideRange(.data$pm25_B, 0, 2000)
      )

  }

  # ----- Invalidate hi-humidity pm25 values -----------------------------------

  if ( !is.null(max_humidity) ) {

    # pm25 is invalid at high humidities. Records with missing humidity are
    # skipped by which() and keep their pm25 values.
    pat$data <-
      pat$data %>%
      dplyr::mutate(
        pm25_A = replace(.data$pm25_A, which(.data$humidity > max_humidity), NA),
        pm25_B = replace(.data$pm25_B, which(.data$humidity > max_humidity), NA)
      )

  }

  # ----- Return ---------------------------------------------------------------

  # Remove any duplicate data records
  pat <- pat_distinct(pat)

  return(pat)

}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.