R/pat_qc.R

Defines functions pat_qc

Documented in pat_qc

#' @export
#' @importFrom rlang .data
#' 
#' @title Apply quality control on PurpleAir Timeseries object
#' 
#' @param pat PurpleAir Timeseries \emph{pat} object
#' @param removeOutOfSpec Logical determining whether measurements that are
#' out of instrument specs should be invalidated.
#' @param max_humidity Maximum humidity threshold above which pm25 measurements
#' are invalidated. Disabled unless explicitly set.
#' 
#' @description Optionally applies QC thresholds to a \emph{pat} object
#' based on the documented specs of the PurpleAir sensor.
#' 
#' The \code{pat_load()} function returns raw "engineering" data for a
#' PurpleAir Sensor. The very first level of QC that should always be applied
#' is the removal of out-of-spec values that should never be generated by the
#' sensor components. Out-of-spec values imply an electrical or software problem
#' and can never be considered valid measurements.
#' 
#' Setting a \code{max_humidity} threshold is less fundamental. There are many
#' cases where PM2.5 readings during periods of high humidity should be called
#' into question which is why this QC option is provided. However, this type of 
#' filtering is dependent upon a properly functioning humidity sensor. Humidity
#' filtering is disabled by default because it can result in the invalidation
#' of many potentially valid PM2.5 measurements.
#' 
#' @details Out-of-spec thresholds are set so that anything outside of
#' the given range should represent a value that is not physically possible
#' in an ambient setting on planet Earth. 
#' 
#' \itemize{
#' \item{\code{humidity} -- [0:100]}
#' \item{\code{temperature} -- [-40:185]}
#' \item{\code{pm25} -- [0:2000]}
#' }
#' 
#' @return A cleaned up \emph{pat} object.
#' 
#' @references \href{https://www2.purpleair.com/products/purpleair-pa-ii}{PA-II specs}
#' 
#' @examples
#' \donttest{
#' library(AirSensor)
#' 
#' # Use a sensor with problems
#' pat <- example_pat
#' 
#' # Basic plot shows out-of-spec values for humidity
#' pat %>% pat_multiPlot(sampleSize = NULL)
#' 
#' # Applying QC removes these records
#' pat %>% pat_qc() %>% pat_multiPlot(sampleSize = NULL)
#' 
#' # We can also remove PM2.5 data at high humidities
#' pat %>% pat_qc(max_humidity = 80) %>% pat_multiPlot(sampleSize = NULL)
#' }

pat_qc <- function(
  pat = NULL, 
  removeOutOfSpec = TRUE,
  max_humidity = NULL
) {
  
  # ----- Validate parameters --------------------------------------------------
  
  MazamaCoreUtils::stopIfNull(pat)
  
  if ( !pat_isPat(pat) )
    stop("Parameter 'pat' is not a valid 'pa_timeseries' object.")
  
  if ( !is.logical(removeOutOfSpec) )
    stop("Parameter 'removeOutOfSpec' must be logical.")
  
  # NOTE:  is.logical() also accepts NA, logical(0) and longer vectors, all of
  # NOTE:  which would make the if() test below fail with a cryptic error.
  if ( length(removeOutOfSpec) != 1 || is.na(removeOutOfSpec) )
    stop("Parameter 'removeOutOfSpec' must be a single TRUE or FALSE value.")
  
  if ( !is.null(max_humidity) ) {
    if ( !is.numeric(max_humidity) ) {
      stop("Parameter 'max_humidity' must be numeric")
    }
    # NOTE:  A vector threshold would be recycled against the humidity column
    # NOTE:  and an NA threshold would invalidate nothing without any warning.
    if ( length(max_humidity) != 1 || is.na(max_humidity) ) {
      stop("Parameter 'max_humidity' must be a single, non-missing value.")
    }
  }
  
  # Remove any duplicate data records
  pat <- pat_distinct(pat)
  
  # ----- Invalidate out-of-spec values ----------------------------------------
  
  if ( removeOutOfSpec ) {

    # PA sensor specs: https://www2.purpleair.com/products/purpleair-pa-ii
    
    # https://stackoverflow.com/questions/27909000/set-certain-values-to-na-with-dplyr

    # Replace every value outside of [lower, upper] with NA. which() discards
    # the NA results of the comparison so already-missing values are untouched.
    invalidateOutside <- function(x, lower, upper) {
      replace(x, which(x < lower | x > upper), NA)
    }
    
    pat$data <- 
      pat$data %>%
      dplyr::mutate(
        # 0 <= humidity <= 100
        humidity = invalidateOutside(.data$humidity, 0, 100),
        # -40 <= temperature <= 185
        temperature = invalidateOutside(.data$temperature, -40, 185),
        # 0 <= pm25 <= 2000
        # NOTE:  The 2000 number is based on Jon's 2020-05-14 conversation with 
        # NOTE:  Adrian Dybwad, founder of PurpleAir.
        pm25_A = invalidateOutside(.data$pm25_A, 0, 2000),
        pm25_B = invalidateOutside(.data$pm25_B, 0, 2000)
      )
    
  }
  
  # ----- Invalidate hi-humidity pm25 values -----------------------------------
  
  if ( !is.null(max_humidity) ) {
    
    # NOTE:  This runs after the out-of-spec step so that, when that step is
    # NOTE:  enabled, humidity values it set to NA never trigger invalidation.
    pat$data <- 
      pat$data %>%
      # pm25 is invalid at high humidities
      dplyr::mutate(
        pm25_A = replace(.data$pm25_A, which(.data$humidity > max_humidity), NA),
        pm25_B = replace(.data$pm25_B, which(.data$humidity > max_humidity), NA)
      )
    
  }
  
  # ----- Return ---------------------------------------------------------------
  
  # Remove any duplicate data records
  pat <- pat_distinct(pat)
  
  return(pat)
  
}
MazamaScience/AirSensor documentation built on April 28, 2023, 11:16 a.m.