R/qaqc_stic_data.R

Defines functions qaqc_stic_data

Documented in qaqc_stic_data

#' qaqc_stic_data
#'
#' @description This function provides multiple options for QAQC flagging of processed and classified STIC data frames, such as those generated by the \link{classify_wetdry} function.
#' Users can select which operations are to be performed, and a single new QAQC column is created with all flags concatenated. QAQC options currently include: (1) correction and flagging of negative SPC values resulting from the calibration process, i.e., changing the negative values to 0 and flagging this
#' (2) inspecting the wetdry classification time series for potential deviation anomalies based on user-defined windows
#'
#' @param stic_data A data frame with classified STIC data, such as that produced by \code{classify_wetdry}.
#' @param spc_neg_correction a logical argument indicating whether the user would like to correct negative SPC values resulting from the calibration process to 0.
#' The character code associated with this correction is \code{"C"}.
#' @param inspect_deviation a logical argument indicating whether the user would like to identify deviation anomalies, in which a series of wet or dry readings less than or equal to `deviation_size` in length is surrounded on both sides by `window_size` or more observations of its opposite.
#' This operation is meant to identify potentially suspect binary wet/dry data points for further examination.
#' The character code associated with this operation is \code{"D"}.
#' @param deviation_size a numeric argument specifying the maximum size (i.e., number of observations) of a clustered group of points that can be flagged as an deviation
#' @param window_size a numeric argument specifying the minimum size (i.e., number of observations) that the deviation must be surrounded by in order to be flagged
#' @import dplyr
#'
#' @return The same data frame as input, but with new QAQC columns or a single, concatenated QAQC column. The QAQC output
#' Can include: \code{"C"}, meaning the calibrated SpC value was negative from `spc_neg_correction`; \code{"D"}, meaning the point was identified as
#' a deviation or deviation based on a moving window from `inspect_deviation`; or \code{"O"}, meaning the calibrated SpC was
#' outside the standard range based on the function \code{apply_calibration}.
#' @export
#'
#' @examples qaqc_df <-
#'   qaqc_stic_data(classified_df,
#'     spc_neg_correction = TRUE,
#'     inspect_deviation = TRUE,
#'     deviation_size = 4, window_size = 96
#'   )
#' head(qaqc_df)
qaqc_stic_data <- function(stic_data, spc_neg_correction = TRUE, inspect_deviation = TRUE,
                           deviation_size = NULL, window_size = NULL) {
  # bind variables
  SpC <- NULL

  # check if neg correction is possible
  if (spc_neg_correction & !("SpC" %in% names(stic_data))) stop("Cannot do spc_neg_correction - no SpC column. Change spc_neg_correction to FALSE or provide stic_data with SpC column.")

  if (spc_neg_correction == TRUE) {
    # Deal with negative spc values
    stic_data <-
      stic_data |>
      dplyr::mutate(negative_SpC = dplyr::if_else(
        condition = SpC < 0,
        true = "C",
        false = ""
      )) |>
      dplyr::mutate(SpC = dplyr::if_else(
        condition = SpC <= 0,
        true = 0,
        false = SpC
      ))
  }

  if (inspect_deviation == TRUE) {
    if (is.null(deviation_size)) stop("Need to provide deviation_size and window_size if inspect_deviation = TRUE")
    if (is.null(window_size)) stop("Need to provide deviation_size and window_size if inspect_deviation = TRUE")

    # Get run lengths from rle object
    rle_object <- rle(stic_data$wetdry)
    run_lengths <- rle_object$lengths

    # find any run_lengths smaller than deviation size
    i_small <- which(run_lengths <= deviation_size)

    # can't have it be the first or last run - need to get rid of
    i_small <- i_small[!(i_small %in% c(1, length(run_lengths)))]

    stic_data$deviation <- rep("", nrow(stic_data))

    for (i in i_small) {
      i_window <- run_lengths[i - 1] + run_lengths[i + 1]

      if (i_window > window_size) {
        deviation_start <- sum(run_lengths[1:(i - 1)]) + 1
        deviation_end <- deviation_start + run_lengths[i] - 1

        stic_data[deviation_start:deviation_end, "deviation"] <- "D"
      }
    }
  }

  # concatenate the QAQC columns with col codes: "C" for negative SpC;
  # "D" for anomalous classification; "O" for outside standard range
  #
  # some columns will only exist for calibrated data, so only use columns that exist
  stic_data_qacols <-
    stic_data |>
    dplyr::select(any_of(c("negative_SpC", "deviation", "outside_std_range"))) |>
    tidyr::unite("QAQC", sep = "", na.rm = T)

  stic_data$QAQC <- stic_data_qacols$QAQC

  stic_data <-
    stic_data |>
    dplyr::select(-any_of(c("negative_SpC", "deviation", "outside_std_range")))

  return(stic_data)
}

Try the STICr package in your browser

Any scripts or data that you put into this service are public.

STICr documentation built on June 8, 2025, 1:52 p.m.