R/FemFit_removeSpikes.r

#' Remove Spikes from an FemFit dataset
#'
#' @description
#' Processes an FemFit dataset to remove observations associated with temperature spikes.
#'
#' @param x An "FemFit" object.
#' @param whichSession A character vector of session identifiers. Defaults to processing all sessions.
#' @param spikes.threshold The numeric threshold to remove pressure fluctuations associated with the change in temperature.
#'
#' @details
#' Removes observations where the sum of the lagged temperature differences across the eight sensors is greater than or equal to \code{spikes.threshold}.
#'
#' @return
#' Returns the processed "FemFit" object.
#'
#' @seealso
#' \code{\link{read_FemFit}}
#'
#' @examples
#' session643 = read_FemFit("Datasets_AukRepeat/dee8fc3fdcfccb27_643_csv.zip") %>%
#'     FemFit_removeSpikes()
#'
#' # Alternatively without the piping operator:
#' session643 = FemFit_removeSpikes(read_FemFit("Datasets_AukRepeat/dee8fc3fdcfccb27_643_csv.zip"))
#'
#' @export
FemFit_removeSpikes = function (x, whichSession = "", spikes.threshold = 2) {
  # Throw an error if the x argument is not an FemFit object or missing
  if (!inherits(x, "FemFit") || is.na(x)) {
    stop("The x argument is not an FemFit object.", call. = FALSE)
  }

  # Throw an error if the whichSession argument is not a character or missing
  if (any(!is.character(whichSession) || is.na(whichSession))) {
    stop("The provided whichSession argument is not a character.", call. = FALSE)
  }

  sessionIDs = x$df$sessionID %>% unique

  # Throw an error if the whichSession argument does not map to the sessionIDs
  if (!all(whichSession %in% sessionIDs) && whichSession != "") {
    stop("The provided whichSession argument specifies sessionIDs which do not exist in x$df.", call. = FALSE)
  }

  # Which sessions to remove observations where the sum of the temperature differences is greater than or equal to spikes.threshold
  indices = match(whichSession, sessionIDs) %>% .[!is.na(.)]

  if (length(indices) == 0) {
    indices = 1:length(sessionIDs)
  }

  toRemove = setNames(rep(FALSE, length.out = length(sessionIDs)), sessionIDs)
  toRemove[indices] = TRUE

  spikes.threshold = rep(spikes.threshold, length.out = length(sessionIDs))
  names(spikes.threshold) = sessionIDs

  # Remove observations where the sum of the temperature differences is greater than or equal to spikes.threshold
  x_Work = by(x$df, x$df$sessionID, function (x_Child) {
    if (toRemove[x_Child$sessionID[1]]) {
      x_Child = x_Child %>%
        # Calculate the absolute difference within a sensor's temperature measurements
        dplyr::mutate_at(dplyr::vars(dplyr::starts_with("tmprtr_sensor")), dplyr::funs(
          tmpDiff = dplyr::if_else(is.na(abs(. - lag(.))), 0, abs(. - lag(.)))
        )) %>%
        # Calculate the row sum of the absolute temperature differences
        dplyr::mutate(tmpDiffSum = rowSums(
          dplyr::select(., dplyr::ends_with("tmpDiff"))
        )) %>%
        # Exclude any observations where the row sum of the absolute temperature differences is greater than spikes.threshold
        dplyr::filter(!tmpDiffSum >= spikes.threshold[x_Child$sessionID[1]]) %>%
        # Remove the derived variables from the data.frame object
        dplyr::select(-dplyr::contains("tmpDiff")) %>%
        dplyr::mutate(time = trunc(row_number()*10 - 10))
    }

    return (x_Child)
  })

  # Setup the object to return to the end-user
  if (length(x_Work) == 1) {
    x$df = x_Work[[1]]
  } else {
    x$df = x_Work %>% Reduce(function(df1, df2) dplyr::bind_rows(df1, df2), .)
  }

  return (x)
}
TheGreatGospel/IVPSA documentation built on May 19, 2019, 1:47 a.m.