R/segmentRefine_pcurve.r

#' Segment within a JSONLabel with a principal curve
#'
#' @description
#' Takes a segmented "FemFit" object and constructs new baseline and event labels within the specified JSONLabel.
#'
#' @param x An "FemFit" object.
#' @param whichSession A character string which identifies which session to edit.
#' @param whichJSONLabel A character string which identifies which \code{JSONLabel} to edit.
#' @param windowSize Define event duration. Defaults to 5 seconds with \code{timeScale = 1000}.
#' @param windowTolerance Define the tolerance for defining an event. A numeric vector of length two can be provided. Defaults to +/- 1 second  with \code{timeScale = 1000}.
#' @param timeScale Time scale to work on. Defaults to working in seconds.
#' @param ... Arguments passed to \code{\link{principal.curve}}.
#'
#' @details
#' Conducts principal component analysis then fits a principal curve to the first principal component and time. Regions are identified temporally with the principal curve. Regions are classified as events if the region sits above the principal curve and meets the \code{windowSize} specifications.
#'
#' If a vector is inputted for either \code{whichSession} or \code{whichJSONLabel}, only the first element is used.
#'
#' @seealso
#' \code{\link{prcomp}} and \code{\link{principal.curve}} for how the main statistical learning components work.
#'
#' @examples
#' # Read in the FemFit data
#' AS005 = read_FemFit(c(
#'         "Datasets_AukRepeat/61aa0782289af385_283_csv.zip",
#'         "Datasets_AukRepeat/61aa0782289af385_284_csv.zip"
#'     ),
#'     remove.NAs = TRUE
#'   ) %>%
#'   # Segment the FemFit data
#'   segment(
#'     cp. = 0.001,
#'     numOfNodesToLabel = list(c(3, 1, 3, 4), c(4, 1, 5, 3))
#'   )
#'
#' # Within a session refine the segmentation with principal curves
#' AS005r = segmentRefine_pcurve(
#'     x = AS005,
#'     whichSession = "283 09:28",
#'     whichJSONLabel = "pfmc3x5s_rest30s",
#'     windowTolerance = c(1, 1)
#'   )
#'
#' @export
segmentRefine_pcurve = function (x, whichSession, whichJSONLabel, windowSize = 5, windowTolerance = 1, timeScale = 1000, ...) {
  # Throw an error if the x argument is not an FemFit object or missing
  if (!inherits(x, "FemFit") || is.na(x)) {
    stop("The x argument is not an FemFit object.", call. = FALSE)
  }

  # Throw an error if trLabel does not exists in x$df
  if (x$df %>% colnames %>% {any(grepl("^trLabel$", .))} %>% !.) {
    stop("trLabel does not exist in the FemFit object.", call. = FALSE)
  }

  # Throw an error if the whichSession argument is not a character or missing
  if (!is.character(whichSession[1]) || is.na(whichSession[1])) {
    stop("The provided whichSession argument is not a character.", call. = FALSE)
  }

  # Throw an error if the whichJSONLabel argument is not a character or missing
  if (!is.character(whichJSONLabel[1]) || is.na(whichJSONLabel[1])) {
    stop("The provided whichJSONLabel argument is not a character.", call. = FALSE)
  }

  # Throw an error if the windowSize argument is not a numeric or missing
  if (!is.numeric(windowSize[1]) || is.na(windowSize[1])) {
    stop("The provided windowSize argument is not a numeric.", call. = FALSE)
  }

  # Throw an error if the windowTolerance is not a numeric or missing
  if (!is.numeric(windowTolerance) || anyNA(windowTolerance)) {
    stop("The provided windowTolerance argument(s) is not a numeric.", call. = FALSE)
  }

  # Throw an error if the timeScale argument is not a numeric or missing
  if (!is.numeric(timeScale[1]) || is.na(timeScale[1])) {
    stop("The provided timeScale argument is not a numeric.", call. = FALSE)
  }

  # Recycle elements of windowTolerance if necessary
  windowTolerance = rep(windowTolerance, length.out = 2)

  # Extract out the region to apply the principal curve event identification
  x_Child = x$df %>%
    dplyr::filter(sessionID == whichSession[1], JSONLabel == whichJSONLabel[1])

  if (nrow(x_Child) < 1) {
    stop("The provided whichSession and whichJSONLabel arguments selected no observations.", call. = FALSE)
  }

  # Generate the first principal component of the eight pressure traces
  x_Child$pc.1 =  x_Child %>%
    dplyr::select(starts_with("prssr_sensor")) %>%
    dplyr::select_if(function(x) (var(x) != 0)) %>%
    prcomp(., center = TRUE, scale. = TRUE) %>%
    .$x %>%
    .[, 1]

  # Generate the principal component curve of pc.1 and time
  x_Child$pc.c = princurve::principal.curve(x = x_Child[c("pc.1", "time")] %>% as.matrix(), ...) %>%
    {.$s[, 1]}

  # Flag where pc.1 is greater than pc.c as a binary variable, then
  x_Child = x_Child %>%
    dplyr::mutate(flag = dplyr::if_else(pc.1 > pc.c, 1, 0),
                  newRegion = dplyr::if_else(flag == dplyr::lag(flag, 1, default = 1), FALSE, TRUE),
                  regionID = NA_real_)

  # Identify temporally unique regions of flag
  cur.regionID = 1
  for (i in 1:nrow(x_Child)) {
    if (x_Child$newRegion[i]) {
      cur.regionID = cur.regionID + 1
    }
    x_Child$regionID[i] = cur.regionID
  }

  # Identify regionIDs where it meets the prerequisites of the windowSize +/- windowTolerance
  x_Isolate = x_Child %>%
    dplyr::filter(flag == 1) %>%
    dplyr::group_by(regionID) %>%
    dplyr::summarise(window = max(time) - min(time)) %>%
    dplyr::filter(timeScale[1]*(windowSize - windowTolerance[1]) <= window, window <= timeScale[1]*(windowSize + windowTolerance[2])) %>%
    dplyr::pull(regionID)

  # Load in the regionIDs
  x_Child$trLabel = "baseline"
  x_Child$trLabel[which(x_Child$regionID %in% x_Isolate)] = "event"

  # Send back the results to the inputted object
  x$df$trLabel[which(x$df$sessionID == whichSession[1] & x$df$JSONLabel == whichJSONLabel[1])] = x_Child$trLabel

  return (x)
}
TheGreatGospel/IVPSA documentation built on May 19, 2019, 1:47 a.m.