IVPSA: Functions to Analyse FemFit Datasets

Documented in summary.FemFit

#' Summarise the events of a FemFit Object
#'
#' @description
#' A summary method for segmented "FemFit" objects.
#'
#' @param x An "FemFit" object.
#' @param response Specifies which response to work with when calculating the descriptives. Must be set to "zeroPrssr" or "prssr_sensor".
#' @param descriptives A list object containing the descriptives to calculate for each event within a JSONLabel. See Details.
#' @param toPrint A logical constant which controls whether to print the summary output.
#' @param sprintf_Format A character constant which formats the numeric output the printed summary output.
#' @param ... Other arguments not used by this method.
#'
#' @details
#' The "FemFit" object must be segmented to have events within the protocol information. That is, the \code{trLabel} column has \code{"event"} entries within the \code{JSONLabel}s of interest.
#'
#' The list object for \code{descriptives} is formatted such that the name of the \code{JSONLabel} is associated with the list of descriptives to generate for the events within the \code{JSONLabel}.
#' Options that are provided by \code{summary} are:
#' \itemize{
#'  \item \code{"Lin.Trend"} to estimate the linear trend (and intercept) of an event. Prints the linear trend in terms of a second change, rather than in terms of a millisecond change.
#'  \item \code{"Count+Peak"} to estimate the number of peaks in principal component space and the maximal pressure of each peak by sensor. Prints the number of peaks and the mean maximal pressure by sensor.
#'  \item \code{"Count+PeakBySensor"} to estimate the number of peaks by sensor and the maximal pressure of each peak by sensor. Prints the number of peaks and the mean maximal pressure by sensor.
#'  \item \code{"Max"} to estimate the maximal pressure by sensor.
#'  \item If not specificed, it defaults to estimating the mean pressure by sensor.
#' }
#'
#' \code{toPrint} and \code{sprintf_Format} only uses the first element if there is a vector input.
#'
#' @seealso
#' \code{\link{sprintf}} for possible formatting options to apply to numeric output.
#'
#' @return
#' Silently returns a tibble object.
#'
#' @examples
#' AS005 = read_FemFit(c(
#'         "Datasets_AukRepeat/61aa0782289af385_283_csv.zip",
#'         "Datasets_AukRepeat/61aa0782289af385_284_csv.zip"
#'     ),
#'     remove.NAs = TRUE
#'   ) %>%
#'   # Segment the FemFit data
#'   segment(
#'     cp. = 0.001,
#'     numOfNodesToLabel = list(c(3, 1, 3, 4), c(4, 1, 5, 3))
#'   ) %>%
#'   # Calculate the zeroed out pressures
#'   deriveZero(method = "lmm")
#'
#' # Extract the descriptives with the default arguments
#' summary(AS005)
#'
#' # An example of specifying descriptives
#' summary(AS005, descriptives = list(
#'   "rapid_pfmc15s" = list("Lin.Trend", "Count+Peak", "Count+PeakBySensor"))
#' )
#'
#' # Define a function to extract the median pressure of an event trace
#' medianWrap = function (x) {
#'   # The `toPrint` element tells `summary.FemFit` what to print
#'   # Whereas the `toReturn` element which is a `list` object, tells `summary.FemFit` what to silently return.
#'   dplyr::tibble(toPrint = median(x), toReturn = list(median(x)))
#' }
#'
#' # Define a function to extract the robust regression linear trend
#' library(MASS)
#' Robust.Lin.Trend = function (x) {
#'   # Append the time component in ms to the vector
#'   x.df = data.frame(x = x, time = 0:(length(x) - 1) * 10)
#'
#'   # Fit the linear model with robust regression
#'   x.fit = MASS::rlm(x ~ time, data = x.df)
#'
#'   # Print the coefficient in terms of a second change rather than a ms change
#'   # Return the intercept and linear trend coefficients with their std. errors
#'   dplyr::tibble(toPrint = 1000*coef(x.fit)[2], toReturn = list(summary(x.fit)$coefficients[, 1:2]))
#' }
#'
#' # An example of specifying a user-defined function to generate a descriptive
#' # Instead of supplying a character input, a list input is used
#' # The list has three elements
#'   # The first is the function used to generate the descriptive
#'   # The second tells `summary.FemFit` whether to apply by sensor all to all sensors
#'   # The third tells `summary.FemFit` what to name the descriptive
#' summary(AS005, descriptives = list(
#'   "pfmc3x5s_rest30s" = list(
#'       "Lin.Trend",
#'       list(medianWrap, bySensor = TRUE, name = "Median"),
#'       list(Robust.Lin.Trend, bySensor = TRUE, name = "Robust Lin.Trend")
#'   )
#' )
#'
#' @export summary.FemFit

summary.FemFit = function(x, response = c("zeroPrssr", "prssr_sensor"), descriptives = list("pfmc3x5s_rest30s" = list("Lin.Trend"), "rapid_pfmc15s" = list("Count+Peak"), "two_and_triple_cough_rest10s" = list("Max"), "valsalva3x6s_rest30s" = list("Lin.Trend")), toPrint = TRUE, sprintf_Format = "%4.2f", ...) {
  # Throw an error if the x argument is not an FemFit object or missing
  if (!inherits(x, "FemFit") || is.na(x)) {
    stop("The x argument is not an FemFit object.", call. = FALSE)
  }

  # Throw an error if trLabel does not exists in x$df
  if (x$df %>% colnames %>% {any(grepl("^trLabel$", .))} %>% !.) {
    stop("trLabel does not exist in the FemFit object.", call. = FALSE)
  }

  # Throw an error if the method argument is not one of the available options
  response = match.arg(response)

  # Throw a warning if zeroPrssr does not exists in x$df
  if (response == "zeroPrssr" && x$df %>% colnames %>% {any(grepl("^zeroPrssr[0-9]$", .))} %>% !.) {
    warning("zeroPrssr does not exist in the FemFit object. Descriptive statistics will be in terms of the raw pressure measurements.", call. = FALSE)
    response = "prssr_sensor"
  }

  # Throw an error if toPrint isn't a logical
  if (!is.logical(toPrint[1])) {
    stop("The toPrint argument is not a logical.", call. = FALSE)
  }
  toPrint = toPrint[1]

  # Throw an error if sprintf_Format isn't a character
  if (!is.character(sprintf_Format[1])) {
    stop("The sprintf_Format argument is not a character.", call. = FALSE)
  }
  sprintf_Format = sprintf_Format[1]

  # Throw an error if not all of the JSONLabels exist in the FemFit object
  JSONLabels = names(descriptives)
  if (!all(JSONLabels %in% unique(x$df$JSONLabel))) {
    stop("Some of the specified JSONLabels do not exist in the FemFit object.", call. = FALSE)
  }

  # Extract the `df` element of the FemFit object
  x_Work = x$df

  # Create sequential event region labels if they do not exist
  if (x_Work %>% colnames %>% {any(grepl("^trLabelSeq$", .))} %>% !.) {
    x_Work = FemFit:::deriveZero_seqLabels(x_Work)
  }

  # Convert the descriptives list to the actual number of descriptive statistics to calculate
  descriptives = unlist(descriptives, recursive = FALSE)

  # Throw an error if there are no elements in the descriptivse
  if (length(descriptives) == 0) {
    stop("No descriptive statistics were requested.", call. = FALSE)
  }

  # Extract only the event pressure measurements
  toReturn = x_Work %>%
    dplyr::filter(trLabel == "event") %>%
    dplyr::select(dplyr::starts_with(response), JSONLabel, trLabelSeq, sessionID) %>%
    split(.$sessionID) %>%
    lapply(function (x_Partition) {
      # Print the session identifier
      if (toPrint) cat("\n", x_Partition$sessionID[1], "\n", rep("=", length.out = nchar(x_Partition$sessionID[1])), "\n", sep = "")

      lapply(names(descriptives), function (nameIndex) {
        # Select the pressure measurements associated with the JSONLabel of interest
        currentJSONLabel = JSONLabels[as.logical(pmatch(JSONLabels, nameIndex, nomatch = 0))]
        x_Child = x_Partition %>%
          dplyr::filter(JSONLabel == currentJSONLabel)

        # Then, update the sequential event regions with `pretty` labels
        x_Child = x_Child %>%
          dplyr::select(trLabelSeq) %>%
          dplyr::distinct() %>%
          dplyr::mutate(newLabel = paste0("Event ", 1:length(unique(x_Child$trLabelSeq)))) %>%
          dplyr::right_join(x_Child, by = c("trLabelSeq" = "trLabelSeq")) %>%
          dplyr::select(dplyr::starts_with(response), newLabel, -trLabelSeq)

        # Choose the descriptive function based requested, also setup whether the function is applied by sensor and extract the name of the descriptive statistic
        if (is.character(descriptives[[nameIndex]][[1]])) {
          # Define which descriptive function to call
          funToApply = switch(descriptives[[nameIndex]][[1]],
                              `Lin.Trend` = function (x) {
                                # Append the time component in ms to the vector
                                x.df = data.frame(x = x, time = 0:(length(x) - 1) * 10)

                                # Fit the linear model
                                x.fit = lm(x ~ time, data = x.df)

                                # Print the coefficient in terms of a second change rather than a ms change
                                # Return the intercept and linear trend coefficients with their std. errors
                                dplyr::tibble(toPrint = 1000*coef(x.fit)[2], toReturn = list(summary(x.fit)$coefficients[, 1:2]))
                              },
                              `Count+Peak` = function (x) {
                                # Append the time component in ms to the matrix
                                x.df = cbind(x, 0:(nrow(x) - 1) * 10)
                                colnames(x.df)[9] = "time"

                                # Extract the first principal component
                                x.df$pc.1 = x.df[, 1:8] %>%
                                  dplyr::select_if(function(x) (var(x) != 0)) %>%
                                  prcomp(., center = TRUE, scale. = TRUE) %>%
                                  .$x %>%
                                  .[, 1]

                                # Fit the principal curve for the first principal component over time
                                x.df$pc.c = princurve::principal.curve(x = x.df[c("pc.1", "time")] %>% as.matrix()) %>%
                                  {.$s[, 1]}

                                # Identify the partitions which sit above the fitted principal curve
                                x.df = x.df %>%
                                  dplyr::mutate(flag = dplyr::if_else(pc.1 > pc.c, 1, 0),
                                                newRegion = dplyr::if_else(flag == dplyr::lag(flag, 1, default = 1), FALSE, TRUE),
                                                regionID = NA_real_)

                                cur.regionID = 1
                                for (i in 1:nrow(x.df)) {
                                  if (x.df$newRegion[i]) {cur.regionID = cur.regionID + 1}
                                  x.df$regionID[i] = cur.regionID
                                }

                                # Calculate maximal pressure for each partition by sensor
                                x_Isolate = x.df %>%
                                  dplyr::filter(flag == 1) %>%
                                  dplyr::group_by(regionID) %>%
                                  dplyr::summarise_at(1:8, funs(max)) %>%
                                  dplyr::mutate(Peak_ID = 1:length(regionID)) %>%
                                  dplyr::ungroup() %>%
                                  dplyr::select(Peak_ID, dplyr::everything(), -regionID)

                                # Format the output for the mean maximal pressure of the partitions by sensor
                                x_toPrint = x_Isolate %>%
                                  dplyr::ungroup() %>%
                                  dplyr::select(-Peak_ID) %>%
                                  dplyr::summarise_at(1:8, mean) %>%
                                  {sprintf(sprintf_Format, .[1, ])} %>%
                                  paste(collapse = " ")

                                # Print the number of partitions which sit above the principal curve, along with the mean maximal pressure of the partitions by sensor
                                # Return the `tibble` which contains the maximal pressure of the partitions by sensor
                                dplyr::tibble(toPrint = paste0(round(length(dplyr::pull(x_Isolate, Peak_ID))), " (", x_toPrint, ")"), toReturn = list(x_Isolate))
                              },
                              `Max` = function (x) {
                                # Under the assumption that the event trace has been partitioned correctly, the maximum of the selected response is sufficient
                                dplyr::tibble(toPrint = max(x), toReturn = list(max(x)))
                              },
                              `Count+PeakBySensor` = function (x) {
                                # Append the time component in ms to the vector
                                x.df = data.frame(x = x, time = 0:(length(x) - 1) * 10)

                                # Fit the principal curve for the vector over time
                                x.df$pc.c = princurve::principal.curve(x = x.df[, 1:2] %>% as.matrix()) %>%
                                  {.$s[, 1]}

                                # Identify the partitions which sit above the fitted principal curve
                                x.df = x.df %>%
                                  dplyr::mutate(flag = dplyr::if_else(x > pc.c, 1, 0),
                                                newRegion = dplyr::if_else(flag == dplyr::lag(flag, 1, default = 1), FALSE, TRUE),
                                                regionID = NA_real_)

                                cur.regionID = 1
                                for (i in 1:nrow(x.df)) {
                                  if (x.df$newRegion[i]) {cur.regionID = cur.regionID + 1}
                                  x.df$regionID[i] = cur.regionID
                                }

                                # Calculate maximal pressure for each partition
                                x_Isolate = x.df %>%
                                  dplyr::filter(flag == 1) %>%
                                  dplyr::group_by(regionID) %>%
                                  dplyr::summarise(peak = max(x)) %>%
                                  dplyr::mutate(Peak_ID = 1:length(regionID)) %>%
                                  dplyr::ungroup() %>%
                                  dplyr::select(Peak_ID, dplyr::everything(), -regionID)

                                # Print the number of partitions which sit above the principal curve, along with the mean maximal pressure of the partitions
                                # Return the `tibble` which contains the maximal pressure of the partitions
                                dplyr::tibble(toPrint = paste0(round(length(dplyr::pull(x_Isolate, Peak_ID))), " (", sprintf(sprintf_Format, mean(x_Isolate$peak)), ")"), toReturn = list(x_Isolate))
                              },
                              function (x) {
                                # Under the assumption that the event trace has been partitioned correctly, the mean of the selected response is sufficient
                                dplyr::tibble(toPrint = mean(x), toReturn = list(mean(x)))
                              })

          # Define whether the function is to be applied by sensor
          bySensor = switch(descriptives[[nameIndex]][[1]],
                            `Lin.Trend` = TRUE,
                            `Count+Peak` = FALSE,
                            `Max` = TRUE,
                            `Count+PeakBySensor` = TRUE,
                            TRUE)

          # Define the name of the descriptive function
          name = descriptives[[nameIndex]][[1]]
        } else {
          funToApply = descriptives[[nameIndex]][[1]]
          bySensor = if(!is.null(descriptives[[nameIndex]][["bySensor"]])) {descriptives[[nameIndex]][["bySensor"]]} else {TRUE}
          name = if(!is.null(descriptives[[nameIndex]][["name"]])) {descriptives[[nameIndex]][["name"]]} else {"User Specified"}
        }

        # Print the JSONLabel with the name of the descriptive statistic
        if (toPrint) cat(currentJSONLabel, " -- ", name, ": \n", sep = "")
        toReturn_Child = by(x_Child, x_Child$newLabel, simplify = FALSE, function (x_GrandChild) {
          # Print the `pretty` event label
          if (toPrint) cat("  ", x_GrandChild$newLabel[1], ": ", sep = "")

          # Run funToApply either by sensor or on all sensors
          if (bySensor) {
            detailObj = lapply(1:8, function (colIndex) {
              funToApply(dplyr::pull(x_GrandChild, colIndex))
            }) %>% dplyr::bind_rows()
          } else {
            detailObj = funToApply(x_GrandChild[, 1:8])
          }

          # Format the output if it is numeric
          if (all(is.numeric(detailObj$toPrint))) {
            stringPrep = sprintf(sprintf_Format, detailObj$toPrint)
          } else {
            stringPrep = paste(detailObj$toPrint)
          }

          # Then, print the output
          if (toPrint) cat(stringPrep, "\n", sep = " ")

          return (detailObj$toReturn)
        })

        # Coerce the `toReturn` output into a `tibble` object
        return (
          lapply(toReturn_Child, function (toReturn_GrandChild) {
            toReturn_Edit = dplyr::tibble(JSONLabel = currentJSONLabel, descriptive = name, descriptiveOutput = list(toReturn_GrandChild))
          }) %>%
          dplyr::bind_rows() %>%
          dplyr::mutate(eventLabel = unique(x_Child$newLabel))
        )
      }) %>%
      dplyr::bind_rows() %>%
      dplyr::mutate(sessionID = x_Partition$sessionID[1])
    }) %>%
    # Sequential binding to consolidate the final to return object
    dplyr::bind_rows() %>%
    # Reoragnise the column order to be more sensible
    dplyr::select(sessionID, JSONLabel, eventLabel, dplyr::everything())

  # Return the `tibble` containing the `toReturn` elements silently
  invisible(toReturn)
}