R/deriveZero.r

Defines functions deriveZero

Documented in deriveZero

#' Derive the zeroed pressures of a FemFit dataset
#'
#' @description
#' Derives the zeroed pressures of a segmented FemFit dataset.
#'
#' @param x An "FemFit" object.
#' @param method Specifies the statistical procedure to generate the zeroed pressures. See Details.
#' @param ... Arguments passed to \code{\link{lm}} or \code{\link{lmer}}.
#'
#' @details
#' \code{"lmm"} fits a linear mixed model to calculate the means of the baseline regions.
#'
#' \code{"manova"} fits a multivariate analysis of variance to calculate the means of the baseline regions.
#'
#' @return
#' Returns a FemFit object with an updated \code{df} element containing the zeroed out pressures for each sensor as \code{zeroPrssrX}, where \code{X} is the sensor numbers 1 to 8.
#'
#' @examples
#' AS005 = read_FemFit(c(
#'         "Datasets_AukRepeat/61aa0782289af385_283_csv.zip",
#'         "Datasets_AukRepeat/61aa0782289af385_284_csv.zip"
#'     ),
#'     remove.NAs = TRUE
#'   ) %>%
#'   # Segment the FemFit data
#'   segment(
#'     cp. = 0.001,
#'     numOfNodesToLabel = list(c(3, 1, 3, 4), c(4, 1, 5, 3))
#'   )
#'
#' AS005_lmm = deriveZero(AS005, "lmm")
#' # The "lmm" method is recommended, but it can be shown that "manova" does return similar results...
#' AS005_manova = deriveZero(AS005, "manova")
#'
#' @export
deriveZero = function(x, method = c("lmm", "manova"), ...) {
  # Throw an error if the x argument is not an FemFit object or missing
  if (!inherits(x, "FemFit") || is.na(x)) {
    stop("The x argument is not an FemFit object.", call. = FALSE)
  }

  # Throw an error if trLabel does not exists in x$df
  if (x$df %>% colnames %>% {any(grepl("^trLabel$", .))} %>% !.) {
    stop("trLabel does not exist in the FemFit object.", call. = FALSE)
  }

  # Throw an error if the method argument is not one of the available options
  method = match.arg(method)

  # Throw a warning if trLabelSeq exists in x$df and remove it from x$df
  if (x$df %>% colnames %>% {any(grepl("^trLabelSeq$", .))}) {
    warning("trLabelSeq already exists in the FemFit object. deriveZero() will overwrite the previous trLabelSeq.", call. = FALSE)
    x$df = x$df %>%
      dplyr::select(-trLabelSeq)
  }

  # Throw a warning if zeroPrssrXs exists in x$df and remove it from x$df
  if (x$df %>% colnames %>% {any(grepl("^zeroPrssr[0-9]$", .))}) {
    warning("zeroPrssrXs already exists in the FemFit object. deriveZero() will overwrite the previous zeroPrssrXs.", call. = FALSE)
    x$df = x$df %>%
      dplyr::select(-dplyr::starts_with("zeroPrssr"))
  }

  x_Work = by(x$df, x$df$sessionID, function (x_Child) {
    # Create a new column with the sequential baseline//event region labels
    x_Child = FemFit:::deriveZero_seqLabels(x_Child)

    if (method == "lmm") {
      # Fit the linear mixed model to all of the observed values
      x_Child.fit = FemFit:::deriveZero_lmm(x_Child, ...)

      # Extract the fitted means for each baseline across sensors
      fit.df = expand.grid(sensor = paste0("prssr_sensor", 1:8), trLabelSeq = unique(x_Child.fit@frame$trLabelSeq), stringsAsFactors = FALSE)
      fit.df$mus = predict(x_Child.fit, newdata = fit.df)

      fit.df = fit.df %>%
        dplyr::mutate(regionID = as.numeric(gsub("(baseline\\.|event\\.)([0-9])", "\\2", trLabelSeq))) %>%
        dplyr::select(-trLabelSeq)
    } else if (method == "manova") {
      # Fit the multivariate analysis of variance to all of the observed values
      x_Child.fit = FemFit:::deriveZero_manova(x_Child, ...)

      # Derive the zeroed out pressures
      fit.df = expand.grid(trLabelSeq = unique(x_Child$trLabelSeq) %>% .[grepl("baseline", .)], stringsAsFactors = FALSE) %>%
        cbind(predict(x_Child.fit, newdata = .)) %>%
        tidyr::gather(sensor, mus, -trLabelSeq) %>%
        dplyr::mutate(regionID = as.numeric(gsub("(baseline\\.|event\\.)([0-9])", "\\2", trLabelSeq))) %>%
        dplyr::select(-trLabelSeq)
    }

    # Derive the zeroed out pressures
    zeroMatrix = vapply(paste0("zeroPrssr", 1:8), FUN.VALUE = numeric(nrow(x_Child)), function (col_name) {
      og_varName = paste0("prssr_sensor", gsub("zeroPrssr([0-9])", "\\1", col_name))

      # Do something smart to apply the baselines correctly USING only the IDs <X>.<ID>
      x_Col = x_Child[, c(og_varName, "regionID")] %>%
        dplyr::left_join(fit.df %>% dplyr::filter(sensor == og_varName), by = "regionID") %>%
        dplyr::mutate_(.dots = setNames(paste0(og_varName, "- mus"), col_name)) %>%
        dplyr::pull(col_name)
    })

    # Bind the zeroed out pressures to x_Child
    x_Child = bind_cols(x_Child, zeroMatrix %>% dplyr::as_tibble()) %>%
      dplyr::select(-regionID)

    # Return x_Child and the fitted model object (if requested)
    return (x_Child)
  })

  # Setup the object to return to the end-user
  if (length(x_Work) == 1) {
    x$df = x_Work[[1]]
  } else {
    x$df = x_Work %>% Reduce(function(df1, df2) dplyr::bind_rows(df1, df2), .)
  }

  return (x)
}

#' @title
#' Internal functions of \code{deriveZero}
#'
#' @description
#' Accessible for end-user use with \code{FemFit:::} as a prefix to each function. Note that these child functions have no in-built error detection.
#'
#' @name deriveZero_Child
NULL
#> NULL

#' @describeIn deriveZero_Child
#' Creates sequential event labels.
#'
#' @keywords internal
deriveZero_seqLabels = function(x_df) {
  # Extract the first JSONLabel and trLabel from the data.frame
  first_JSON = x_df$JSONLabel[1]
  first_trLabel = x_df$trLabel[1]

  # Create the sequential trLabels
  toReturn = x_df %>%
    dplyr::select(time, trmnlNode, trLabel, JSONLabel) %>%
    dplyr::mutate(
      flag_Label = dplyr::if_else(trLabel != dplyr::lag(trLabel, 1, default = first_trLabel), 1, 0),
      flag_Region = dplyr::if_else(JSONLabel != dplyr::lag(JSONLabel, 1, default = first_JSON), 1, 0),
      newRegion = dplyr::if_else((flag_Region == 1 | flag_Label == 1) & trLabel == "baseline", TRUE, FALSE),
      regionID = NA_real_
    ) %>%
    {
      cur.regionID = 1
      for (i in 1:nrow(.)) {
        if (.$newRegion[i]) {
          cur.regionID = cur.regionID + 1
        }
        .$regionID[i] = cur.regionID
      }
      .
    } %>%
    dplyr::select(time, trLabel, regionID) %>%
    dplyr::mutate(trLabelSeq = paste(trLabel, regionID, sep = ".")) %>%
    dplyr::select(time, regionID, trLabelSeq) %>%
    dplyr::left_join(x_df, by = c("time"))

  return (toReturn)
}

#' @describeIn deriveZero_Child
#' Fits a multivariate analysis of variance for a FemFit dataset.
#'
#' @keywords internal
deriveZero_manova = function(x_df, ...) {
  # Filter the observations and columns of interest from the provided data.frame
  x_widedf = x_df %>%
    dplyr::filter(trLabel == "baseline") %>%
    dplyr::select(dplyr::starts_with("prssr_sensor"), trLabelSeq)

  # Fit the multivariate analysis of variance with a linear model and return it to the parent function
  return (lm(formula = cbind(prssr_sensor1, prssr_sensor2, prssr_sensor3, prssr_sensor4, prssr_sensor5, prssr_sensor6, prssr_sensor7, prssr_sensor8) ~ trLabelSeq, data = x_widedf, ...))
}

#' @describeIn deriveZero_Child
#' Fits a linear mixed model for a FemFit dataset.
#'
#' @keywords internal
deriveZero_lmm = function(x_df, ...) {
  # Create the long version of the provided data.frame
  x_longdf = x_df %>%
    dplyr::filter(trLabel == "baseline") %>%
    dplyr::select(dplyr::starts_with("prssr_sensor"), trLabelSeq) %>%
    tidyr::gather(sensor, prssr, -trLabelSeq)

  # Fit the linear mixed model to the transformed data.frame and return it to the parent function
  return (lmer(formula = prssr ~ trLabelSeq + (1 | sensor) + (1 | sensor:trLabelSeq), data = x_longdf, ...))
}
TheGreatGospel/IVPSA documentation built on May 19, 2019, 1:47 a.m.