R/stats-walk-summary.R

Defines functions summarize_walks

Documented in summarize_walks

#' Summarize Walks Data
#'
#' @family Statistic Functions
#'
#' @author Steven P. Sanderson II, MPH
#'
#' @details This function requires that the input data frame contains a
#' column named 'walk_number' and that the value to summarize is provided.
#' It computes statistics such as mean, median, variance, and quantiles
#' for the specified value variable. This function summarizes a data frame
#' containing random walk data by computing various statistical measures for a
#' specified value variable, grouped by a specified grouping variable. It checks
#' for necessary attributes and ensures that the data frame is structured correctly.
#'
#' @description Summarizes random walk data by computing statistical measures.
#'
#' @param .data A data frame or tibble containing random walk data.
#' @param .value A column name (unquoted) representing the value to summarize.
#' @param .group_var A column name (unquoted) representing the grouping variable.
#'
#' @return A tibble containing the summarized statistics for each group,
#' including mean, median, range, quantiles, variance, standard deviation,
#' and more.
#'
#' @examples
#' library(dplyr)
#'
#' # Example data frame
#' walk_data <- random_normal_walk(.initial_value = 100)
#'
#' # Summarize the walks
#' summarize_walks(walk_data, cum_sum, walk_number) |>
#'  glimpse()
#' summarize_walks(walk_data, y) |>
#'   glimpse()
#'
#' # Example with missing value variable
#' # summarize_walks(walk_data, NULL, group) # This will trigger an error.
#'
#' @name summarize_walks
NULL
#' @rdname summarize_walks
#' @export

summarize_walks <- function(.data, .value, .group_var) {
  # Summarize one column of a random walk data set, per group of
  # `.group_var`, returning one row of statistics per group.
  #
  # .data      Data frame or tibble; must contain a `walk_number` column.
  # .value     Unquoted column to summarize; must be supplied and not NULL.
  # .group_var Unquoted column to group by.
  #
  # Aborts (via rlang::abort) when `.value` is missing/NULL, when `.data` is
  # not a data frame or tibble, or when `walk_number` is absent.

  # Variables ----
  # Capture the column arguments unevaluated so they can be injected into
  # the dplyr verbs below.
  value_var <- rlang::enquo(.value)
  group_var <- rlang::enquo(.group_var)

  # Checks ----
  # All checks run before `.data` is converted, so invalid input is reported
  # with the messages below rather than with whatever error the tibble
  # conversion would raise.

  # `.value` must be supplied and must not be NULL
  if (rlang::quo_is_missing(value_var) || rlang::quo_is_null(value_var)) {
    rlang::abort(
      message = "The value to summarize must be provided.",
      use_cli_format = TRUE
    )
  }

  # `.data` must be a data frame or tibble; inherits() is TRUE when any of
  # the listed classes matches, giving the scalar condition `if` needs
  if (!inherits(.data, c("data.frame", "tbl"))) {
    rlang::abort(
      message = "The data must be a data frame or tibble.",
      use_cli_format = TRUE
    )
  }

  # `.data` must carry a walk_number column
  if (!"walk_number" %in% names(.data)) {
    rlang::abort(
      message = "The data must have a column named 'walk_number'. You need to
      use a data frame generated by a random walk generating function.",
      use_cli_format = TRUE
    )
  }

  # Attributes ----
  # The `fns`, `dimension` and `n` attributes of the converted tibble are
  # copied into the summary as descriptive columns.
  df <- dplyr::as_tibble(.data)
  atb <- attributes(df)

  # Summarize the data ----
  df |>
    dplyr::group_by(!!group_var) |>
    dplyr::select(!!group_var, !!value_var) |>
    dplyr::summarize(
      fns = atb[["fns"]],
      fns_name = convert_snake_to_title_case(atb[["fns"]]),
      dimensions = atb[["dimension"]],
      obs = atb[["n"]],
      mean_val = NNS::NNS.moments(!!value_var)[["mean"]],
      median = stats::median(!!value_var),
      range = rw_range(!!value_var),
      # 2.5% and 97.5% quantiles of the value column
      quantile_lo = stats::quantile(!!value_var, 0.025),
      quantile_hi = stats::quantile(!!value_var, 0.975),
      variance = NNS::NNS.moments(!!value_var)[["variance"]],
      sd = stats::sd(!!value_var),
      min_val = min(!!value_var),
      max_val = max(!!value_var),
      # n / sum(1 / x)
      harmonic_mean = length(!!value_var) / sum(1 / !!value_var),
      # exp(mean(log(x)))
      geometric_mean = exp(mean(log(!!value_var))),
      skewness = NNS::NNS.moments(!!value_var)[["skewness"]],
      kurtosis = NNS::NNS.moments(!!value_var)[["kurtosis"]]
    )
}

# Alternate-spelling alias: binds `summarise_walks` to the same function
# object as `summarize_walks`, documented on the same help page.
#' @rdname summarize_walks
#' @export
summarise_walks <- summarize_walks

Try the RandomWalker package in your browser

Any scripts or data that you put into this service are public.

RandomWalker documentation built on Oct. 23, 2024, 5:07 p.m.