R/gen-random-wilcoxsr-walk.R

Defines functions random_wilcoxon_sr_walk

Documented in random_wilcoxon_sr_walk

#' Generate Multiple Random Wilcoxon Signed-Rank Walks
#'
#' @family Generator Functions
#' @family Discrete Distribution
#'
#' @author Steven P. Sanderson II, MPH
#'
#' @details
#' The `random_wilcoxon_sr_walk` function generates multiple random walks in
#' 1, 2, or 3 dimensions. Each walk is a sequence of steps where each step is
#' a random draw from the Wilcoxon signed-rank distribution using
#' `stats::rsignrank()`. The user can specify the number of steps/periods
#' (`.nn`), the sample-size parameter of the distribution (`.n`), and the
#' number of dimensions (`.dimensions`). The function also allows for sampling
#' a proportion of the steps and optionally sampling with replacement.
#'
#' @description
#' A Wilcoxon signed-rank random walk is a stochastic process in which each
#' step is drawn from the Wilcoxon signed-rank distribution, commonly used in
#' nonparametric statistics. This function allows for the simulation of
#' multiple independent random walks in one, two, or three dimensions, with
#' user control over the number of walks, steps, and the sample size parameter
#' for the distribution. Sampling options allow for further customization,
#' including the ability to sample a proportion of steps and to sample with or
#' without replacement. The resulting data frame includes cumulative
#' statistics for each walk, making it suitable for simulation studies and
#' visualization.
#'
#' @param .num_walks An integer specifying the number of random walks to
#' generate. Default is 25.
#' @param .nn An integer specifying the number of steps in each walk.
#' Default is 100.
#' @param .n Integer or vector. Number(s) of observations in the sample(s) for
#' rsignrank. Default is 1.
#' @param .initial_value Numeric. Starting value of the walk. Default is 0.
#' @param .samp Logical. Whether to sample the steps. Default is TRUE.
#' @param .replace Logical. Whether sampling is with replacement.
#' Default is TRUE.
#' @param .sample_size Numeric. Proportion of steps to sample (0-1). Default
#' is 0.8.
#' @param .dimensions Integer. Number of dimensions (1, 2, or 3). Default is
#' 1.
#'
#' @examples
#' set.seed(123)
#' random_wilcoxon_sr_walk()
#'
#' set.seed(123)
#' random_wilcoxon_sr_walk(.dimensions = 3) |>
#'    head() |>
#'    t()
#'
#' @return A tibble containing the generated random walks with columns depending
#' on the number of dimensions:
#' \itemize{
#'   \item `walk_number`: Factor representing the walk number.
#'   \item `step_number`: Step index.
#'   \item `y`: If `.dimensions = 1`, the value of the walk at each step.
#'   \item `x`, `y`: If `.dimensions = 2`, the values of the walk in
#'    two dimensions.
#'   \item `x`, `y`, `z`: If `.dimensions = 3`, the values of the walk
#'    in three dimensions.
#' }
#'
#' The following cumulative statistics are also returned for each dimension
#' column that is present (any of `x`, `y`, and/or `z`):
#' \itemize{
#'   \item `cum_sum`: Cumulative sum of each dimension column.
#'   \item `cum_prod`: Cumulative product of each dimension column.
#'   \item `cum_min`: Cumulative minimum of each dimension column.
#'   \item `cum_max`: Cumulative maximum of each dimension column.
#'   \item `cum_mean`: Cumulative mean of each dimension column.
#' }
#'
#' @name random_wilcoxon_sr_walk
NULL

#' @export
#' @rdname random_wilcoxon_sr_walk
random_wilcoxon_sr_walk <- function(.num_walks = 25, .nn = 100, .n = 1,
                                    .initial_value = 0, .samp = TRUE,
                                    .replace = TRUE, .sample_size = 0.8,
                                    .dimensions = 1) {

  # Defensive checks ----
  if (.num_walks < 0) {
    rlang::abort(".num_walks cannot be less than 0", use_cli_format = TRUE)
  }

  # `.n` is documented as "Integer or vector", so collapse the comparison with
  # any(): a condition of length > 1 inside if() is an error in R >= 4.2.
  if (any(.n < 0)) {
    rlang::abort(".n cannot be less than 0", use_cli_format = TRUE)
  }

  if (.nn < 0) {
    rlang::abort(".nn cannot be less than 0", use_cli_format = TRUE)
  }

  if (.sample_size < 0 || .sample_size > 1) {
    rlang::abort(
      ".sample_size cannot be less than 0 or more than 1",
      use_cli_format = TRUE
    )
  }
  if (!.dimensions %in% c(1, 2, 3)) {
    rlang::abort(
      "Number of dimensions must be 1, 2, or 3.",
      use_cli_format = TRUE
    )
  }

  # Variables ----
  num_walks     <- as.integer(.num_walks)
  nn            <- as.integer(.nn)
  n             <- as.integer(.n)
  initial_value <- as.numeric(.initial_value)
  replace       <- as.logical(.replace)
  samp          <- as.logical(.samp)
  # Number of steps kept when sampling: a proportion of the requested steps.
  samp_size     <- round(.sample_size * nn, 0)
  # Branch on the coerced flag so this agrees with the generator below.
  periods       <- if (samp) samp_size else nn

  # At least one walk is required. Previously a value that coerced to 0 was
  # iterated as `1:0`, silently producing two walks numbered 1 and 0.
  if (num_walks < 1L) {
    rlang::abort(".num_walks must be at least 1", use_cli_format = TRUE)
  }
  walk_ids <- seq_len(num_walks)

  # Dimension (column) names. `.dimensions` is matched positionally when it is
  # numeric and by name when it is a character string such as "2".
  dim_names <- switch(.dimensions,
                      `1` = c("y"),
                      `2` = c("x", "y"),
                      `3` = c("x", "y", "z"))

  # Draw the steps for one dimension of one walk: `nn` signed-rank deviates,
  # optionally resampled down to `periods` values.
  draw_steps <- function() {
    steps <- stats::rsignrank(nn = nn, n = n)
    if (samp) {
      steps <- sample(steps, size = periods, replace = replace)
    }
    steps
  }

  # Generate a single random walk: one vector of steps per dimension, handed
  # to the package helper that assembles the per-walk table.
  generate_walk <- function(walk_num) {
    rand_steps <- purrr::map(dim_names, \(dim_name) draw_steps())
    rand_walk_column_names(rand_steps, dim_names, walk_num, periods)
  }

  # Generate all walks ----
  res <- purrr::map_dfr(walk_ids, generate_walk)
  res <- res |>
    dplyr::mutate(walk_number = factor(walk_number, levels = walk_ids))

  # Cumulative statistics, computed per walk. Each step re-groups and
  # ungroups so no helper depends on grouping left behind by the previous one.
  res <- res |>
    dplyr::group_by(walk_number) |>
    std_cum_sum_augment(
      .value = dplyr::all_of(dim_names),
      .initial_value = initial_value
    ) |>
    dplyr::ungroup()
  res <- res |>
    dplyr::group_by(walk_number) |>
    std_cum_prod_augment(
      .value = dplyr::all_of(dim_names),
      .initial_value = initial_value
    ) |>
    dplyr::ungroup()
  res <- res |>
    dplyr::group_by(walk_number) |>
    std_cum_min_augment(
      .value = dplyr::all_of(dim_names),
      .initial_value = initial_value
    ) |>
    dplyr::ungroup()
  res <- res |>
    dplyr::group_by(walk_number) |>
    std_cum_max_augment(
      .value = dplyr::all_of(dim_names),
      .initial_value = initial_value
    ) |>
    dplyr::ungroup()
  res <- res |>
    dplyr::group_by(walk_number) |>
    std_cum_mean_augment(
      .value = dplyr::all_of(dim_names),
      .initial_value = initial_value
    ) |>
    dplyr::ungroup()

  # Record the generating parameters as attributes on the result ----
  attr(res, "n")             <- n
  attr(res, "nn")            <- nn
  attr(res, "num_walks")     <- num_walks
  attr(res, "initial_value") <- initial_value
  attr(res, "replace")       <- replace
  attr(res, "samp")          <- samp
  attr(res, "samp_size")     <- samp_size
  attr(res, "periods")       <- periods
  attr(res, "fns")           <- "random_wilcoxon_sr_walk"
  attr(res, "dimensions")    <- .dimensions

  res
}

Try the RandomWalker package in your browser

Any scripts or data that you put into this service are public.

RandomWalker documentation built on Aug. 19, 2025, 1:14 a.m.