
Defines functions tidy_normal

Documented in tidy_normal

#' Tidy Randomly Generated Gaussian Distribution Tibble
#' @family Continuous Distribution
#' @family Gaussian
#' @author Steven P. Sanderson II, MPH
#' @details This function uses the underlying `stats::rnorm()`, `stats::pnorm()`,
#' and `stats::qnorm()` functions to generate data from the given parameters. For
#' more information please see [stats::rnorm()]
#' @description This function will generate `n` random points from a Gaussian
#' distribution with a user-provided mean (`.mean`), standard deviation (`.sd`),
#' and number of random simulations to be produced. The function returns a
#' tibble with the simulation number column, the `x` column which indexes the
#' `n` points within each simulation, the randomly generated `y` values, and
#' the kernel density, `pnorm` and `qnorm` data points as well.
#' The data is returned un-grouped.
#' The columns that are output are:
#' -  `sim_number` The current simulation number.
#' -  `x` The index of the current point (from 1 to `.n`) within the current simulation.
#' -  `y` The randomly generated data point.
#' -  `dx` The `x` value from the [stats::density()] function.
#' -  `dy` The `y` value from the [stats::density()] function.
#' -  `p` The values from the resulting p_ function of the distribution family.
#' -  `q` The values from the resulting q_ function of the distribution family.
#' @param .n The number of randomly generated points you want.
#' @param .mean The mean of the randomly generated data.
#' @param .sd The standard deviation of the randomly generated data.
#' @param .num_sims The number of randomly generated simulations you want.
#' @param .return_tibble A logical value indicating whether to return the result
#' as a tibble. Default is TRUE.
#' @examples
#' tidy_normal()
#' @return
#' A tibble of randomly generated data, or a data.table when
#' `.return_tibble` is `FALSE`.
#' @name tidy_normal

#' @export
#' @rdname tidy_normal

tidy_normal <- function(.n = 50, .mean = 0, .sd = 1, .num_sims = 1,
                        .return_tibble = TRUE) {
  # Simulate `.num_sims` independent sets of `.n` Gaussian draws and return
  # them in long format (one row per simulation/point pair) together with a
  # kernel density estimate and the matching pnorm/qnorm values. The inputs
  # and derived parameter text are attached to the result as attributes.

  # Tidyeval ----
  n <- as.integer(.n)
  num_sims <- as.integer(.num_sims)
  mu <- as.numeric(.mean)
  std <- as.numeric(.sd)
  ret_tbl <- as.logical(.return_tibble)

  # Checks ----
  # `as.integer()` returns NA for input it cannot coerce, so NA is rejected
  # explicitly instead of reaching `if` as a missing condition. Zero is
  # rejected as well: the messages require a value greater than 0, and a
  # zero count cannot produce a meaningful simulation grid.
  if (length(n) != 1L || is.na(n) || n < 1L) {
    stop(
      "The parameters '.n' must be of class integer. Please pass a whole
            number like 50 or 100. It must be greater than 0.",
      call. = FALSE
    )
  }

  if (length(num_sims) != 1L || is.na(num_sims) || num_sims < 1L) {
    stop(
      "The parameter `.num_sims' must be of class integer. Please pass a
            whole number like 50 or 100. It must be greater than 0.",
      call. = FALSE
    )
  }

  if (!is.numeric(mu)) {
    stop(
      "The parameters of '.mean' and '.sd' must be of class numeric.
            Please pass a numer like 1 or 1.1 etc.",
      call. = FALSE
    )
  }

  if (!is.numeric(std)) {
    stop(
      "The parameters of '.mean' and '.sd' must be of class numeric.
            Please pass a numer like 1 or 1.1 etc.",
      call. = FALSE
    )
  }

  # Evenly spaced grid on [0, 1]; stored on the result as the "ps" and "qs"
  # attributes.
  qs <- seq(0, 1, (1 / (n - 1)))
  ps <- qs

  # Create a data.table with one row per (simulation, point index) pair
  df <- data.table::CJ(
    sim_number = factor(seq_len(num_sims)),
    x = seq_len(n)
  )

  # Draw one random value per row; `.N` is the total row count here
  df[, y := stats::rnorm(n = .N, mean = mu, sd = std)]

  # Compute the density of the y values per simulation; `n = n` makes the
  # density grid the same length as the group so it fits as two columns
  df[, c("dx", "dy") := stats::density(y, n = n)[c("x", "y")], by = sim_number]

  # Compute the p-values for the y values and add a column for p
  df[, p := stats::pnorm(y, mean = mu, sd = std)]

  # Compute the q-values for the p-values and add a column for q
  df[, q := stats::qnorm(p, mean = mu, sd = std)]

  if (ret_tbl) {
    df <- dplyr::as_tibble(df)
  } else {
    # CJ() returns a keyed table; drop the key for the data.table result
    data.table::setkey(df, NULL)
  }

  # Create a tibble with the parameter grid
  param_grid <- dplyr::tibble(.mean, .sd)

  # Comma-separated parameter values wrapped as "c(...)"; reused by both
  # text attributes below
  param_txt <- paste0(
    "c(",
    paste(param_grid[, names(param_grid)], collapse = ", "),
    ")"
  )

  # Attach descriptive attributes to tibble
  attr(df, "distribution_family_type") <- "continuous"
  attr(df, ".mean") <- .mean
  attr(df, ".sd") <- .sd
  attr(df, ".n") <- .n
  attr(df, ".num_sims") <- .num_sims
  attr(df, ".ret_tbl") <- .return_tibble
  attr(df, "tibble_type") <- "tidy_gaussian"
  attr(df, "ps") <- ps
  attr(df, "qs") <- qs
  attr(df, "param_grid") <- param_grid
  attr(df, "param_grid_txt") <- param_txt
  attr(df, "dist_with_params") <- paste0(
    "Gaussian",
    " ",
    param_txt
  )

  # Return final result as function output
  df
}

Try the TidyDensity package in your browser

Any scripts or data that you put into this service are public.

TidyDensity documentation built on May 29, 2024, 11:06 a.m.