R/single_mean.R
In radiant.basics: Basics Menu for Radiant: Business Analytics using R and Shiny

Documented in single_mean summary.single_mean

#' Compare a sample mean to a population mean
#'
#' @details See \url{https://radiant-rstats.github.io/docs/basics/single_mean.html} for an example in Radiant
#'
#' @param dataset Dataset
#' @param var The variable selected for the mean comparison
#' @param comp_value Population value to compare to the sample mean
#' @param alternative The alternative hypothesis ("two.sided", "greater", or "less")
#' @param conf_lev Span for the confidence interval
#' @param data_filter Expression entered in, e.g., Data > View to filter the dataset in Radiant. The expression should be a string (e.g., "price > 10000")
#' @param envir Environment to extract data from
#'
#' @return A list of variables defined in single_mean as an object of class single_mean
#'
#' @examples
#' single_mean(diamonds, "price") %>% str()
#'
#' @seealso \code{\link{summary.single_mean}} to summarize results
#' @seealso \code{\link{plot.single_mean}} to plot results
#'
#' @export
single_mean <- function(dataset, var, comp_value = 0,
                        alternative = "two.sided", conf_lev = .95,
                        data_filter = "", envir = parent.frame()) {
  df_name <- if (is_string(dataset)) dataset else deparse(substitute(dataset))
  dataset <- get_data(dataset, var, filt = data_filter, na.rm = FALSE, envir = envir)

  ## counting missing values
  # miss <- n_missing(dataset)
  ## removing any missing values
  # dataset <- na.omit(dataset)

  res <- t.test(dataset[[var]], mu = comp_value, alternative = alternative, conf.level = conf_lev) %>%
    tidy()

  ## from http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/
  me_calc <- function(se, n, conf.lev = .95) {
    se * qt(conf.lev / 2 + .5, n - 1)
  }

  dat_summary <- summarise_all(
    dataset,
    list(
      diff = ~ mean(., na.rm = TRUE) - comp_value,
      mean = ~ mean(., na.rm = TRUE),
      n = length,
      n_missing = n_missing,
      sd = ~ sd(., na.rm = TRUE),
      se = se,
      me = ~ me_calc(se, n, conf_lev)
    )
  )

  # removing unneeded arguments
  rm(envir, me_calc)

  as.list(environment()) %>% add_class("single_mean")
}

#' Summary method for the single_mean function
#'
#' @details See \url{https://radiant-rstats.github.io/docs/basics/single_mean.html} for an example in Radiant
#'
#' @param object Return value from \code{\link{single_mean}}
#' @param dec Number of decimals to show
#' @param ... further arguments passed to or from other methods
#'
#' @examples
#' result <- single_mean(diamonds, "price")
#' summary(result)
#' diamonds %>%
#'   single_mean("price") %>%
#'   summary()
#'
#' @seealso \code{\link{single_mean}} to generate the results
#' @seealso \code{\link{plot.single_mean}} to plot results
#'
#' @export
summary.single_mean <- function(object, dec = 3, ...) {
  cat("Single mean test\n")
  cat("Data      :", object$df_name, "\n")
  if (!is.empty(object$data_filter)) {
    cat("Filter    :", gsub("\\n", "", object$data_filter), "\n")
  }
  cat("Variable  :", object$var, "\n")
  cat("Confidence:", object$conf_lev, "\n")

  hyp_symbol <- c(
    "two.sided" = "not equal to",
    "less" = "<",
    "greater" = ">"
  )[object$alternative]

  cat("Null hyp. : the mean of", object$var, "=", object$comp_value, "\n")
  cat("Alt. hyp. : the mean of", object$var, "is", hyp_symbol, object$comp_value, "\n\n")

  ## determine lower and upper % for ci
  ci_perc <- ci_label(object$alternative, object$conf_lev)

  ## print summary statistics
  object$dat_summary %>%
    select(-1) %>%
    # select_at(c("mean", "n", "n_missing", "sd", "se", "me")) %>%
    as.data.frame(stringsAsFactors = FALSE) %>%
    format_df(dec = dec, mark = ",") %>%
    print(row.names = FALSE)
  cat("\n")

  res <- object$res
  res <- bind_cols(
    data.frame(
      diff = object$dat_summary[["diff"]],
      se = object$dat_summary[["se"]],
      stringsAsFactors = FALSE
    ),
    res[, -1]
  ) %>%
    as.data.frame(stringsAsFactors = FALSE) %>%
    select(base::setdiff(colnames(.), c("method", "alternative"))) %>%
    mutate(parameter = as.integer(parameter))


  names(res) <- c("diff", "se", "t.value", "p.value", "df", ci_perc[1], ci_perc[2])
  res %<>% round(dec) ## restrict the number of decimals
  res$` ` <- sig_stars(res$p.value)
  if (res$p.value < .001) res$p.value <- "< .001"

  ## print statistics
  print(res, row.names = FALSE)
  cat("\nSignif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n")
}

#' Plot method for the single_mean function
#'
#' @details See \url{https://radiant-rstats.github.io/docs/basics/single_mean.html} for an example in Radiant
#'
#' @param x Return value from \code{\link{single_mean}}
#' @param plots Plots to generate. "hist" shows a histogram of the data along with vertical lines that indicate the sample mean and the confidence interval. "simulate" shows the location of the sample mean and the comparison value (comp_value). Simulation is used to demonstrate the sampling variability in the data under the null-hypothesis
#' @param shiny Did the function call originate inside a shiny app
#' @param custom Logical (TRUE, FALSE) to indicate if ggplot object (or list of ggplot objects) should be returned. This option can be used to customize plots (e.g., add a title, change x and y labels, etc.). See examples and \url{https://ggplot2.tidyverse.org/} for options.
#' @param ... further arguments passed to or from other methods
#'
#' @examples
#' result <- single_mean(diamonds, "price", comp_value = 3500)
#' plot(result, plots = c("hist", "simulate"))
#'
#' @seealso \code{\link{single_mean}} to generate the result
#' @seealso \code{\link{summary.single_mean}} to summarize results
#'
#' @importFrom rlang .data
#'
#' @export
plot.single_mean <- function(x, plots = "hist",
                             shiny = FALSE, custom = FALSE, ...) {
  plot_list <- list()
  if ("hist" %in% plots) {
    bw <- x$dataset %>%
      range(na.rm = TRUE) %>%
      diff() %>%
      divide_by(10)

    plot_list[[which("hist" == plots)]] <-
      ggplot(x$dataset, aes(x = .data[[x$var]])) +
      geom_histogram(fill = "blue", binwidth = bw, alpha = 0.5) +
      geom_vline(
        xintercept = x$comp_value,
        color = "red",
        linetype = "solid",
        linewidth = 1
      ) +
      geom_vline(
        xintercept = x$res$estimate,
        color = "black",
        linetype = "solid",
        linewidth = 1
      ) +
      geom_vline(
        xintercept = c(x$res$conf.low, x$res$conf.high),
        color = "black",
        linetype = "longdash",
        linewidth = 0.5
      )
  }
  if ("simulate" %in% plots) {
    var <- na.omit(x$dataset[[x$var]])
    nr <- length(var)

    simdat <- replicate(1000, mean(sample(var, nr, replace = TRUE))) %>%
      (function(z) (z - mean(z)) + x$comp_value) %>%
      as.data.frame(stringsAsFactors = FALSE) %>%
      set_colnames(x$var)

    cip <- ci_perc(simdat[[x$var]], x$alternative, x$conf_lev)

    bw <- simdat %>%
      range() %>%
      diff() %>%
      divide_by(20)

    plot_list[[which("simulate" == plots)]] <-
      ggplot(simdat, aes(x = .data[[x$var]])) +
      geom_histogram(
        fill = "blue",
        binwidth = bw,
        alpha = 0.5
      ) +
      geom_vline(
        xintercept = x$comp_value,
        color = "red",
        linetype = "solid",
        linewidth = 1
      ) +
      geom_vline(
        xintercept = x$res$estimate,
        color = "black",
        linetype = "solid",
        linewidth = 1
      ) +
      geom_vline(
        xintercept = cip,
        color = "red",
        linetype = "longdash",
        linewidth = 0.5
      ) +
      labs(title = paste0("Simulated means if null hyp. is true (", x$var, ")"))
  }

  if (length(plot_list) > 0) {
    if (custom) {
      if (length(plot_list) == 1) plot_list[[1]] else plot_list
    } else {
      patchwork::wrap_plots(plot_list, ncol = 1) %>%
        (function(x) if (shiny) x else print(x))
    }
  }
}