#' Goodness of fit for uniform digits
#'
#' The 'unif_digits()' function uses the 'count_digits()' function to count the number
#' of times that each digit occurs and the 'chisq.test()' function from the stats package to
#' test the assumption that the digits are uniformly distributed. Several other measures of
#' deviation from the uniform distribution are also available.
#'
#' The maximum deviation measure follows Leemis, Schmeiser, and Evans (2000);
#' the Euclidean distance measure follows Cho and Gaines (2007).
#'
#' @param data A data frame
#' @param variable A numeric variable that includes the first decimal place.
#' @param group A second variable used to group the primary variable
#' @param decimal_place The decimal place for which digits are counted. The
#' default is set to one but any place may be specified by numeric rank,
#' i.e. "1" for the 1st decimal (tenths), "2" for the 2nd decimal (hundredths),
#' etc.
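#' @param measures Measures of the degree to which the data deviates from
#' the uniform distribution. A character vector containing any of "mad"
#' (mean absolute deviation, returned in column "mad"), "max_dev" (maximum
#' absolute deviation, returned in column "dev_max"), and "euclidean"
#' (Euclidean distance, returned in column "dev_m"). The default is "mad".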
#' @param counts The default is set as FALSE. If this is changed to TRUE,
#' then results from count_digits will also be included in the output.
#'
#' @return A tibble that includes a column called "chisq" that holds the value
#' of the chi-square statistic and a column called "chisq_p" that holds
#' the p-value derived from the chi-square statistic.
#'
#' @export
#'
#' @examples
#' unif_digits(bodyweight, obs, group)
#'
#'
#' @importFrom magrittr %>%
#' @importFrom rlang .data
unif_digits <- function(data,
                        variable,
                        group = NULL,
                        decimal_place = 1,
                        measures = c("mad"),
                        counts = FALSE) {
  # Measures are opt-in by name. Unrecognised names are still ignored, but
  # with a warning so that a typo does not pass unnoticed.
  unknown <- setdiff(measures, c("mad", "max_dev", "euclidean"))
  if (length(unknown) > 0) {
    warning(
      "Ignoring unknown measures: ", paste(unknown, collapse = ", "),
      call. = FALSE
    )
  }

  # Names of the ten per-digit count columns used below (n_0 ... n_9).
  digit_cols <- paste0("n_", 0:9)

  # Run the chi-square test once per row and return both results as a one-row
  # data frame, which mutate() unpacks into the `chisq` and `chisq_p` columns.
  chisq_uniform <- function(digit_counts) {
    test <- stats::chisq.test(digit_counts)
    dplyr::tibble(chisq = test$statistic, chisq_p = test$p.value)
  }

  # Each row of the count_digits() result is treated as one set of digit
  # counts, hence rowwise(). `expected` and `deviation` are working columns
  # that are dropped again before returning.
  output <- count_digits(data, {{ variable }}, {{ group }}, {{ decimal_place }}) %>%
    dplyr::rowwise() %>%
    dplyr::mutate(
      total = sum(dplyr::c_across(tidyselect::starts_with("n_"))),
      chisq_uniform(dplyr::c_across(tidyselect::all_of(digit_cols))),
      # Under uniformity each of the ten digits is expected in 10% of cases.
      expected = .data$total * 0.1,
      deviation = sum(abs(
        .data$expected - dplyr::c_across(tidyselect::starts_with("n_"))
      ))
    )

  if ("mad" %in% measures) {
    # Mean absolute deviation of the digit proportions from 0.1.
    output <- output %>%
      dplyr::mutate(mad = (.data$deviation / .data$total) / 10)
  }
  if ("max_dev" %in% measures) {
    # Largest absolute difference between an observed and expected count.
    output <- output %>%
      dplyr::mutate(
        dev_max = max(abs(
          .data$expected - dplyr::c_across(tidyselect::starts_with("n_"))
        ))
      )
  }
  if ("euclidean" %in% measures) {
    # The difference must be parenthesised before squaring: `^` binds tighter
    # than `-`, so `expected - n^2` would square the counts only.
    output <- output %>%
      dplyr::mutate(
        dev_m = sqrt(sum(
          (.data$expected - dplyr::c_across(tidyselect::starts_with("n_")))^2
        ))
      )
  }

  # Always drop the working columns; drop the digit counts too unless the
  # caller asked for them.
  dropped <- c("deviation", "expected")
  if (!counts) {
    dropped <- c(digit_cols, dropped)
  }

  # ungroup() removes the rowwise class so the caller receives a plain tibble
  # and later summaries are not silently computed one row at a time.
  output %>%
    dplyr::ungroup() %>%
    dplyr::select(-tidyselect::all_of(dropped))
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.