# fabricated: Tools to Audit Survey Data Quality

# Documented in shuffle

#shuffle function


#' Random shuffle of integers and associated decimals
#'
#' The 'shuffle()' function takes a numeric column in a data frame and randomly "shuffles"
#' the integers and decimals by separating the integers from the associated decimals and
#' sampling the decimals without replacement. The 'average_fre()' function is then used to
#' calculate the average frequency for the shuffled data.
#'
#' @param data A data frame
#' @param variable A numeric variable that includes the first decimal place.
#' @param group A second variable used to group the primary variable such that
#'     average frequency is calculated separately for each group.
#' @param decimal_place The number of decimal places used for the calculation.
#'     The default is set to "1", meaning values are rounded to one decimal
#'     place (digits in the hundredths place and below are not kept).
#' @param reps The number of shuffles (simulations) to perform. The default
#'     is set to 1.
#'
#' @return A tibble
#' @export
#'
#' @examples
#' shuffle(simulated_normal, obs)
#'
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @importFrom stringr str_sub


# Shuffle the decimal parts of a numeric column and summarise each shuffle.
#
# Arguments:
#   data          A data frame.
#   variable      Column (tidy-eval) holding the numeric values to shuffle.
#   group         Optional grouping column (tidy-eval); decimals are only
#                 shuffled within a group. NULL means no grouping.
#   decimal_place Number of decimal places kept when the values are formatted.
#   reps          Number of independent shuffles to run.
#
# Returns the per-shuffle results of average_fre(), row-bound together.
shuffle <- function(data, variable,
                    group = NULL,
                    decimal_place = 1,
                    reps = 1) {

  # Validate the scalar arguments up front. With decimal_place < 1 the string
  # split below no longer separates an integer part from a decimal part.
  if (!is.numeric(decimal_place) || length(decimal_place) != 1L ||
      is.na(decimal_place) || decimal_place < 1) {
    stop("decimal_place must be a single number greater than or equal to 1",
         call. = FALSE)
  }
  if (!is.numeric(reps) || length(reps) != 1L || is.na(reps) || reps < 0) {
    stop("reps must be a single non-negative number", call. = FALSE)
  }

  # Type check on the target column. Ungrouping first guarantees a single
  # TRUE/FALSE even when `data` arrives grouped, and is.numeric() avoids the
  # length > 1 condition that `class(x) != "numeric"` produces for
  # multi-class columns.
  variable_is_numeric <- data %>%
    dplyr::ungroup() %>%
    dplyr::summarise(ok = is.numeric({{ variable }})) %>%
    dplyr::pull()

  if (!isTRUE(variable_is_numeric)) {
    stop("shuffle() will only function on a numeric variable", call. = FALSE)
  }

  # Split every value into two strings:
  #   integer - everything up to and including the decimal point
  #   decimal - the last `decimal_place` characters
  # `!!` injects the plain argument values so they cannot be shadowed by a
  # column of the same name inside the data mask.
  prepared <- data %>%
    dplyr::filter(!is.na({{ variable }})) %>%
    dplyr::mutate(var = formatC({{ variable }},
                                digits = !!decimal_place,
                                format = "f"),
                  decimal = str_sub(.data$var, start = -(!!decimal_place)),
                  integer = str_sub(.data$var,
                                    end = -(1 + !!decimal_place))) %>%
    dplyr::group_by({{ group }}) %>%
    # The count column is not needed for sampling any more; it is kept so the
    # data handed to average_fre() has the same columns as before.
    dplyr::add_count()

  # One shuffle per repetition. seq_len() yields zero iterations for
  # reps = 0, where 1:reps would have run twice.
  shuffled <- lapply(seq_len(reps), function(i) {
    prepared %>%
      # dplyr::n() is the current group's size, so decimals are permuted
      # within each group regardless of how the count column was named.
      dplyr::mutate(decimal_1 = sample(.data$decimal,
                                       size = dplyr::n(),
                                       replace = FALSE),
                    digit_1 = as.numeric(paste0(.data$integer,
                                                .data$decimal_1))) %>%
      dplyr::ungroup() %>%
      average_fre(.data$digit_1, {{ group }}, decimal_place)
  })

  dplyr::bind_rows(shuffled)
}