#' Run one spoofing pass over grouped data
#'
#' @description Summarises every requested continuous column of an
#'   already-grouped data frame with `ultraAggRandom`, so each group is
#'   collapsed to a single row of randomly drawn values.
#'
#' @param grouped_origin the origin data that has been grouped already
#' @param continuous_cols names of the continuous columns to summarise
#'   (passed to `dplyr::one_of()`)
#'
#' @return the summarised data as produced by `dplyr::summarise_at()`
#' @export
ultraSingleSpoofRun <- function(grouped_origin, continuous_cols) {
  # Build the column selection once, then hand it to the scoped summarise
  target_cols <- dplyr::vars(dplyr::one_of(continuous_cols))
  dplyr::summarise_at(
    grouped_origin,
    .vars = target_cols,
    .funs = ultraAggRandom
  )
}
#' New spoofing function
#'
#' @description Function which uses the new devised method to spoof without
#'   introducing illegal combinations of values. The origin data is grouped on
#'   its nominal columns, each continuous column is replaced per group by a
#'   random draw (see `ultraSingleSpoofRun`), and the pass is repeated until at
#'   least `target_rows` rows exist; any surplus is removed by random sampling.
#'
#' @param ultra_df the ultra df; must expose a `meta` element and an
#'   `origin()` accessor returning the data to spoof
#' @param extract_level forwarded to `megametadata::metaSplitNominal()` as its
#'   `extract_level` argument
#' @param target_rows number of rows the spoofed data set should contain
#'   (default 100, the previously hard-coded value)
#'
#' @return an ungrouped data frame of spoofed rows. It has `target_rows` rows
#'   whenever a single pass yields at least one row; if a pass yields zero rows
#'   (e.g. empty origin data) that empty result is returned as is.
#' @export
ultraSpooferNew <- function(ultra_df, extract_level = "DefaultLevel", target_rows = 100) {
  stopifnot(is.numeric(target_rows), length(target_rows) == 1L, target_rows >= 1)

  # Identify nominal and continuous columns from the metadata
  nominal_split <- megametadata::metaSplitNominal(ultra_df$meta, extract_level = extract_level)
  nominal_cols <- nominal_split$nominal
  continuous_cols <- nominal_split$continuous

  # Group on the nominal columns so only observed combinations are produced
  grouped_origin <- dplyr::group_by_at(ultra_df$origin(), dplyr::vars(dplyr::one_of(nominal_cols)))

  # summarise only peels off the last grouping level, so ungroup explicitly;
  # otherwise the final sample_n() would sample per group instead of overall
  single_run <- function() {
    dplyr::ungroup(ultraSingleSpoofRun(grouped_origin, continuous_cols))
  }

  first_run <- single_run()
  rows_per_run <- nrow(first_run)

  # Nothing to repeat: avoids ceiling(target_rows / 0) == Inf breaking seq_len()
  if (rows_per_run == 0) {
    return(first_run)
  }

  # Number of passes required to reach target_rows
  required_runs <- ceiling(target_rows / rows_per_run)

  # Collect the remaining passes in a list and bind once (no rbind in a loop)
  extra_runs <- lapply(seq_len(required_runs - 1), function(i) single_run())
  stacked_spoofed_data <- dplyr::bind_rows(c(list(first_run), extra_runs))

  # If above target_rows rows, sample down to exactly target_rows
  if (nrow(stacked_spoofed_data) > target_rows) {
    stacked_spoofed_data <- dplyr::sample_n(stacked_spoofed_data, target_rows)
  }
  stacked_spoofed_data
}
#' Constrained Random generator
#'
#' @description Draws a single uniform random value between the smallest and
#'   largest non-missing value of `vect`.
#'
#' @param vect the vector of values; missing values are ignored
#'
#' @return a single value drawn with `stats::runif()` from
#'   `[min(vect), max(vect)]`, or `NA_real_` when `vect` has no non-missing
#'   values
#' @export
ultraAggRandom <- function(vect) {
  # Drop missing values: one NA would otherwise turn the whole draw into NaN
  observed <- vect[!is.na(vect)]

  # Empty or all-missing input has no range to draw from
  if (length(observed) == 0L) {
    return(NA_real_)
  }

  stats::runif(1, base::min(observed), base::max(observed))
}
#' Random Name generator
#'
#' @description uses the embedded mtcars and state.name datasets to generate
#'   passable, if often amusing fake names in PII slots
#'
#' @return a single character string of the form "<first> <last>", where
#'   `first` is one whitespace-separated token from the row names of
#'   `datasets::mtcars` and `last` is one entry of `datasets::state.name`
#' @export
ultraNameSpoof <- function() {
  # Every whitespace-separated token of the car names is a candidate first name
  first_pool <- unlist(strsplit(rownames(datasets::mtcars), "\\s"))
  first <- sample(first_pool, 1)
  last <- sample(datasets::state.name, 1)
  paste0(first, " ", last)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.