Nothing
# Internal helpers to build a scrubbed JSON schema
# (intentionally not exported)
#' @keywords internal
#' @noRd
.safe_minmax <- function(x) {
mn <- suppressWarnings(min(x, na.rm = TRUE))
mx <- suppressWarnings(max(x, na.rm = TRUE))
if (!is.finite(mn)) mn <- NA
if (!is.finite(mx)) mx <- NA
list(min = mn, max = mx)
}
#' Build a safe schema from FAKE data
#'
#' We *only* summarize the FAKE data (never original),
#' so that no real values/labels leak. The goal is to give LLMs enough
#' structure (types, ranges, counts, missingness) to write good code.
#'
#' @param data original data (unused here, kept for future hooks)
#' @param fake fake data.frame generated by generate_fake_*()
#' @param level privacy preset (low/medium/high)
#' @return a nested list suitable for jsonlite::write_json()
#' @keywords internal
#' @noRd
.schema_from_data <- function(data, fake, level = c("low","medium","high")) {
level <- match.arg(level)
pkg <- "FakeDataR"
ver <- tryCatch(as.character(utils::packageVersion(pkg)),
error = function(e) "0.0.0.9000")
cols <- lapply(names(fake), function(nm) {
col <- fake[[nm]]
cls <- class(col)[1]
na_prop <- mean(is.na(col))
if (is.factor(col)) {
# Use only counts, no label names
out <- list(
name = nm, type = "factor",
n_levels = length(levels(col)),
na_prop = na_prop
)
# (Optionally include blank proportion if factor had blanks)
out$blank_prop <- mean(trimws(as.character(col)) == "", na.rm = TRUE)
return(out)
} else if (is.character(col)) {
uniq <- unique(trimws(as.character(col)))
uniq <- uniq[!is.na(uniq) & nzchar(uniq)]
return(list(
name = nm, type = "character",
n_levels = length(uniq),
na_prop = na_prop,
blank_prop = mean(trimws(as.character(col)) == "", na.rm = TRUE)
))
} else if (is.logical(col)) {
return(list(
name = nm, type = "logical",
true_prop = mean(col == TRUE, na.rm = TRUE),
false_prop = mean(col == FALSE, na.rm = TRUE),
na_prop = na_prop
))
} else if (inherits(col, "Date")) {
mm <- .safe_minmax(col)
# Keep ISO-formatted strings for stability
return(list(
name = nm, type = "Date",
min = if (!is.na(mm$min)) as.character(mm$min) else NA,
max = if (!is.na(mm$max)) as.character(mm$max) else NA,
na_prop = na_prop
))
} else if (inherits(col, "POSIXct")) {
tz <- attr(col, "tzone")
mm <- .safe_minmax(col)
return(list(
name = nm, type = "POSIXct",
tzone = if (is.null(tz)) NA else tz,
min = if (!is.na(mm$min)) as.character(mm$min) else NA,
max = if (!is.na(mm$max)) as.character(mm$max) else NA,
na_prop = na_prop
))
} else if (is.integer(col)) {
mm <- .safe_minmax(col)
return(list(
name = nm, type = "integer",
min = mm$min, max = mm$max,
na_prop = na_prop
))
} else if (is.numeric(col)) {
mm <- .safe_minmax(col)
return(list(
name = nm, type = "numeric",
min = mm$min, max = mm$max,
na_prop = na_prop
))
} else {
# Fallback: unknown column type, report class only
return(list(
name = nm, type = cls,
na_prop = na_prop
))
}
})
list(
package = pkg,
version = ver,
privacy = level,
n_rows = nrow(fake),
n_cols = ncol(fake),
columns = cols
)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.