Nothing
## ----setup, include=FALSE-----------------------------------------------------
# knitr options applied to every chunk of the rendered document.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  message = FALSE,
  warning = FALSE
)
# Fix the RNG seed so the random draws used to build the example data repeat.
set.seed(2)
## -----------------------------------------------------------------------------
library(FakeDataR)
# Toy input table: 50 rows holding an integer id, formatted email and phone
# strings, a randomly drawn department label, and a random spend amount
# rounded to two decimals. check.names = FALSE keeps the names as written.
df <- data.frame(
  id = seq_len(50),
  email = sprintf("u%02d@x.com", seq_len(50)),
  phone = sprintf("555-01%02d", seq_len(50)),
  dept = sample(c("A", "B", "C"), size = 50, replace = TRUE),
  spend = round(runif(50, min = 10, max = 200), digits = 2),
  check.names = FALSE
)
# Variant 1: let the package detect sensitive columns itself
# (sensitive_detect = TRUE) and replace them with fake values
# (sensitive_strategy = "fake", the default strategy), at privacy level "low".
fake_low <- generate_fake_with_privacy(
  data = df,
  n = 60,
  level = "low",
  seed = 1,
  sensitive_detect = TRUE,
  sensitive_strategy = "fake",
  normalize = TRUE
)
# Variant 2: same auto-detection, but the detected sensitive columns are
# removed (sensitive_strategy = "drop") instead of faked; privacy level
# "medium".
fake_drop <- generate_fake_with_privacy(
  data = df,
  n = 60,
  level = "medium",
  seed = 1,
  sensitive_detect = TRUE,
  sensitive_strategy = "drop",
  normalize = TRUE
)
# Print the column names of both results so the "fake" and "drop" strategies
# can be compared side by side.
names(fake_low)
names(fake_drop)
# Inspect privacy metadata
# These read attributes from the returned objects. What each attribute holds
# is defined by FakeDataR -- presumably the detected columns, the removed
# columns, and an original-to-normalized name mapping; confirm in the
# package documentation.
attr(fake_low, "sensitive_columns")
attr(fake_drop, "dropped_columns")
attr(fake_low, "name_map")
## -----------------------------------------------------------------------------
# Explicit control: name the sensitive columns by hand and switch
# auto-detection off, then fake exactly those columns.
fake_explicit <- generate_fake_with_privacy(
  data = df,
  n = 60,
  seed = 1,
  sensitive = c("id", "email", "phone"),
  sensitive_detect = FALSE,
  sensitive_strategy = "fake",
  normalize = TRUE
)
# Show the resulting column names and the recorded sensitive-column attribute.
names(fake_explicit)
attr(fake_explicit, "sensitive_columns")
## -----------------------------------------------------------------------------
# A broad, configurable pattern set.
# Each entry is a regular-expression fragment. The fragments are OR-ed
# together below and matched case-insensitively anywhere in a column name, so
# short unanchored fragments such as "pin" or "ip" also hit names like
# "shipping". Anchor or remove entries if that is too eager for your data.
sensitive_patterns <- c(
  # direct IDs / names
  "^id$", "employee[_-]?id", "user(name|[_-]?id)?$",
  "full[_-]?name", "first[_-]?name", "last[_-]?name",
  # contact
  "email|e-mail", "phone|tel|mobile", "fax",
  # address / geo
  "address|street|road|avenue|apt|unit|suite|zip|postal|postcode|city|state|province|country",
  "lat(itude)?|lon(gitude)?|gps",
  # government IDs (international sampling)
  # "ced(ul)?a" covers "ceda"/"cedula" without repeating a possibly-empty group.
  "RegId|ssn|sin|nin|aadhaar|aadhar|bvn|curp|dni|ced(ul)?a|cpf|pan\\b|tin\\b|ein\\b|pesel|nin\\b",
  # licenses / travel docs
  "passport|visa|license|licence|driver|dl\\b|vin|plate",
  # finance / payments
  "iban|swift|bic|routing|sort[_-]?code|account|acct|bank",
  "credit|debit|card|cvv|cvc|pan[_-]?number",
  # auth / secrets / device
  "password|pass|pwd|pin|otp|secret|token|api[_-]?key|auth|bearer|session|cookie",
  "ip(_address)?|mac(_address)?|imei|imsi|serial|device|udid|android[_-]?id|idfa|gaid",
  # medical / patient
  "mrn|nhs|medicare|medicaid|patient|diagnosis",
  # birthdays
  "dob|date[_-]?of[_-]?birth|birth(day|date)",
  # education
  "student[_-]?id"
)

# Combine all fragments into a single alternation. "(?i)" is a Perl-style
# inline flag for case-insensitive matching, so the pattern is matched with
# perl = TRUE rather than relying on the default engine accepting it.
rx <- paste0("(?i)(", paste(sensitive_patterns, collapse = "|"), ")")

# Column names of df that match at least one pattern (computed once).
sens_cols <- names(df)[grepl(rx, names(df), perl = TRUE)]
sens_cols
# Feed the pattern-matched columns to the generator as the explicit sensitive
# set (always including "email"), with the package's own detection turned off.
fake_custom_detect <- generate_fake_with_privacy(
  data = df,
  n = 60,
  seed = 1,
  sensitive = unique(c(sens_cols, "email")),
  sensitive_detect = FALSE,
  sensitive_strategy = "fake",
  normalize = TRUE
)
# Show which columns were recorded as sensitive on the result.
attr(fake_custom_detect, "sensitive_columns")
## -----------------------------------------------------------------------------
# Pass the original data and the level-"low" fake to validate_fake(), then
# preview the first five entries of whatever it returns.
v1 <- validate_fake(df, fake_low)
head(v1, n = 5)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.