anonymizer
anonymizes data containing Personally Identifiable Information (PII) using a combination of salting and hashing. You can find quality examples of data anonymization in R here, here, and here.
You can install the latest released version from CRAN:
# Install the anonymizer package from the configured package repository.
install.packages("anonymizer")
Or from GitHub with:
# Make sure a sufficiently recent devtools (>= 1.6) is available before
# installing from GitHub.
# - system.file() returns "" for a package that is not installed, so the check
#   no longer fails with the error packageVersion() raises for a missing
#   package; it also does not load an outdated devtools namespace before the
#   reinstall.
# - The version is compared against the string "1.6" rather than a
#   floating-point literal, so no numeric-to-version coercion is involved.
if (!nzchar(system.file(package = "devtools")) ||
    packageVersion("devtools") < "1.6") {
  install.packages("devtools")
}
devtools::install_github("paulhendricks/anonymizer")
If you encounter a clear bug, please file a minimal reproducible example on GitHub.
`anonymizer` employs four convenience functions: `salt`, `unsalt`, `hash`, and `anonymize`.
# dplyr supplies the %>% pipe used below; anonymizer is the package on show.
library(dplyr, warn.conflicts = FALSE)
library(anonymizer)

# Starting point: the first six lowercase letters.
letters %>% head()
#> [1] "a" "b" "c" "d" "e" "f"

# With .seed = 1, salt() surrounds every element with the same salt string.
letters %>% head() %>% salt(.seed = 1)
#> [1] "gjoxfagjoxf" "gjoxfbgjoxf" "gjoxfcgjoxf" "gjoxfdgjoxf" "gjoxfegjoxf"
#> [6] "gjoxffgjoxf"

# unsalt() with the same seed recovers the original values.
letters %>% head() %>% salt(.seed = 1) %>% unsalt(.seed = 1)
#> [1] "a" "b" "c" "d" "e" "f"

# hash() on its own, using the crc32 algorithm.
letters %>% head() %>% hash(.algo = "crc32")
#> [1] "c0749952" "597dc8e8" "2e7af87e" "b01e6ddd" "c7195d4b" "5e100cf1"

# Salting first and then hashing ...
letters %>% head() %>% salt(.seed = 1) %>% hash(.algo = "crc32")
#> [1] "b0891ad8" "361d6876" "fd41bbd3" "e0448b6b" "2b1858ce" "ad8c2a60"

# ... gives the same result as the one-step anonymize() call.
letters %>% head() %>% anonymize(.algo = "crc32", .seed = 1)
#> [1] "b0891ad8" "361d6876" "fd41bbd3" "e0448b6b" "2b1858ce" "ad8c2a60"
library(generator)

# Number of fake records to generate.
n <- 6

# Seed R's random number generator before generating the data.
set.seed(1)

# One generator call per column; the call order is kept as-is because it
# determines which random values each column receives.
ashley_madison <- data.frame(
  name = r_full_names(n),
  snn = r_national_identification_numbers(n),
  dob = r_date_of_births(n),
  email = r_email_addresses(n),
  ip = r_ipv4_addresses(n),
  phone = r_phone_numbers(n),
  credit_card = r_credit_card_numbers(n),
  lat = r_latitudes(n),
  lon = r_longitudes(n),
  stringsAsFactors = FALSE
)

# Render the raw (not yet anonymized) data as a markdown table.
knitr::kable(ashley_madison, format = "markdown")
| name | snn | dob | email | ip | phone | credit_card | lat | lon |
|:---|:---|:---|:---|:---|:---|:---|:---|:---|
| Eldridge Pfannerstill | 442-34-5338 | 1993-04-28 | ntakqojv@lgbcyk.rkv | 45.84.71.225 | 6794976958 | 4125-7204-9193-5140 | -2.7018575 | 8.634988 |
| Augustine Homenick | 799-44-6396 | 1912-09-08 | iqg@mtcuh.viy | 191.116.55.106 | 3275827694 | 2182-5994-2283-9486 | -70.4148630 | -65.827918 |
| Jennie Runte | 941-11-5441 | 1985-01-12 | wjszy@sjhreocvt.gbp | 27.128.73.17 | 7419351735 | 4370-4866-4735-7857 | -45.4091701 | -79.932229 |
| Araceli Kunde | 290-44-2675 | 1948-04-28 | uljsnvhfr@qfdkumtn.jkd | 221.47.229.86 | 3243246285 | 6682-5074-2898-9396 | -0.2673845 | 103.514583 |
| Josue Rau | 686-88-8446 | 1996-06-14 | c@lqxzkdpi.nfy | 157.136.114.185 | 9169736873 | 4510-3757-4858-5236 | -22.8839925 | 72.886505 |
| Elnora Zemlak | 212-40-7016 | 1976-01-09 | capvnl@nympzf.gsk | 143.20.199.87 | 3295843196 | 7206-6205-2194-6432 | 78.2444466 | -120.590050 |
library(detector)

# Run detect() over the data frame and render its result as a markdown table.
ashley_madison %>%
  detect() %>%
  knitr::kable(format = "markdown")
| column_name | has_email_addresses | has_phone_numbers | has_national_identification_numbers |
|:---|:---|:---|:---|
| name | FALSE | FALSE | FALSE |
| snn | FALSE | FALSE | TRUE |
| dob | FALSE | FALSE | FALSE |
| email | TRUE | FALSE | FALSE |
| ip | FALSE | FALSE | FALSE |
| phone | FALSE | TRUE | FALSE |
| credit_card | FALSE | FALSE | FALSE |
| lat | FALSE | TRUE | FALSE |
| lon | FALSE | TRUE | FALSE |
# Apply anonymize() (crc32) to every column. Assigning into `ashley_madison[]`
# replaces the column contents while keeping the object a data frame.
ashley_madison[] <- lapply(
  ashley_madison,
  anonymize,
  .algo = "crc32"
)

# Render the anonymized data as a markdown table.
ashley_madison %>%
  knitr::kable(format = "markdown")
| name | snn | dob | email | ip | phone | credit_card | lat | lon |
|:---------|:---------|:---------|:---------|:---------|:---------|:-------------|:---------|:---------|
| c83b4030 | 393d73d7 | 18fe3e97 | aa5dead | e4b6e2c6 | d3af086b | cb7b5ba | 80064d9e | 7dc18006 |
| 98a6974d | 70ac65b0 | bf8857eb | a75947f0 | 5e0e7cef | 5c562036 | 7cd11025 | fdf9526d | 5828b961 |
| 77dcbc4d | 391740d7 | f0b13e46 | 6cefaee2 | fbaaa8f1 | 9a66f57d | 299a42fe | 734886e3 | 9ea0e9a5 |
| a48e2b0b | 6704117d | 2e40fae7 | e1598468 | b7a422ba | 1f0a0373 | f420590f | 53155b41 | 81018fc |
| 4fecaeb2 | 9d6bf732 | e881bbe7 | 4b412ff9 | d1f2740c | ac553e93 | e3716031 | f3d9a005 | ef3bdb8d |
| abc3b85c | 90866189 | 6cefc2f4 | f26e84b1 | 52596e0e | b14fa5df | 9189fc4f | 85c69f65 | f0db3bb0 |
To cite package ‘anonymizer’ in publications use:
Paul Hendricks (2015). anonymizer: Anonymize Data Containing Personally Identifiable Information. R package version 0.2.0. https://github.com/paulhendricks/anonymizer
A BibTeX entry for LaTeX users is
@Manual{,
title = {anonymizer: Anonymize Data Containing Personally Identifiable Information},
author = {Paul Hendricks},
year= {2015},
note = {R package version 0.2.0},
url = {https://github.com/paulhendricks/anonymizer},
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.