#' @title methods to deidentify different forms of data
#' @description
#' The deidentify_text method offers a flexible approach to encryption offering the user the option to encrypt text
#' columns using either openssl or sodium. The encryption path chosen depends on the encryption key generated by
#' the user. For example, if a user generates an aes_key, and passes it to deidentify_text, aes encryption will be
#' automatically applied. The function will return a dataframe with the an encrypt_id suffix added to the end of the
#' column name. Thus, a user will have to actively remove the unencrypted columns manually.
#' @param .data
#' a data frame
#' @param key
#' A cyphr key generated from the cyphr package
#' @param cols_to_encrypt
#' This is specific to data.frames. This should be a character vector specifying
#' the names of the desired columns to encrypt.
#' @return
#' depending on the input, an object of the same class. If you input
#' a data.frame, you will get a data.frame with columns with a enc suffix
#' @examples
#' \dontrun{
#' data("initiations")
#' aes_key <- gen_aes_key(24)
#' deidentify_text(initiations, "case_participant_id", aes_key)
#' }
#' @export
deidentify_text <- function(.data,key,cols_to_encrypt = NULL) {
UseMethod("deidentify_text")
}
#' @export
deidentify_text.data.frame <- function(.data, key = NULL, cols_to_encrypt) {
if (!is.character(cols_to_encrypt)) {
stop("cols_to_encrypt must be a character vector.")
}
if (class(key) != "cyphr_key") {
stop("key must be a cyphr key")
}
# check that the column name actually exists in the data.
if (!all(cols_to_encrypt %in% names(.data))) {
stop("You have selected columns which are not in the data.")
}
enc_data <- .data[, cols_to_encrypt, drop = F] %>%
dplyr::mutate(dplyr::across(dplyr::everything(),
list("encrypt_id" = ~ .apply_enc(as.character(.), key = key)))) %>%
# rejoin initial data based on unencrypted columns
dplyr::left_join(.data, by = cols_to_encrypt)
return(enc_data)
}
#' @export
deidentify_text.character <- function(.data, key = NULL,cols_to_encrypt = NULL) {
if(is.null(key)){
stop("Key must be provided in order to encrypt")
}
if (class(key) != "cyphr_key") {
stop("key must be a cyphr key")
}
enc_data <- .data %>%
.apply_enc(key = key) #encrypt the string
return(enc_data)
}
.enc_strings <- function(col_to_encrypt, key) {
# encrypts the vector depending on the key
enc_string <- paste0(cyphr::encrypt_string(col_to_encrypt, key = key), collapse = "")
return(enc_string)
}
# this encrypts the unique vector of strings and passes it on.
.apply_enc <- function(string_to_encrypt, key) {
unique_ent <- unique(string_to_encrypt)
enc_string <-
unlist(lapply(unique_ent, function(x) {
.enc_strings(x, key)
}))
# change the names based on the original encrypted string
names(enc_string) <- unique_ent
# use a lookup table to speedily replace values
# this also removes the names of the vector
final_vec <- unname(enc_string[string_to_encrypt])
return(final_vec)
}
is_any_date_type <- function(x) {
return(lubridate::is.Date(x) | lubridate::is.POSIXct(x) | lubridate::is.POSIXlt(x))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.