## inst/doc/securing_data.R

## ----setup, include = FALSE-------------------------------------------------------------
## knitr chunk defaults: collapse = TRUE, comment = NA
knitr::opts_chunk$set(comment = NA, collapse = TRUE)
## print with a console width of 90 characters
options(width = 90)

## ---- eval = FALSE----------------------------------------------------------------------
#  library(tidyverse)
#  library(duawranglr)
#  
#  ## get crosswalk and admin data files
#  dua_cw_file <- system.file('extdata', 'dua_cw.csv', package = 'duawranglr')
#  admin_file <- system.file('extdata', 'admin_data.csv', package = 'duawranglr')
#  
#  ## set the DUA crosswalk
#  set_dua_cw(dua_cw_file)

## ---- echo = FALSE----------------------------------------------------------------------
## attach dplyr and readr without their startup messages
suppressPackageStartupMessages({
  library(dplyr)
  library(readr)
})
library(duawranglr)

## locate the example files under 'extdata' in the installed duawranglr
## package, then register the DUA crosswalk
admin_file <- system.file('extdata', 'admin_data.csv', package = 'duawranglr')
dua_cw_file <- system.file('extdata', 'dua_cw.csv', package = 'duawranglr')
set_dua_cw(dua_cw_file)

## ---------------------------------------------------------------------------------------
## show the level II and level III restrictions together
see_dua_options(level = c('level_ii', 'level_iii'))

## ---------------------------------------------------------------------------------------
## show the restrictions for all levels
see_dua_options()

## ---------------------------------------------------------------------------------------
## choose level II: deidentification required, with 'sid' as the ID column
set_dua_level('level_ii', id_column = 'sid', deidentify_required = TRUE)

## ---------------------------------------------------------------------------------------
## confirm the DUA level that was set, including its restrictions
see_dua_level(show_restrictions = TRUE)

## ---- eval = FALSE----------------------------------------------------------------------
#  ## read in raw administrative data
#  df <- read_dua_file(admin_file)
#  df

## ---- echo = FALSE----------------------------------------------------------------------
## read in raw administrative data with an explicit type for every column,
## using readr's one-letter abbreviations ('c' = character, 'i' = integer)
df <- readr::read_csv(
  admin_file,
  col_types = cols(
    sid = 'c',
    sname = 'c',
    dob = 'c',
    gender = 'i',
    raceeth = 'i',
    tid = 'i',
    tname = 'c',
    zip = 'i',
    mathscr = 'i',
    readscr = 'i'
  )
)
df

## ---- echo = FALSE----------------------------------------------------------------------
## keep a copy of the raw data; later chunks restore df from it
dff <- df

## ---- echo = FALSE----------------------------------------------------------------------
## deidentify data, writing the crosswalk to 'tmp.csv' in the session
## temporary directory
tmpdir <- tempdir()
df <- deid_dua(
  df,
  id_length = 20,
  write_crosswalk = TRUE,
  crosswalk_filename = file.path(tmpdir, 'tmp.csv')
)

## ---- eval = FALSE----------------------------------------------------------------------
#  ## deidentify data
#  df <- deid_dua(df, write_crosswalk = TRUE, id_length = 20)

## ---- echo = FALSE----------------------------------------------------------------------
## read the crosswalk written to the temporary directory back in (all
## columns as character) and display it
cw <- readr::read_csv(
  file.path(tmpdir, 'tmp.csv'),
  col_types = cols(.default = 'c')
)
rm(tmpdir)
cw

## ---------------------------------------------------------------------------------------
## display the deidentified data frame
df

## ---- echo = FALSE----------------------------------------------------------------------
## start again from the raw copy
df <- dff

## ---- echo = FALSE----------------------------------------------------------------------
## copy the full test crosswalk into the session temporary directory and
## display it
## NOTE(review): the source path is relative, so this presumably depends on
## the working directory the script is run from -- confirm
tmpdir <- tempdir()
cw2 <- readr::read_csv(
  '../tests/testthat/testdata/crosswalk_full.csv',
  col_types = cols(.default = 'c')
)
readr::write_csv(cw2, file.path(tmpdir, 'crosswalk_full.csv'))
cw2

## ---- echo = FALSE----------------------------------------------------------------------
## deidentify using the existing (full) crosswalk copy
df <- deid_dua(
  df,
  existing_crosswalk = file.path(tmpdir, 'crosswalk_full.csv')
)
rm(tmpdir)

## ---------------------------------------------------------------------------------------
df

## ---- echo = FALSE----------------------------------------------------------------------
## start again from the raw copy
df <- dff

## ---- echo = FALSE----------------------------------------------------------------------
## copy the partial test crosswalk into the session temporary directory and
## display it
## NOTE(review): the source path is relative, so this presumably depends on
## the working directory the script is run from -- confirm
tmpdir <- tempdir()
cw3 <- readr::read_csv(
  '../tests/testthat/testdata/crosswalk_partial.csv',
  col_types = cols(.default = 'c')
)
readr::write_csv(cw3, file.path(tmpdir, 'crosswalk_partial.csv'))
cw3

## ---- echo = FALSE----------------------------------------------------------------------
## deidentify using the existing (partial) crosswalk copy
df <- deid_dua(
  df,
  existing_crosswalk = file.path(tmpdir, 'crosswalk_partial.csv')
)


## ---------------------------------------------------------------------------------------
df

## ---- echo = FALSE----------------------------------------------------------------------
## re-read the partial crosswalk copy after deid_dua() has run and display it
## NOTE(review): presumably deid_dua() has updated this file with new IDs --
## confirm
cw4 <- readr::read_csv(
  file.path(tmpdir, 'crosswalk_partial.csv'),
  col_types = cols(.default = 'c')
)
rm(tmpdir)
cw4

## ---------------------------------------------------------------------------------------
## try writing the data to disk; the write includes one last check
## NOTE(review): df still holds the columns dropped further down, so this
## call is presumably expected to be refused by that check -- confirm
write_dua_df(df, 'cleaned_data.csv', output_type = 'csv')

## ---------------------------------------------------------------------------------------
## check the data frame against the DUA restrictions
check_dua_restrictions(df)

## ---------------------------------------------------------------------------------------
## drop the restricted columns
df <- select(df, -c(sname, dob, tname, zip))

## run the check a second time
check_dua_restrictions(df)

## ---------------------------------------------------------------------------------------
df

## ---- eval = FALSE----------------------------------------------------------------------
#  ## write data to disk
#  write_dua_df(df, 'cleaned_data_lev_ii.csv', output_type = 'csv')

## ---- eval = FALSE----------------------------------------------------------------------
#  ## save template to disk
#  make_dua_template('clean_data.R')

## ---- echo = FALSE----------------------------------------------------------------------
## write the template script into the session temporary directory, supplying
## the answers through answer_list, then print the generated file
template_path <- file.path(tempdir(), 'clean_data.R')
make_dua_template(template_path, answer_list = list('N', '', 'N', '', ''))
template_lines <- readLines(template_path)
writeLines(template_lines)

## Try the duawranglr package in your browser

## Any scripts or data that you put into this service are public.

## duawranglr documentation built on April 15, 2021, 5:06 p.m.