# RISE_FJ/Deidentification/O3_child_sampling_FJ-deidentify_data.R

# Packages used by this script:
#   digest - digest(), the hashing primitive behind hashed_id()
#   dplyr  - select() and the %>% pipe
#   readr  - read_csv() / write_csv()
# dplyr and readr were used without being attached, so the script failed in a
# fresh R session; attach them explicitly.
library(digest)
library(dplyr)
library(readr)

# NOTE(review): clearing the workspace and changing the working directory are
# session-wide side effects. They are kept because the relative input paths
# below are resolved against this working directory.
rm(list = ls())
setwd("Z:/Data Files/Data Files Objective 3")
#setwd("S:/R-MNHS-SPHPM-EPM-IDEpi/RISE/4. Surveys/3. Objectives/1. FJ/3/20190930_child sampling/3. Data/1. raw data")

# Read the three input CSV files (paths are relative to the working directory).
child <- read_csv(file = "O3_child_sample_FJ_v1.csv")
feces <- read_csv(file = "O3_feces_FJ_v1.csv")
feces.sample <- read_csv(file = "O3_feces_FJ_v1-feces_collection.csv")

# Working copies. The sections below overwrite selected columns of these
# copies with hashed values, always hashing from the untouched originals.
temp_child <- child
temp_feces <- feces
temp_feces.sample <- feces.sample

#' Hash each value of a vector together with a salt
#'
#' Pastes every element of `x` to `salt` (separated by a single space, the
#' `paste()` default) and returns the MD5 digest of each resulting string.
#'
#' Missing values are not treated specially: `NA` is pasted as the text "NA"
#' and hashed like any other value, so callers that need to keep `NA` must
#' mask the result themselves.
#'
#' @param x Vector of values to hash; coerced to character by `paste()`.
#' @param salt Value pasted after every element of `x` before hashing.
#' @return An unnamed character vector of MD5 digests, one per pasted
#'   element; `character(0)` when `x` is empty.
hashed_id <- function(x, salt) {
  # paste() recycles a zero-length argument to "", which would otherwise
  # produce a single hash of the salt alone for empty input.
  if (length(x) == 0L) {
    return(character(0))
  }

  salted <- paste(x, salt)

  # vapply() guarantees a character result of the same length as `salted`,
  # whereas sapply() changes its return type with the shape of the input.
  vapply(
    salted,
    function(value) digest(value, algo = "md5", serialize = FALSE),
    character(1),
    USE.NAMES = FALSE
  )
}

###############################################################################
#
# # CHILD SURVEY DE-IDENTIFICATION
#
###############################################################################

# Child survey: hash the name and phone-number columns (Jeff F).
# Values are read from the untouched `child` data and the hashed result is
# written to the same-named column of `temp_child`; missing values stay NA.
child_names <- select(
  child,
  devicephonenum, return_phone, phone_yn, phone_y_no,
  name_surveyor_other, return_name, hhd_id, hhd_head_name, hhd_name,
  name1:name40, child_name2, child_name_first, child_name_last,
  child_name_text, child_name_age, child_name, respondent_name2,
  respondent_name_first, respondent_name_last, respondent_name_text,
  person_name_check, respondent_name_age, respondent_name, guardian_form3
)

for (col_name in colnames(child_names)) {
  raw_values <- child_names[[col_name]]
  temp_child[[col_name]] <- ifelse(
    is.na(raw_values),
    NA,
    hashed_id(raw_values, "RISE")
  )
}

# Child survey: hash the date-of-birth columns (Jeff F).
# NOTE(review): the salt is a fixed literal in this script, so hashes of a
# small value space such as dates could be recomputed by enumeration --
# confirm this meets the de-identification requirement.
child_dobs <- select(child, dob_pl, child_dob1, child_dob, respondent_dob)

for (col_name in colnames(child_dobs)) {
  raw_values <- child_dobs[[col_name]]
  temp_child[[col_name]] <- ifelse(
    is.na(raw_values),
    NA,
    hashed_id(raw_values, "RISE")
  )
}

# Child survey: hash the free-text columns (Jeff F).
# `return_name` is also part of the name/phone column list; because the values
# are hashed from the raw `child` data each time, hashing it again here gives
# the same result.
child_text <- select(
  child,
  note_why_not_ready, return_name, feces_kit_barcode_note,
  weigh_problems_text, height_problems_text, blood_comment,
  survey_status_other, interviewer_notes, blood_type_comment,
  blood_spot_comment
)

for (col_name in colnames(child_text)) {
  raw_values <- child_text[[col_name]]
  temp_child[[col_name]] <- ifelse(
    is.na(raw_values),
    NA,
    hashed_id(raw_values, "RISE")
  )
}

#de-identify any house numbers in child survey - Jeff F
#child_house <- child %>%
#  select (settlement_barcode)

#for(i in names(child_house)) {
#  temp_child[[i]] <- ifelse(!is.na(child_house[[i]]), hashed_id(child_house[[i]], "RISE"), NA)
#}

# Write the de-identified child survey. The destination is passed positionally
# because readr 1.4.0 renamed this argument from `path` to `file` and
# deprecated `path`; the positional form works with both old and new readr.
write_csv(
  temp_child,
  "Z:/Data Files/Data Files Objective 3/Deidentified_Data/O3_deidentified_child_sample_FJ_v1.csv"
)


###############################################################################
#
# # FECES SURVEY DE-IDENTIFICATION
#
###############################################################################

# Feces survey: hash the name and phone-number columns (Jeff F).
# Values are read from the untouched `feces` data and the hashed result is
# written to the same-named column of `temp_feces`; missing values stay NA.
feces_names <- select(feces, devicephonenum, name_surveyor_other, name1:name40)

for (col_name in colnames(feces_names)) {
  raw_values <- feces_names[[col_name]]
  temp_feces[[col_name]] <- ifelse(
    is.na(raw_values),
    NA,
    hashed_id(raw_values, "RISE")
  )
}

# Feces survey: hash the free-text column (Jeff F).
feces_text <- select(feces, survey_status_other)

for (col_name in colnames(feces_text)) {
  raw_values <- feces_text[[col_name]]
  temp_feces[[col_name]] <- ifelse(
    is.na(raw_values),
    NA,
    hashed_id(raw_values, "RISE")
  )
}

#de-identify any house numbers in feces survey - Jeff F
#feces_house <- feces %>%
#  select (settlement_barcode)

#for(i in names(feces_house)) {
#  temp_feces[[i]] <- ifelse(!is.na(feces_house[[i]]), hashed_id(feces_house[[i]], "RISE"), NA)
#}

# Write the de-identified feces survey. The destination is passed positionally
# because readr 1.4.0 renamed this argument from `path` to `file` and
# deprecated `path`; the positional form works with both old and new readr.
write_csv(
  temp_feces,
  "Z:/Data Files/Data Files Objective 3/Deidentified_Data/O3_deidentified_feces_FJ_v1.csv"
)


###############################################################################
#
# # FECES.SAMPLE SURVEY DE-IDENTIFICATION
#
###############################################################################

# Feces collection table: hash the name and phone-number columns (Jeff F).
# Values are read from the untouched `feces.sample` data and the hashed result
# is written to the same-named column of `temp_feces.sample`; missing values
# stay NA.
feces.sample_names <- select(
  feces.sample,
  respondent_name2, respondent_name_first, respondent_name_last,
  respondent_name_text, respondent_name, return_name, return_phone
)

for (col_name in colnames(feces.sample_names)) {
  raw_values <- feces.sample_names[[col_name]]
  temp_feces.sample[[col_name]] <- ifelse(
    is.na(raw_values),
    NA,
    hashed_id(raw_values, "RISE")
  )
}

# Feces collection table: hash the date-of-birth column (Jeff F).
# NOTE(review): the salt is a fixed literal in this script, so hashes of a
# small value space such as dates could be recomputed by enumeration --
# confirm this meets the de-identification requirement.
feces.sample_dobs <- select(feces.sample, respondent_dob)

for (col_name in colnames(feces.sample_dobs)) {
  raw_values <- feces.sample_dobs[[col_name]]
  temp_feces.sample[[col_name]] <- ifelse(
    is.na(raw_values),
    NA,
    hashed_id(raw_values, "RISE")
  )
}

# Feces collection table: hash the free-text column (Jeff F).
feces.sample_text <- select(feces.sample, feces_comments)

for (col_name in colnames(feces.sample_text)) {
  raw_values <- feces.sample_text[[col_name]]
  temp_feces.sample[[col_name]] <- ifelse(
    is.na(raw_values),
    NA,
    hashed_id(raw_values, "RISE")
  )
}

# Write the de-identified feces collection table. The destination is passed
# positionally because readr 1.4.0 renamed this argument from `path` to `file`
# and deprecated `path`; the positional form works with both old and new readr.
write_csv(
  temp_feces.sample,
  "Z:/Data Files/Data Files Objective 3/Deidentified_Data/O3_deidentified_feces_FJ_v1-feces_collection.csv"
)
# Monash-RISE/riseR documentation built on Dec. 11, 2019, 9:49 a.m.