# RISE_ID/Deidentification/O3&4_annual_report_ID-deidentify_data.R

# Packages used by this script:
#   digest - digest(), used by hashed_id()
#   dplyr  - select() and the %>% pipe
#   readr  - read_csv() / write_csv()
# dplyr and readr were previously used without being attached, so the script
# only ran in sessions where they happened to be loaded already.
library(digest)
library(dplyr)
library(readr)

# Directory holding the raw ID_T4_annual_v1 survey exports. Inputs are read
# through file.path() instead of changing the working directory, and the
# global environment is no longer wiped, so running the script does not
# disturb the calling session.
input_dir <- "Z:/Data Files/Practice Data/ID_T4_annual_v1"
# Alternative input location kept from the original script:
# input_dir <- "S:/R-MNHS-SPHPM-EPM-IDEpi/RISE/4. Surveys/3. Objectives/1. FJ/3/20190930_child sampling/3. Data/1. raw data"

# Raw survey tables: the main annual form plus one file per repeat group.
annual <- read_csv(file.path(input_dir, "ID_T4_annual_v1.csv"))
annual_feces_kit <- read_csv(
  file.path(input_dir, "ID_T4_annual_v1-feces_kit.csv")
)
annual_hhd_members_departed <- read_csv(
  file.path(input_dir, "ID_T4_annual_v1-hhd_members-reason_departed.csv")
)
annual_household_depart <- read_csv(
  file.path(input_dir, "ID_T4_annual_v1-household-hhds_depart.csv")
)
annual_new_people <- read_csv(
  file.path(input_dir, "ID_T4_annual_v1-new_people-new_people2.csv")
)
annual_person_details <- read_csv(
  file.path(input_dir, "ID_T4_annual_v1-person_details1.csv")
)

# Working copies: identifying columns are overwritten in these, while the
# raw tables above stay untouched as the source of the values to hash.
temp_annual <- annual
temp_feces_kit <- annual_feces_kit
temp_hhd_members_departed <- annual_hhd_members_departed
temp_household_depart <- annual_household_depart
temp_new_people <- annual_new_people
temp_person_details <- annual_person_details

#' Hash a vector of identifiers with a salt
#'
#' Appends `salt` to each non-missing element of `x` (separated by a single
#' space, which is what `paste()` does by default) and returns the digest of
#' each resulting string. The string is hashed as-is (`serialize = FALSE`).
#'
#' NOTE(review): an unkeyed MD5 over low-entropy inputs such as phone numbers
#' or dates can be reversed by enumeration once the salt is known. Switching
#' algorithm or salt changes every emitted ID, so coordinate before changing
#' the default.
#'
#' @param x Vector of values to hash; coerced to character by `paste()`.
#' @param salt Single string appended to every value before hashing.
#' @param algo Digest algorithm handed to `digest()`. Defaults to `"md5"`,
#'   the algorithm this function has always used.
#' @return Character vector the same length as `x`. Missing inputs yield
#'   `NA_character_` instead of the hash of the literal text "NA", and
#'   zero-length input yields `character(0)`.
hashed_id <- function(x, salt, algo = "md5") {
  hashed <- rep(NA_character_, length(x))
  present <- !is.na(x)

  # Guard is required: paste(character(0), salt) returns one string, which
  # would otherwise produce a hash for input that has no values at all.
  if (any(present)) {
    salted <- paste(x[present], salt)
    hashed[present] <- vapply(
      salted,
      function(value) digest(value, algo = algo, serialize = FALSE),
      character(1),
      USE.NAMES = FALSE
    )
  }

  hashed
}

###############################################################################
#
# # ANNUAL SURVEY DE-IDENTIFICATION
#
###############################################################################

# De-identify the main annual survey - Jeff F
#
# Three groups of columns are pulled from the raw `annual` table and their
# hashed values are written over the same-named columns of `temp_annual`.
#
# NOTE(review): every value is hashed with the fixed salt "RISE". Short,
# guessable values (phone numbers, yes/no answers) hashed this way can be
# recovered by enumeration; confirm this meets the de-identification
# requirement before sharing the output.

# names and phone numbers
annual_names <- annual %>%
  select(
    devicephonenum, phone_yn, phone_y_no, name_surveyor_other,
    hhd_id1:hhd_id4, hhd_id_name1, hhd_id_name2, hhd_id_name, hhd_head_name,
    hhd_name, name1:name40, all_names_list1, new_hhd_id, new_name1:new_name40,
    new_all_names_list1, all_names_new_people2, all_names_combined1,
    final_name1:final_name40, respondent1_name, respondent2_name,
    respondent_name_text, respondent_name2
  )

# free-text fields
annual_text <- annual %>%
  select(
    house_status, adult_no_survey_why, survey_status_other,
    respondent_problems_other, interviewer_notes, note_why_no_participate,
    note_why_no_participate2
  )

# house numbers/names
annual_house <- annual %>%
  select(
    settlement_barcode, barcode_prev_yn, barcode_yn:barcode_location_other,
    instanceName
  )

# Replace each selected column with its hash. Missing values are kept as NA
# so that blanks are not turned into a valid-looking ID.
for (selected in list(annual_names, annual_text, annual_house)) {
  for (column in names(selected)) {
    raw_values <- selected[[column]]
    temp_annual[[column]] <- ifelse(
      is.na(raw_values), NA, hashed_id(raw_values, "RISE")
    )
  }
}

# The target is passed positionally: write_csv() called it `path` before
# readr 1.4.0 and `file` afterwards (with `path` deprecated), so this form
# works on both without a deprecation warning.
write_csv(
  temp_annual,
  file.path(
    "Z:/Data Files/Practice Data/Deidentification/ID_T4_annual_v1",
    "O3&4_deidentified_ID_T4_annual_v1.csv"
  )
)

###############################################################################
#
# # ANNUAL FECES KIT SURVEY DE-IDENTIFICATION
#
###############################################################################

# De-identify the annual feces kit repeat group - Jeff F
#
# Columns are pulled from the raw `annual_feces_kit` table and their hashed
# values are written over the same-named columns of `temp_feces_kit`.
#
# NOTE(review): dates of birth have a small value space, so a hash with the
# fixed salt "RISE" can be reversed by trying every date; confirm this is
# acceptable for the intended data release.

# names and phone numbers
feces_kits_names <- annual_feces_kit %>%
  select(name_feces, guardian, guardian_name, guardian_form3)

# dates of birth
feces_kits_dobs <- annual_feces_kit %>%
  select(dob_feces)

# free-text fields
feces_kits_text <- annual_feces_kit %>%
  select(child_no_feces_why)

# Replace each selected column with its hash; missing values stay NA.
for (selected in list(feces_kits_names, feces_kits_dobs, feces_kits_text)) {
  for (column in names(selected)) {
    raw_values <- selected[[column]]
    temp_feces_kit[[column]] <- ifelse(
      is.na(raw_values), NA, hashed_id(raw_values, "RISE")
    )
  }
}

# Target passed positionally: the argument is `path` in readr < 1.4.0 and
# `file` (with `path` deprecated) in later releases.
write_csv(
  temp_feces_kit,
  file.path(
    "Z:/Data Files/Practice Data/Deidentification/ID_T4_annual_v1",
    "O3&4_deidentified_ID_T4_annual_v1-feces_kit.csv"
  )
)

###############################################################################
#
# # ANNUAL HHD MEMBERS REASON DEPARTED SURVEY DE-IDENTIFICATION
#
###############################################################################

# De-identify the "household members - reason departed" repeat group - Jeff F
#
# The name column is pulled from the raw table and its hashed values are
# written over the same-named column of `temp_hhd_members_departed`.
hhd_members_departed_names <- annual_hhd_members_departed %>%
  select(name_departed2)

# Replace each selected column with its hash; missing values stay NA.
for (column in names(hhd_members_departed_names)) {
  raw_values <- hhd_members_departed_names[[column]]
  temp_hhd_members_departed[[column]] <- ifelse(
    is.na(raw_values), NA, hashed_id(raw_values, "RISE")
  )
}

# Target passed positionally: the argument is `path` in readr < 1.4.0 and
# `file` (with `path` deprecated) in later releases.
write_csv(
  temp_hhd_members_departed,
  file.path(
    "Z:/Data Files/Practice Data/Deidentification/ID_T4_annual_v1",
    "O3&4_deidentified_ID_T4_annual_v1-hhd_members-reason_departed.csv"
  )
)

###############################################################################
#
# # ANNUAL HOUSEHOLD HHDS DEPARTED SURVEY DE-IDENTIFICATION
#
###############################################################################

# De-identify the "household - households departed" repeat group - Jeff F
#
# The name column is pulled from the raw table and its hashed values are
# written over the same-named column of `temp_household_depart`.
household_depart_names <- annual_household_depart %>%
  select(name_hhd_id_departed)

# Replace each selected column with its hash; missing values stay NA.
for (column in names(household_depart_names)) {
  raw_values <- household_depart_names[[column]]
  temp_household_depart[[column]] <- ifelse(
    is.na(raw_values), NA, hashed_id(raw_values, "RISE")
  )
}

# Target passed positionally: the argument is `path` in readr < 1.4.0 and
# `file` (with `path` deprecated) in later releases.
write_csv(
  temp_household_depart,
  file.path(
    "Z:/Data Files/Practice Data/Deidentification/ID_T4_annual_v1",
    "O3&4_deidentified_ID_T4_annual_v1-household-hhds_depart.csv"
  )
)

###############################################################################
#
# # ANNUAL NEW PEOPLE SURVEY DE-IDENTIFICATION
#
###############################################################################

# De-identify the "new people" repeat group - Jeff F
#
# Columns are pulled from the raw `annual_new_people` table and their hashed
# values are written over the same-named columns of `temp_new_people`.
#
# NOTE(review): dates of birth have a small value space, so a hash with the
# fixed salt "RISE" can be reversed by trying every date; confirm this is
# acceptable for the intended data release.

# names
new_people_names <- annual_new_people %>%
  select(person_name_first:person_name_full, person_name_age)

# dates of birth
new_people_dobs <- annual_new_people %>%
  select(person_dob)

# Replace each selected column with its hash; missing values stay NA.
for (selected in list(new_people_names, new_people_dobs)) {
  for (column in names(selected)) {
    raw_values <- selected[[column]]
    temp_new_people[[column]] <- ifelse(
      is.na(raw_values), NA, hashed_id(raw_values, "RISE")
    )
  }
}

# Target passed positionally: the argument is `path` in readr < 1.4.0 and
# `file` (with `path` deprecated) in later releases.
write_csv(
  temp_new_people,
  file.path(
    "Z:/Data Files/Practice Data/Deidentification/ID_T4_annual_v1",
    "O3&4_deidentified_ID_T4_annual_v1-new_people-new_people2.csv"
  )
)

###############################################################################
#
# # ANNUAL PERSON DETAILS SURVEY DE-IDENTIFICATION
#
###############################################################################

# De-identify the "person details" repeat group - Jeff F
#
# The name column is pulled from the raw table and its hashed values are
# written over the same-named column of `temp_person_details`.
person_details_names <- annual_person_details %>%
  select(person_pull_name)

# Replace each selected column with its hash; missing values stay NA.
for (column in names(person_details_names)) {
  raw_values <- person_details_names[[column]]
  temp_person_details[[column]] <- ifelse(
    is.na(raw_values), NA, hashed_id(raw_values, "RISE")
  )
}

# Target passed positionally: the argument is `path` in readr < 1.4.0 and
# `file` (with `path` deprecated) in later releases.
write_csv(
  temp_person_details,
  file.path(
    "Z:/Data Files/Practice Data/Deidentification/ID_T4_annual_v1",
    "O3&4_deidentified_ID_T4_annual_v1-person_details1.csv"
  )
)
# Monash-RISE/riseR documentation built on Dec. 11, 2019, 9:49 a.m.