# RISE_ID/Deidentification/O3_baseline_ID-deidentify_data.R

library(digest)
# read_csv()/write_csv() come from readr; select() and %>% come from dplyr.
# Both are used throughout this script, so they are attached here explicitly.
library(readr)
library(dplyr)

# Folder holding the raw baseline exports. Files are read through file.path()
# rather than setwd(), so the session's working directory is left untouched,
# and the caller's workspace is deliberately not cleared.
input_dir <- "Z:/Data Files/Practice Data/ID_T0_baseline"
#input_dir <- "S:/R-MNHS-SPHPM-EPM-IDEpi/RISE/4. Surveys/3. Objectives/1. FJ/3/20190930_child sampling/3. Data/1. raw data"

# Read one raw export, given its file name inside input_dir.
read_raw <- function(file_name) {
  read_csv(file = file.path(input_dir, file_name))
}

# House survey and its water-use repeat group.
house <- read_raw("RISE_baseline_house_ID.csv")
house_water <- read_raw("RISE_baseline_house_ID-house_survey-water_use-water_repeat.csv")

# Consent survey and its repeat groups.
consent_final <- read_raw("consent_ID_final.csv")
consent_form3 <- read_raw("consent_ID_final-consent_form3.csv")
consent_childname <- read_raw("consent_ID_final-consent3_childname.csv")

# Household (hhd) survey and its repeat groups.
hhd <- read_raw("RISE_baseline_hhd_ID.csv")
hhd_activity <- read_raw("RISE_baseline_hhd_ID-hhd_survey-demographics-activity.csv")
hhd_daycare <- read_raw("RISE_baseline_hhd_ID-hhd_survey-demographics-daycare.csv")
hhd_ethnicity <- read_raw("RISE_baseline_hhd_ID-hhd_survey-demographics-ethnicity_screen-ethnicity_repeat.csv")
hhd_marital_status <- read_raw("RISE_baseline_hhd_ID-hhd_survey-demographics-marital_status1.csv")
hhd_read <- read_raw("RISE_baseline_hhd_ID-hhd_survey-demographics-read.csv")
hhd_religion <- read_raw("RISE_baseline_hhd_ID-hhd_survey-demographics-religion_screen-religion_repeat.csv")
hhd_school <- read_raw("RISE_baseline_hhd_ID-hhd_survey-demographics-school.csv")
hhd_person_details <- read_raw("RISE_baseline_hhd_ID-hhd_survey-person_details1.csv")

# Working copies that receive the hashed columns. The raw tables above are
# never modified, so every section below hashes from the original values.
temp_house <- house
temp_house_water <- house_water

temp_consent_final <- consent_final
temp_consent_form3 <- consent_form3
temp_childname <- consent_childname

temp_hhd <- hhd
temp_hhd_activity <- hhd_activity
temp_hhd_daycare <- hhd_daycare
temp_hhd_ethnicity <- hhd_ethnicity
temp_hhd_marital_status <- hhd_marital_status
temp_hhd_read <- hhd_read
temp_hhd_religion <- hhd_religion
temp_hhd_school <- hhd_school
temp_hhd_person_details <- hhd_person_details

# Hash identifying values together with a salt.
#
# Each non-missing element of `x` is pasted to `salt` (paste()'s default
# single-space separator is kept so hashes match earlier runs) and the
# resulting string is hashed with digest().
#
# Arguments:
#   x    - vector of values to hash; paste() coerces it to character.
#   salt - string appended to every value before hashing.
#   algo - hash algorithm handed to digest(); defaults to "md5", the
#          algorithm this script has always used.
#
# Returns a character vector with one element per element of `x`. Missing
# inputs stay NA instead of being hashed as the literal text "NA <salt>",
# and zero-length input yields character(0).
hashed_id <- function(x, salt, algo = "md5") {
  hashed <- rep(NA_character_, length(x))
  present <- !is.na(x)
  # Paste before subsetting so a vector-valued salt stays aligned with x.
  salted <- paste(x, salt)[present]
  # vapply() guarantees a character result whatever the input length.
  hashed[present] <- vapply(
    salted,
    function(value) digest(value, algo = algo, serialize = FALSE),
    character(1),
    USE.NAMES = FALSE
  )
  hashed
}

###############################################################################
#
# # HOUSE SURVEY DE-IDENTIFICATION
#
###############################################################################

# De-identify names and phone numbers in the house survey - Jeff F
house_names <- select(
  house,
  devicephonenum, return_phone, return_phone2, name_surveyor_other,
  hhdhead_record_name, hhdhead_pl, name_head:return_name, adult_name1,
  return_name2
)

# De-identify any free-text fields in the house survey - Jeff F
house_text <- select(
  house,
  note_why_no_participate1, note_why_no_participate2, survey_status_other
)

# De-identify any house numbers/names in the house survey - Jeff F
house_house <- select(
  house,
  settlement_barcode, settlement_pl, houseno_pl, instanceName
)

# Hash from the raw `house` values and store the result in `temp_house`.
# Entries that are missing in the raw data stay NA rather than being hashed.
for (raw_cols in list(house_names, house_text, house_house)) {
  for (col in names(raw_cols)) {
    values <- raw_cols[[col]]
    temp_house[[col]] <- ifelse(
      !is.na(values), hashed_id(values, "RISE"), NA
    )
  }
}

# The output file is passed positionally: readr 1.4.0 renamed `path` to
# `file`, and the positional form works with both old and new versions.
write_csv(
  temp_house,
  "Z:/Data Files/Practice Data/Deidentification/ID_T0_baseline/O3_deidentified_RISE_baseline_house_ID.csv"
)

###############################################################################
#
# # HOUSE WATER SURVEY DE-IDENTIFICATION
#
###############################################################################

# There is no information that needs to be de-identified in this survey, so
# the working copy is written out unchanged.
# The output file is passed positionally: readr 1.4.0 renamed `path` to
# `file`, and the positional form works with both old and new versions.
write_csv(
  temp_house_water,
  "Z:/Data Files/Practice Data/Deidentification/ID_T0_baseline/O3_deidentified_RISE_baseline_house_ID-house_survey-water_use-water_repeat.csv"
)

###############################################################################
#
# # CONSENT FINAL SURVEY DE-IDENTIFICATION
#
###############################################################################

# De-identify names and phone numbers in the consent_final survey - Jeff F
consent_final_names <- select(
  consent_final,
  devicephonenum, phone_y_no, name_surveyor_other, hhd_head_name
)

# De-identify the settlement name in the consent_final survey - Jeff F
consent_final_house <- select(consent_final, settlement_name)

# Hash from the raw `consent_final` values into `temp_consent_final`.
# Entries that are missing in the raw data stay NA rather than being hashed.
for (raw_cols in list(consent_final_names, consent_final_house)) {
  for (col in names(raw_cols)) {
    values <- raw_cols[[col]]
    temp_consent_final[[col]] <- ifelse(
      !is.na(values), hashed_id(values, "RISE"), NA
    )
  }
}

# The output file is passed positionally: readr 1.4.0 renamed `path` to
# `file`, and the positional form works with both old and new versions.
write_csv(
  temp_consent_final,
  "Z:/Data Files/Practice Data/Deidentification/ID_T0_baseline/O3_deidentified_consent_ID_final.csv"
)

###############################################################################
#
# # CONSENT FORM3 SURVEY DE-IDENTIFICATION
#
###############################################################################

# De-identify the guardian_form3 column in the consent form3 survey - Jeff F
consent_form3_names <- select(consent_form3, guardian_form3)

# Hash from the raw `consent_form3` values into `temp_consent_form3`.
# Entries that are missing in the raw data stay NA rather than being hashed.
for (col in names(consent_form3_names)) {
  values <- consent_form3_names[[col]]
  temp_consent_form3[[col]] <- ifelse(
    !is.na(values), hashed_id(values, "RISE"), NA
  )
}

# The output file is passed positionally: readr 1.4.0 renamed `path` to
# `file`, and the positional form works with both old and new versions.
write_csv(
  temp_consent_form3,
  "Z:/Data Files/Practice Data/Deidentification/ID_T0_baseline/O3_deidentified_consent_ID_final-consent_form3.csv"
)

###############################################################################
#
# # CONSENT CHILD NAME SURVEY DE-IDENTIFICATION
#
###############################################################################

# De-identify child names in the consent child name survey - Jeff F
consent_childname_names <- select(consent_childname, child_name)

# De-identify dobs in the consent child name survey - Jeff F
consent_childnames_dobs <- select(consent_childname, dob)

# Hash from the raw `consent_childname` values into `temp_childname`.
# Entries that are missing in the raw data stay NA rather than being hashed.
for (raw_cols in list(consent_childname_names, consent_childnames_dobs)) {
  for (col in names(raw_cols)) {
    values <- raw_cols[[col]]
    temp_childname[[col]] <- ifelse(
      !is.na(values), hashed_id(values, "RISE"), NA
    )
  }
}

# The output file is passed positionally: readr 1.4.0 renamed `path` to
# `file`, and the positional form works with both old and new versions.
write_csv(
  temp_childname,
  "Z:/Data Files/Practice Data/Deidentification/ID_T0_baseline/O3_deidentified_consent_ID_final-consent3_childname.csv"
)

###############################################################################
#
# # HHD SURVEY DE-IDENTIFICATION
#
###############################################################################

# De-identify names and phone numbers in the hhd survey - Jeff F
hhd_names <- select(
  hhd,
  devicephonenum, return_phone, name_surveyor_other, return_name,
  hhdhead_record_name, hhdhead_pl, name_head, hhd_name, name_calc1:name_calc20,
  join_name_list, join_child_under5_list, adult_respondent_name
)

# De-identify any free-text fields in the hhd survey - Jeff F
hhd_text <- select(
  hhd,
  note_why_no_participate, note_why_no_participate2, interviewer_notes,
  survey_status_other, respondent_problems_other
)

# De-identify any house numbers/names in the hhd survey - Jeff F
hhd_house <- select(hhd, settlement_barcode, settlement_pl, houseno_pl)

# Hash from the raw `hhd` values and store the result in `temp_hhd`.
# Entries that are missing in the raw data stay NA rather than being hashed.
for (raw_cols in list(hhd_names, hhd_text, hhd_house)) {
  for (col in names(raw_cols)) {
    values <- raw_cols[[col]]
    temp_hhd[[col]] <- ifelse(
      !is.na(values), hashed_id(values, "RISE"), NA
    )
  }
}

# The output file is passed positionally: readr 1.4.0 renamed `path` to
# `file`, and the positional form works with both old and new versions.
write_csv(
  temp_hhd,
  "Z:/Data Files/Practice Data/Deidentification/ID_T0_baseline/O3_deidentified_RISE_baseline_hhd_ID.csv"
)

###############################################################################
#
# # HHD ACTIVITY SURVEY DE-IDENTIFICATION
#
###############################################################################

# De-identify the person_name_activity column in the hhd activity survey - Jeff F
activity_names <- select(hhd_activity, person_name_activity)

# Hash from the raw `hhd_activity` values into `temp_hhd_activity`.
# Entries that are missing in the raw data stay NA rather than being hashed.
for (col in names(activity_names)) {
  values <- activity_names[[col]]
  temp_hhd_activity[[col]] <- ifelse(
    !is.na(values), hashed_id(values, "RISE"), NA
  )
}

# The output file is passed positionally: readr 1.4.0 renamed `path` to
# `file`, and the positional form works with both old and new versions.
write_csv(
  temp_hhd_activity,
  "Z:/Data Files/Practice Data/Deidentification/ID_T0_baseline/O3_deidentified_RISE_baseline_hhd_ID-hhd_survey-demographics-activity.csv"
)

###############################################################################
#
# # HHD DAYCARE SURVEY DE-IDENTIFICATION
#
###############################################################################

# De-identify the person_name_daycare column in the hhd daycare survey - Jeff F
daycare_names <- select(hhd_daycare, person_name_daycare)

# Hash from the raw `hhd_daycare` values into `temp_hhd_daycare`.
# Entries that are missing in the raw data stay NA rather than being hashed.
for (col in names(daycare_names)) {
  values <- daycare_names[[col]]
  temp_hhd_daycare[[col]] <- ifelse(
    !is.na(values), hashed_id(values, "RISE"), NA
  )
}

# The output file is passed positionally: readr 1.4.0 renamed `path` to
# `file`, and the positional form works with both old and new versions.
write_csv(
  temp_hhd_daycare,
  "Z:/Data Files/Practice Data/Deidentification/ID_T0_baseline/O3_deidentified_RISE_baseline_hhd_ID-hhd_survey-demographics-daycare.csv"
)

###############################################################################
#
# # HHD ETHNICITY SURVEY DE-IDENTIFICATION
#
###############################################################################

# De-identify the person_name_ethnicity column in the hhd ethnicity survey - Jeff F
ethnicity_names <- select(hhd_ethnicity, person_name_ethnicity)

# Hash from the raw `hhd_ethnicity` values into `temp_hhd_ethnicity`.
# Entries that are missing in the raw data stay NA rather than being hashed.
for (col in names(ethnicity_names)) {
  values <- ethnicity_names[[col]]
  temp_hhd_ethnicity[[col]] <- ifelse(
    !is.na(values), hashed_id(values, "RISE"), NA
  )
}

# The output file is passed positionally: readr 1.4.0 renamed `path` to
# `file`, and the positional form works with both old and new versions.
write_csv(
  temp_hhd_ethnicity,
  "Z:/Data Files/Practice Data/Deidentification/ID_T0_baseline/O3_deidentified_RISE_baseline_hhd_ID-hhd_survey-demographics-ethnicity_screen-ethnicity_repeat.csv"
)

###############################################################################
#
# # HHD MARITAL STATUS SURVEY DE-IDENTIFICATION
#
###############################################################################

# De-identify the person_name_marital column in the hhd marital status survey - Jeff F
marital_names <- select(hhd_marital_status, person_name_marital)

# Hash from the raw `hhd_marital_status` values into `temp_hhd_marital_status`.
# Entries that are missing in the raw data stay NA rather than being hashed.
for (col in names(marital_names)) {
  values <- marital_names[[col]]
  temp_hhd_marital_status[[col]] <- ifelse(
    !is.na(values), hashed_id(values, "RISE"), NA
  )
}

# The output file is passed positionally: readr 1.4.0 renamed `path` to
# `file`, and the positional form works with both old and new versions.
write_csv(
  temp_hhd_marital_status,
  "Z:/Data Files/Practice Data/Deidentification/ID_T0_baseline/O3_deidentified_RISE_baseline_hhd_ID-hhd_survey-demographics-marital_status1.csv"
)

###############################################################################
#
# # HHD READ SURVEY DE-IDENTIFICATION
#
###############################################################################

# De-identify the person_name_read column in the hhd read survey - Jeff F
read_names <- select(hhd_read, person_name_read)

# Hash from the raw `hhd_read` values into `temp_hhd_read`.
# Entries that are missing in the raw data stay NA rather than being hashed.
for (col in names(read_names)) {
  values <- read_names[[col]]
  temp_hhd_read[[col]] <- ifelse(
    !is.na(values), hashed_id(values, "RISE"), NA
  )
}

# The output file is passed positionally: readr 1.4.0 renamed `path` to
# `file`, and the positional form works with both old and new versions.
write_csv(
  temp_hhd_read,
  "Z:/Data Files/Practice Data/Deidentification/ID_T0_baseline/O3_deidentified_RISE_baseline_hhd_ID-hhd_survey-demographics-read.csv"
)

###############################################################################
#
# # HHD RELIGION SURVEY DE-IDENTIFICATION
#
###############################################################################

# De-identify the person_name_religion column in the hhd religion survey - Jeff F
religion_names <- select(hhd_religion, person_name_religion)

# Hash from the raw `hhd_religion` values into `temp_hhd_religion`.
# Entries that are missing in the raw data stay NA rather than being hashed.
for (col in names(religion_names)) {
  values <- religion_names[[col]]
  temp_hhd_religion[[col]] <- ifelse(
    !is.na(values), hashed_id(values, "RISE"), NA
  )
}

# The output file is passed positionally: readr 1.4.0 renamed `path` to
# `file`, and the positional form works with both old and new versions.
write_csv(
  temp_hhd_religion,
  "Z:/Data Files/Practice Data/Deidentification/ID_T0_baseline/O3_deidentified_RISE_baseline_hhd_ID-hhd_survey-demographics-religion_screen-religion_repeat.csv"
)

###############################################################################
#
# # HHD SCHOOL SURVEY DE-IDENTIFICATION
#
###############################################################################

# De-identify the person_name_school column in the hhd school survey - Jeff F
school_names <- select(hhd_school, person_name_school)

# Hash from the raw `hhd_school` values into `temp_hhd_school`.
# Entries that are missing in the raw data stay NA rather than being hashed.
for (col in names(school_names)) {
  values <- school_names[[col]]
  temp_hhd_school[[col]] <- ifelse(
    !is.na(values), hashed_id(values, "RISE"), NA
  )
}

# The output file is passed positionally: readr 1.4.0 renamed `path` to
# `file`, and the positional form works with both old and new versions.
write_csv(
  temp_hhd_school,
  "Z:/Data Files/Practice Data/Deidentification/ID_T0_baseline/O3_deidentified_RISE_baseline_hhd_ID-hhd_survey-demographics-school.csv"
)

###############################################################################
#
# # HHD PERSON DETAILS SURVEY DE-IDENTIFICATION
#
###############################################################################

# De-identify name columns in the hhd person details survey - Jeff F
person_names <- select(
  hhd_person_details,
  person_name, person_name_last, concat_name_age:child_under5_list
)

# De-identify dobs in the hhd person details survey - Jeff F
person_dobs <- select(hhd_person_details, dob)

# Hash from the raw `hhd_person_details` values into
# `temp_hhd_person_details`. Entries that are missing in the raw data stay NA
# rather than being hashed.
for (raw_cols in list(person_names, person_dobs)) {
  for (col in names(raw_cols)) {
    values <- raw_cols[[col]]
    temp_hhd_person_details[[col]] <- ifelse(
      !is.na(values), hashed_id(values, "RISE"), NA
    )
  }
}

# The output file is passed positionally: readr 1.4.0 renamed `path` to
# `file`, and the positional form works with both old and new versions.
write_csv(
  temp_hhd_person_details,
  "Z:/Data Files/Practice Data/Deidentification/ID_T0_baseline/O3_deidentified_RISE_baseline_hhd_ID-hhd_survey-person_details1.csv"
)
# Monash-RISE/riseR documentation built on Dec. 11, 2019, 9:49 a.m.