# RISE_ID/Deidentification/O3&4_household&feces_ID-deidentify_data.R

library(digest)
library(dplyr)
library(readr)

# Clear every (non-hidden) object from the workspace before starting.
rm(list = ls())

# All input CSVs below are read relative to this directory.
setwd("Z:/Data Files/Practice Data/ID_T1_4_household_feces_v2")
#setwd("S:/R-MNHS-SPHPM-EPM-IDEpi/RISE/4. Surveys/3. Objectives/1. FJ/3/20190930_child sampling/3. Data/1. raw data")

# Each raw table is read once and kept untouched; the matching temp_* copy is
# the one whose columns get overwritten with hashes and is written back out.

# Household survey and its repeat-group tables
household <- read_csv("ID_201905_O3_4_household.csv")
temp_household <- household

household_child <- read_csv("ID_201905_O3_4_household-child_loop.csv")
temp_child <- household_child

household_feces_new <- read_csv("ID_201905_O3_4_household-feces_kit-children_new.csv")
temp_feces_new <- household_feces_new

household_feces_combined <- read_csv("ID_201905_O3_4_household-feces_kit-combined_feces.csv")
temp_feces_combined <- household_feces_combined

household_feces_child <- read_csv("ID_201905_O3_4_household-feces_kit-feces_child.csv")
temp_feces_child <- household_feces_child

household_hhd_filters <- read_csv("ID_201905_O3_4_household-hhd_members-filters_revise.csv")
temp_hhd_filters <- household_hhd_filters

household_hhd_departed <- read_csv("ID_201905_O3_4_household-hhd_members-reason_departed.csv")
temp_hhd_departed <- household_hhd_departed

# Feces survey (v2) and its collection repeat group
feces_v2 <- read_csv("RISE_O3_feces_ID_v2.csv")
temp_feces_v2 <- feces_v2

feces.sample_v2 <- read_csv("RISE_O3_feces_ID_v2-feces_collection.csv")
temp_feces.sample_v2 <- feces.sample_v2

#' Hash values together with a salt
#'
#' Each element of `x` is pasted to `salt` (separated by a single space, the
#' `paste()` default) and the resulting string is digested without
#' serialization.
#'
#' NOTE(review): every call site in this script passes the fixed salt "RISE"
#' with the default MD5. For low-entropy fields (dates of birth, phone
#' numbers) a known salt plus a fast hash can be reversed by enumeration;
#' consider a secret salt and a stronger `algo` - confirm with the data owner.
#'
#' @param x Vector of values to hash; coerced to character by `paste()`.
#' @param salt String appended to every value before hashing.
#' @param algo Digest algorithm handed to `digest()`. Defaults to `"md5"`,
#'   which reproduces the hashes this function has always produced.
#' @return An unnamed character vector the same length as `x`. Missing
#'   elements of `x` come back as `NA` rather than as the hash of the text
#'   "NA", and zero-length input yields `character(0)`.
hashed_id <- function(x, salt, algo = "md5") {
  # paste() recycles a zero-length `x` to "", which would otherwise produce a
  # single spurious hash of the salt alone.
  if (length(x) == 0L) {
    return(character(0))
  }

  salted <- paste(x, salt)
  hashes <- vapply(
    salted,
    function(value) digest(value, algo = algo, serialize = FALSE),
    character(1),
    USE.NAMES = FALSE
  )

  # A hashed NA is indistinguishable from a real token, so keep it missing.
  hashes[is.na(x)] <- NA_character_
  hashes
}

###############################################################################
#
# # HOUSEHOLD SURVEY DE-IDENTIFICATION
#
###############################################################################

# De-identify the household survey - Jeff F
# Every column selected below is overwritten in temp_household with a salted
# hash of its original value; missing values stay missing.
household_pii_cols <- household %>%
  select(
    # names and phone numbers
    devicephonenum, name_surveyor_other, hhd_id, respondent_pl,
    concat_respondent_age, caregiver1_pl, concat_caregiver1_age,
    caregiver2_pl, concat_caregiver2_age,
    under5_names_list:baseline_names_list,
    name1:name40, caregiver3_name, respondent_name, under5_names_list2,
    age5to15_names_list2, feces_respondent, respondent_name2,
    # dates of birth
    respondent_dob_pl, caregiver1_dob_pl, caregiver2_dob_pl,
    # free-text fields
    note_why_no_participate2, survey_status_other, respondent_problems_other,
    interviewer_notes,
    # house numbers
    settlement_barcode, instanceName
  ) %>%
  names()

for (col in household_pii_cols) {
  # Always hash from the raw table so no value is ever hashed twice.
  raw <- household[[col]]
  present <- !is.na(raw)
  # Start from an all-missing character vector so the column type does not
  # depend on whether any value happens to be present.
  hashed <- rep(NA_character_, length(raw))
  hashed[present] <- hashed_id(raw[present], "RISE")
  temp_household[[col]] <- hashed
}

# The output file is passed positionally: readr 1.4.0 renamed this argument
# from `path` to `file`, and the positional form works with both.
write_csv(temp_household, "Z:/Data Files/Practice Data/Deidentification/ID_T1_4_household_feces_v2/O3_4_deidentified_household_ID.csv")

###############################################################################
#
# # HOUSEHOLD CHILD LOOP SURVEY DE-IDENTIFICATION
#
###############################################################################

# De-identify the household child loop survey - Jeff F
# Every column selected below is overwritten in temp_child with a salted hash
# of its original value; missing values stay missing.
child_pii_cols <- household_child %>%
  select(
    # names
    child_name2, name_child,
    # dates of birth
    child_dob,
    # free-text fields
    child_no_survey_why
  ) %>%
  names()

for (col in child_pii_cols) {
  # Always hash from the raw table so no value is ever hashed twice.
  raw <- household_child[[col]]
  present <- !is.na(raw)
  # Start from an all-missing character vector so the column type does not
  # depend on whether any value happens to be present.
  hashed <- rep(NA_character_, length(raw))
  hashed[present] <- hashed_id(raw[present], "RISE")
  temp_child[[col]] <- hashed
}

# The output file is passed positionally: readr 1.4.0 renamed this argument
# from `path` to `file`, and the positional form works with both.
write_csv(temp_child, "Z:/Data Files/Practice Data/Deidentification/ID_T1_4_household_feces_v2/O3_4_deidentified_household-child_loop_ID.csv")

###############################################################################
#
# # HOUSEHOLD FECES CHILDREN NEW SURVEY DE-IDENTIFICATION
#
###############################################################################

# De-identify the household feces kit "children new" table - Jeff F
# Every column selected below is overwritten in temp_feces_new with a salted
# hash of its original value; missing values stay missing.
feces_new_pii_cols <- household_feces_new %>%
  select(
    # names
    child_name_text, name_feces2,
    # dates of birth
    child_dob1
  ) %>%
  names()

for (col in feces_new_pii_cols) {
  # Always hash from the raw table so no value is ever hashed twice.
  raw <- household_feces_new[[col]]
  present <- !is.na(raw)
  # Start from an all-missing character vector so the column type does not
  # depend on whether any value happens to be present.
  hashed <- rep(NA_character_, length(raw))
  hashed[present] <- hashed_id(raw[present], "RISE")
  temp_feces_new[[col]] <- hashed
}

# The output file is passed positionally: readr 1.4.0 renamed this argument
# from `path` to `file`, and the positional form works with both.
write_csv(temp_feces_new, "Z:/Data Files/Practice Data/Deidentification/ID_T1_4_household_feces_v2/O3_4_deidentified_household-feces_kit-children_new_ID.csv")

###############################################################################
#
# # HOUSEHOLD FECES CHILDREN KIT COMBINED FECES DE-IDENTIFICATION
#
###############################################################################

# De-identify the household feces kit "combined feces" table - Jeff F
# Every column selected below is overwritten in temp_feces_combined with a
# salted hash of its original value; missing values stay missing.
feces_combined_pii_cols <- household_feces_combined %>%
  select(
    # names
    combined_names,
    # dates of birth
    combined_dob
  ) %>%
  names()

for (col in feces_combined_pii_cols) {
  # Always hash from the raw table so no value is ever hashed twice.
  raw <- household_feces_combined[[col]]
  present <- !is.na(raw)
  # Start from an all-missing character vector so the column type does not
  # depend on whether any value happens to be present.
  hashed <- rep(NA_character_, length(raw))
  hashed[present] <- hashed_id(raw[present], "RISE")
  temp_feces_combined[[col]] <- hashed
}

# The output file is passed positionally: readr 1.4.0 renamed this argument
# from `path` to `file`, and the positional form works with both.
write_csv(temp_feces_combined, "Z:/Data Files/Practice Data/Deidentification/ID_T1_4_household_feces_v2/O3_4_deidentified_household-feces_kit-combined_feces_ID.csv")

###############################################################################
#
# # HOUSEHOLD FECES KIT FECES CHILD DE-IDENTIFICATION
#
###############################################################################

# De-identify the household feces kit "feces child" table - Jeff F
# Every column selected below is overwritten in temp_feces_child with a salted
# hash of its original value; missing values stay missing.
feces_child_pii_cols <- household_feces_child %>%
  select(
    # names
    name_feces,
    # dates of birth
    dob_feces
  ) %>%
  names()

for (col in feces_child_pii_cols) {
  # Always hash from the raw table so no value is ever hashed twice.
  raw <- household_feces_child[[col]]
  present <- !is.na(raw)
  # Start from an all-missing character vector so the column type does not
  # depend on whether any value happens to be present.
  hashed <- rep(NA_character_, length(raw))
  hashed[present] <- hashed_id(raw[present], "RISE")
  temp_feces_child[[col]] <- hashed
}

# The output file is passed positionally: readr 1.4.0 renamed this argument
# from `path` to `file`, and the positional form works with both.
write_csv(temp_feces_child, "Z:/Data Files/Practice Data/Deidentification/ID_T1_4_household_feces_v2/O3_4_deidentified_household-feces_kit-feces_child_ID.csv")

###############################################################################
#
# # HOUSEHOLD HHD MEMBERS FILTERS REVISE DE-IDENTIFICATION
#
###############################################################################

# De-identify the household members "filters revise" table - Jeff F
# Every column selected below is overwritten in temp_hhd_filters with a salted
# hash of its original value; missing values stay missing.
hhd_filters_pii_cols <- household_hhd_filters %>%
  select(
    # names
    all_names, all_names2, under5_names2, age5to15_names2
  ) %>%
  names()

for (col in hhd_filters_pii_cols) {
  # Always hash from the raw table so no value is ever hashed twice.
  raw <- household_hhd_filters[[col]]
  present <- !is.na(raw)
  # Start from an all-missing character vector so the column type does not
  # depend on whether any value happens to be present.
  hashed <- rep(NA_character_, length(raw))
  hashed[present] <- hashed_id(raw[present], "RISE")
  temp_hhd_filters[[col]] <- hashed
}

# The output file is passed positionally: readr 1.4.0 renamed this argument
# from `path` to `file`, and the positional form works with both.
write_csv(temp_hhd_filters, "Z:/Data Files/Practice Data/Deidentification/ID_T1_4_household_feces_v2/O3_4_deidentified_household-hhd_members-filters_revise_ID.csv")

###############################################################################
#
# # HOUSEHOLD HHD MEMBERS REASON DEPARTED DE-IDENTIFICATION
#
###############################################################################

# De-identify the household members "reason departed" table - Jeff F
# Every column selected below is overwritten in temp_hhd_departed with a
# salted hash of its original value; missing values stay missing.
hhd_departed_pii_cols <- household_hhd_departed %>%
  select(
    # names
    name_departed2
  ) %>%
  names()

for (col in hhd_departed_pii_cols) {
  # Always hash from the raw table so no value is ever hashed twice.
  raw <- household_hhd_departed[[col]]
  present <- !is.na(raw)
  # Start from an all-missing character vector so the column type does not
  # depend on whether any value happens to be present.
  hashed <- rep(NA_character_, length(raw))
  hashed[present] <- hashed_id(raw[present], "RISE")
  temp_hhd_departed[[col]] <- hashed
}

# The output file is passed positionally: readr 1.4.0 renamed this argument
# from `path` to `file`, and the positional form works with both.
write_csv(temp_hhd_departed, "Z:/Data Files/Practice Data/Deidentification/ID_T1_4_household_feces_v2/O3_4_deidentified_household-hhd_members-reason_departed_ID.csv")

###############################################################################
#
# # FECES SURVEY V2 DE-IDENTIFICATION
#
###############################################################################

# De-identify the feces v2 survey - Jeff F
# Every column selected below is overwritten in temp_feces_v2 with a salted
# hash of its original value; missing values stay missing.
feces_v2_pii_cols <- feces_v2 %>%
  select(
    # names and phone numbers
    devicephonenum, name_surveyor_other, name1:name40,
    # free-text fields
    survey_status_other,
    # house numbers
    settlement_barcode, instanceName
  ) %>%
  names()

for (col in feces_v2_pii_cols) {
  # Always hash from the raw table so no value is ever hashed twice.
  raw <- feces_v2[[col]]
  present <- !is.na(raw)
  # Start from an all-missing character vector so the column type does not
  # depend on whether any value happens to be present.
  hashed <- rep(NA_character_, length(raw))
  hashed[present] <- hashed_id(raw[present], "RISE")
  temp_feces_v2[[col]] <- hashed
}

# The output file is passed positionally: readr 1.4.0 renamed this argument
# from `path` to `file`, and the positional form works with both.
write_csv(temp_feces_v2, "Z:/Data Files/Practice Data/Deidentification/ID_T1_4_household_feces_v2/O3_deidentified_feces_ID_v2.csv")

###############################################################################
#
# # FECES.SAMPLE V2 SURVEY DE-IDENTIFICATION
#
###############################################################################

# De-identify the feces sample v2 (feces collection) table - Jeff F
# Every column selected below is overwritten in temp_feces.sample_v2 with a
# salted hash of its original value; missing values stay missing.
feces_sample_v2_pii_cols <- feces.sample_v2 %>%
  select(
    # names and phone numbers
    respondent_name2, respondent_name_text, respondent_name,
    return_name, return_phone,
    # dates of birth
    respondent_dob,
    # free-text fields
    feces_comments
  ) %>%
  names()

for (col in feces_sample_v2_pii_cols) {
  # Always hash from the raw table so no value is ever hashed twice.
  raw <- feces.sample_v2[[col]]
  present <- !is.na(raw)
  # Start from an all-missing character vector so the column type does not
  # depend on whether any value happens to be present.
  hashed <- rep(NA_character_, length(raw))
  hashed[present] <- hashed_id(raw[present], "RISE")
  temp_feces.sample_v2[[col]] <- hashed
}

# The output file is passed positionally: readr 1.4.0 renamed this argument
# from `path` to `file`, and the positional form works with both.
write_csv(temp_feces.sample_v2, "Z:/Data Files/Practice Data/Deidentification/ID_T1_4_household_feces_v2/O3_deidentified_feces_ID_v2-feces_collection.csv")
# Monash-RISE/riseR documentation built on Dec. 11, 2019, 9:49 a.m.