# RISE_ID/Deidentification/O3_child_maternal_stool_ID-deidentify_data.R

library(digest)

# Remove every visible object from the workspace before the script runs.
rm(list = ls())

# All input file names below are relative to this raw-data folder.
setwd("Z:/Data Files/Practice Data/ID_T1_child_maternal_stool")
# Alternative raw-data location, currently disabled:
#setwd("S:/R-MNHS-SPHPM-EPM-IDEpi/RISE/4. Surveys/3. Objectives/1. FJ/3/20190930_child sampling/3. Data/1. raw data")

# NOTE(review): read_csv() is not supplied by digest, the only package attached
# above - presumably readr (and dplyr for select()/%>%) is attached elsewhere;
# confirm.
#
# Each raw table is read once and immediately copied to a temp_* object. The
# temp_* copy is the one whose columns get overwritten with hashes further
# down; the raw object is kept untouched as the source of the original values.

# Stool survey and its repeat-group tables
stool <- read_csv("ID_201909_O3_stool.csv")
temp_stool <- stool

stool_child <- read_csv("ID_201909_O3_stool-child_loop.csv")
temp_child <- stool_child

stool_combo_names <- read_csv("ID_201909_O3_stool-combined1_names.csv")
temp_names <- stool_combo_names

stool_hhd_filters <- read_csv("ID_201909_O3_stool-hhd_members-filters_revise.csv")
temp_hhd_filters <- stool_hhd_filters

stool_hhd_departed <- read_csv("ID_201909_O3_stool-hhd_members-reason_departed.csv")
temp_hhd_departed <- stool_hhd_departed

stool_new_people <- read_csv("ID_201909_O3_stool-new_people-combined2_names.csv")
temp_people <- stool_new_people

stool_new_people2 <- read_csv("ID_201909_O3_stool-new_people-new_people2.csv")
temp_people2 <- stool_new_people2

stool_new_people3 <- read_csv("ID_201909_O3_stool-new_people-new_people3-new_revise.csv")
temp_people3 <- stool_new_people3

# Feces survey (v3) and its collection table
feces_v3 <- read_csv("RISE_O3_feces_ID_v3.csv")
temp_feces_v3 <- feces_v3

feces.sample_v3 <- read_csv("RISE_O3_feces_ID_v3-feces_collection.csv")
temp_feces.sample_v3 <- feces.sample_v3

# Hash a vector of identifying values with a salt.
#
# Each element of `x` is pasted to `salt` (separated by a single space, the
# paste() default) and the resulting string is hashed with MD5 via
# digest(..., serialize = FALSE).
#
# Arguments:
#   x     Vector of values to hash; paste() coerces it to character.
#   salt  Value appended to every element before hashing (the callers in this
#         script always pass a single string).
#
# Returns:
#   An unnamed character vector of hashes, one per pasted element;
#   character(0) when `x` is empty.
#
# NA elements are not treated specially: paste() turns them into the text
# "NA", which is then hashed like any other value, so callers must mask
# missing values themselves.
#
# NOTE(review): fields with a small value space (e.g. dates of birth) hashed
# with a salt that is stored in the script may be recoverable by enumeration -
# confirm this meets the project's de-identification requirements.
hashed_id <- function(x, salt) {
  # paste() treats a zero-length argument as "", so without this guard an
  # empty `x` would produce one spurious hash of the salt alone.
  if (length(x) == 0L) {
    return(character(0))
  }

  salted <- paste(x, salt)

  # vapply() pins the result to one string per element, unlike sapply(),
  # whose return type depends on its input.
  vapply(
    salted,
    function(value) digest(value, algo = "md5", serialize = FALSE),
    character(1),
    USE.NAMES = FALSE
  )
}

###############################################################################
#
# # STOOL SURVEY DE-IDENTIFICATION
#
###############################################################################

# Stool survey, names and phone numbers (Jeff F): every column selected here
# is overwritten in temp_stool with its salted hash; missing values stay NA.
stool_names <- select(
  stool,
  devicephonenum, phone_yn, phone_y_no, name_surveyor_other, hhd_id,
  hhd_head_name, hhd_name, under5_names_list, female18to40_names_list,
  name1:name40, all_names_list1, new_name1:new_name40, new_hhd_id,
  new_all_names_list1, all_names_new_people2, all_names_new_people_combo,
  all_names_combined1, names_under5_no_consent, person_under5_names_list,
  person_female18to40_names_list, final_name1:final_name40, caregiver_name,
  feces_select_maternal2, feces_name_maternal
)

for (column in names(stool_names)) {
  # Values are always taken from the untouched raw table, never from temp_stool.
  raw_values <- stool_names[[column]]
  temp_stool[[column]] <- ifelse(is.na(raw_values), NA, hashed_id(raw_values, "RISE"))
}

# Stool survey, free-text fields (Jeff F): hashed the same way.
stool_text <- select(
  stool,
  note_why_not_ready, note_why_no_participate2, survey_status_other
)

for (column in names(stool_text)) {
  raw_values <- stool_text[[column]]
  temp_stool[[column]] <- ifelse(is.na(raw_values), NA, hashed_id(raw_values, "RISE"))
}

# Stool survey, house-number fields (Jeff F): hashed the same way.
stool_house <- select(
  stool,
  new_people_move1, new_people_move2, settlement_barcode, instanceName
)

for (column in names(stool_house)) {
  raw_values <- stool_house[[column]]
  temp_stool[[column]] <- ifelse(is.na(raw_values), NA, hashed_id(raw_values, "RISE"))
}

# Write the de-identified copy; columns not selected above are written as read.
write_csv(
  temp_stool,
  path = "Z:/Data Files/Practice Data/Deidentification/ID_T1_child_maternal_stool/O3_deidentified_stool_ID_201909.csv"
)

###############################################################################
#
# # STOOL CHILD LOOP SURVEY DE-IDENTIFICATION
#
###############################################################################

# Stool child loop, names (Jeff F): the selected column is overwritten in
# temp_child with its salted hash; missing values stay NA.
stool_child_names <- select(stool_child, name_child)

for (column in names(stool_child_names)) {
  # Values are always taken from the untouched raw table, never from temp_child.
  raw_values <- stool_child_names[[column]]
  temp_child[[column]] <- ifelse(is.na(raw_values), NA, hashed_id(raw_values, "RISE"))
}

# Stool child loop, free-text fields (Jeff F): hashed the same way.
stool_child_text <- select(stool_child, child_no_survey_why)

# Iterate over the selected text columns only. Looping over names(stool_child)
# instead would hash every column of the child-loop table, leaving no usable
# data in the output file.
for (column in names(stool_child_text)) {
  raw_values <- stool_child_text[[column]]
  temp_child[[column]] <- ifelse(is.na(raw_values), NA, hashed_id(raw_values, "RISE"))
}

# Write the de-identified copy; columns not selected above are written as read.
write_csv(
  temp_child,
  path = "Z:/Data Files/Practice Data/Deidentification/ID_T1_child_maternal_stool/O3_deidentified_stool-child_loop_ID_201909.csv"
)

###############################################################################
#
# # STOOL COMBINED NAMES SURVEY DE-IDENTIFICATION
#
###############################################################################

# Stool combined names, name fields (Jeff F): every column selected here is
# overwritten in temp_names with its salted hash; missing values stay NA.
combo_names <- select(
  stool_combo_names,
  combined_names, combined_names2, combined_names3, combined_names4,
  person_under5_no_consent_names, person_under5_names,
  person_female18to40_names
)

for (column in names(combo_names)) {
  # Values are always taken from the untouched raw table, never from temp_names.
  raw_values <- combo_names[[column]]
  temp_names[[column]] <- ifelse(is.na(raw_values), NA, hashed_id(raw_values, "RISE"))
}

# Stool combined names, dates of birth (Jeff F): hashed the same way.
combo_dobs <- select(stool_combo_names, combined_dob)

for (column in names(combo_dobs)) {
  raw_values <- combo_dobs[[column]]
  temp_names[[column]] <- ifelse(is.na(raw_values), NA, hashed_id(raw_values, "RISE"))
}

# Write the de-identified copy; columns not selected above are written as read.
write_csv(
  temp_names,
  path = "Z:/Data Files/Practice Data/Deidentification/ID_T1_child_maternal_stool/O3_deidentified_stool-combined1_names_ID_201909.csv"
)

###############################################################################
#
# # STOOL HHD MEMBERS FILTERS REVISE SURVEY DE-IDENTIFICATION
#
###############################################################################

# Stool hhd filters, name fields (Jeff F): every column selected here is
# overwritten in temp_hhd_filters with its salted hash; missing values stay NA.
hhd_filters_names <- select(
  stool_hhd_filters,
  all_names, all_names2, all_names3, all_names4
)

for (column in names(hhd_filters_names)) {
  # Values are always taken from the untouched raw table.
  raw_values <- hhd_filters_names[[column]]
  temp_hhd_filters[[column]] <- ifelse(is.na(raw_values), NA, hashed_id(raw_values, "RISE"))
}

# Stool hhd filters, dates of birth (Jeff F): hashed the same way.
hhd_filters_dobs <- select(stool_hhd_filters, all_dob)

for (column in names(hhd_filters_dobs)) {
  raw_values <- hhd_filters_dobs[[column]]
  temp_hhd_filters[[column]] <- ifelse(is.na(raw_values), NA, hashed_id(raw_values, "RISE"))
}

# Write the de-identified copy; columns not selected above are written as read.
write_csv(
  temp_hhd_filters,
  path = "Z:/Data Files/Practice Data/Deidentification/ID_T1_child_maternal_stool/O3_deidentified_stool-hhd_members-filters_revise_ID_201909.csv"
)

###############################################################################
#
# # STOOL HHD MEMBERS REASON DEPARTED SURVEY DE-IDENTIFICATION
#
###############################################################################

# Stool hhd departed, name field (Jeff F): the selected column is overwritten
# in temp_hhd_departed with its salted hash; missing values stay NA.
hhd_departed_names <- select(stool_hhd_departed, name_departed2)

for (column in names(hhd_departed_names)) {
  # Values are always taken from the untouched raw table.
  raw_values <- hhd_departed_names[[column]]
  temp_hhd_departed[[column]] <- ifelse(is.na(raw_values), NA, hashed_id(raw_values, "RISE"))
}

# Write the de-identified copy; columns not selected above are written as read.
write_csv(
  temp_hhd_departed,
  path = "Z:/Data Files/Practice Data/Deidentification/ID_T1_child_maternal_stool/O3_deidentified_stool-hhd_members-reason_departed_ID_201909.csv"
)

###############################################################################
#
# # STOOL NEW PEOPLE COMBINED2 SURVEY DE-IDENTIFICATION
#
###############################################################################

# Stool new people combo2, name fields (Jeff F): every column selected here is
# overwritten in temp_people with its salted hash; missing values stay NA.
people_names <- select(
  stool_new_people,
  combined2_names1, combined2_names2, combined2_names3
)

for (column in names(people_names)) {
  # Values are always taken from the untouched raw table, never from temp_people.
  raw_values <- people_names[[column]]
  temp_people[[column]] <- ifelse(is.na(raw_values), NA, hashed_id(raw_values, "RISE"))
}

# Stool new people combo2, dates of birth (Jeff F): hashed the same way.
people_dobs <- select(stool_new_people, combined2_dob)

for (column in names(people_dobs)) {
  raw_values <- people_dobs[[column]]
  temp_people[[column]] <- ifelse(is.na(raw_values), NA, hashed_id(raw_values, "RISE"))
}

# Write the de-identified copy; columns not selected above are written as read.
write_csv(
  temp_people,
  path = "Z:/Data Files/Practice Data/Deidentification/ID_T1_child_maternal_stool/O3_deidentified_stool-new_people-combined2_names_ID_201909.csv"
)

###############################################################################
#
# # STOOL NEW PEOPLE2 SURVEY DE-IDENTIFICATION
#
###############################################################################

# Stool new people2, name fields (Jeff F): every column selected here is
# overwritten in temp_people2 with its salted hash; missing values stay NA.
people2_names <- select(
  stool_new_people2,
  person_name_first, person_name_last, person_name_full,
  person_name_age
)

for (column in names(people2_names)) {
  # Values are always taken from the untouched raw table, never from temp_people2.
  raw_values <- people2_names[[column]]
  temp_people2[[column]] <- ifelse(is.na(raw_values), NA, hashed_id(raw_values, "RISE"))
}

# Stool new people2, dates of birth (Jeff F): hashed the same way.
people2_dobs <- select(stool_new_people2, person_dob)

for (column in names(people2_dobs)) {
  raw_values <- people2_dobs[[column]]
  temp_people2[[column]] <- ifelse(is.na(raw_values), NA, hashed_id(raw_values, "RISE"))
}

# Write the de-identified copy; columns not selected above are written as read.
write_csv(
  temp_people2,
  path = "Z:/Data Files/Practice Data/Deidentification/ID_T1_child_maternal_stool/O3_deidentified_stool-new_people-new_people2_ID_201909.csv"
)

###############################################################################
#
# # STOOL NEW PEOPLE3 SURVEY DE-IDENTIFICATION
#
###############################################################################

# Stool new people3, name fields (Jeff F): every column selected here is
# overwritten in temp_people3 with its salted hash; missing values stay NA.
people3_names <- select(
  stool_new_people3,
  new_all_names, new_all_names2, new_all_names3
)

for (column in names(people3_names)) {
  # Values are always taken from the untouched raw table, never from temp_people3.
  raw_values <- people3_names[[column]]
  temp_people3[[column]] <- ifelse(is.na(raw_values), NA, hashed_id(raw_values, "RISE"))
}

# Stool new people3, dates of birth (Jeff F): hashed the same way.
people3_dobs <- select(stool_new_people3, new_all_dob)

for (column in names(people3_dobs)) {
  raw_values <- people3_dobs[[column]]
  temp_people3[[column]] <- ifelse(is.na(raw_values), NA, hashed_id(raw_values, "RISE"))
}

# Write the de-identified copy; columns not selected above are written as read.
write_csv(
  temp_people3,
  path = "Z:/Data Files/Practice Data/Deidentification/ID_T1_child_maternal_stool/O3_deidentified_stool-new_people-new_people3-new_revise_ID_201909.csv"
)

###############################################################################
#
# # FECES SURVEY V3 DE-IDENTIFICATION
#
###############################################################################

# Feces v3 survey, names and phone numbers (Jeff F): every column selected
# here is overwritten in temp_feces_v3 with its salted hash; missing values
# stay NA.
feces_v3_names <- select(
  feces_v3,
  devicephonenum, name_surveyor_other, name1:name40,
  maternal_respondent_name2, maternal_respondent_name_text,
  maternal_respondent_name
)

for (column in names(feces_v3_names)) {
  # Values are always taken from the untouched raw table.
  raw_values <- feces_v3_names[[column]]
  temp_feces_v3[[column]] <- ifelse(is.na(raw_values), NA, hashed_id(raw_values, "RISE"))
}

# Feces v3 survey, dates of birth (Jeff F): hashed the same way.
feces_v3_dobs <- select(feces_v3, maternal_respondent_dob)

for (column in names(feces_v3_dobs)) {
  raw_values <- feces_v3_dobs[[column]]
  temp_feces_v3[[column]] <- ifelse(is.na(raw_values), NA, hashed_id(raw_values, "RISE"))
}

# Feces v3 survey, free-text fields (Jeff F): hashed the same way.
feces_v3_text <- select(feces_v3, survey_status_other)

for (column in names(feces_v3_text)) {
  raw_values <- feces_v3_text[[column]]
  temp_feces_v3[[column]] <- ifelse(is.na(raw_values), NA, hashed_id(raw_values, "RISE"))
}

# Feces v3 survey, house-number fields (Jeff F): hashed the same way.
feces_v3_house <- select(feces_v3, settlement_barcode, instanceName)

for (column in names(feces_v3_house)) {
  raw_values <- feces_v3_house[[column]]
  temp_feces_v3[[column]] <- ifelse(is.na(raw_values), NA, hashed_id(raw_values, "RISE"))
}

# Write the de-identified copy; columns not selected above are written as read.
write_csv(
  temp_feces_v3,
  path = "Z:/Data Files/Practice Data/Deidentification/ID_T1_child_maternal_stool/O3_deidentified_feces_ID_v3_201909.csv"
)

###############################################################################
#
# # FECES.SAMPLE V3 SURVEY DE-IDENTIFICATION
#
###############################################################################

# Feces sample v3 survey, names and phone numbers (Jeff F): every column
# selected here is overwritten in temp_feces.sample_v3 with its salted hash;
# missing values stay NA.
feces.sample_v3_names <- select(
  feces.sample_v3,
  respondent_name2, respondent_name_first, respondent_name_last,
  respondent_name_text, respondent_name, return_name, return_phone
)

for (column in names(feces.sample_v3_names)) {
  # Values are always taken from the untouched raw table.
  raw_values <- feces.sample_v3_names[[column]]
  temp_feces.sample_v3[[column]] <- ifelse(is.na(raw_values), NA, hashed_id(raw_values, "RISE"))
}

# Feces sample v3 survey, dates of birth (Jeff F): hashed the same way.
feces.sample_v3_dobs <- select(feces.sample_v3, respondent_dob)

for (column in names(feces.sample_v3_dobs)) {
  raw_values <- feces.sample_v3_dobs[[column]]
  temp_feces.sample_v3[[column]] <- ifelse(is.na(raw_values), NA, hashed_id(raw_values, "RISE"))
}

# Feces sample v3 survey, free-text fields (Jeff F): hashed the same way.
feces.sample_v3_text <- select(feces.sample_v3, feces_comments)

for (column in names(feces.sample_v3_text)) {
  raw_values <- feces.sample_v3_text[[column]]
  temp_feces.sample_v3[[column]] <- ifelse(is.na(raw_values), NA, hashed_id(raw_values, "RISE"))
}

# Write the de-identified copy; columns not selected above are written as read.
write_csv(
  temp_feces.sample_v3,
  path = "Z:/Data Files/Practice Data/Deidentification/ID_T1_child_maternal_stool/O3_deidentified_feces_collection_ID_v3_201909.csv"
)
# Monash-RISE/riseR documentation built on Dec. 11, 2019, 9:49 a.m.