# RISE_ID/Deidentification/O3_child_sampling_ID-deidentify_data.R

library(digest)
library(dplyr)
library(readr)

# Directory holding the raw survey exports. The files are addressed through
# file.path() rather than by changing the session's working directory, and
# the caller's workspace is left untouched.
data_dir <- "Z:/Data Files/Practice Data/ID_T1_child_sampling"
#data_dir <- "S:/R-MNHS-SPHPM-EPM-IDEpi/RISE/4. Surveys/3. Objectives/1. FJ/3/20190930_child sampling/3. Data/1. raw data"

# Raw exports: the two survey files and their two companion files.
child <- read_csv(file.path(data_dir, "RISE_O3_child_sample_ID_v1.csv"))
child.sample <- read_csv(
  file.path(data_dir, "RISE_O3_child_sample_ID_v1-child_survey-sampling.csv")
)
feces <- read_csv(file.path(data_dir, "RISE_O3_feces_ID_v1.csv"))
feces.sample <- read_csv(
  file.path(data_dir, "RISE_O3_feces_ID_v1-feces_collection.csv")
)

# Working copies: the sections below overwrite columns in the temp_* objects
# while always reading the values to hash from the untouched originals.
temp_child <- child
temp_child.sample <- child.sample
temp_feces <- feces
temp_feces.sample <- feces.sample

#' Hash values with a salted MD5 digest
#'
#' Each element of `x` is joined to `salt` by `paste()` (single-space
#' separator) and the resulting string is hashed with `digest()` using
#' `algo = "md5"` and `serialize = FALSE`.
#'
#' NOTE(review): MD5 with a fixed salt can be reversed by enumeration when the
#' input domain is small (e.g. phone numbers, dates of birth) -- confirm this
#' meets the project's de-identification requirements.
#'
#' @param x Vector of values to hash; non-character input is converted to
#'   text by `paste()`.
#' @param salt Value appended to every element of `x` before hashing.
#' @return An unnamed character vector of hex digests. `NA` elements of `x`
#'   are returned as `NA_character_`, and an empty `x` gives `character(0)`.
hashed_id <- function(x, salt) {
  # paste() recycles a zero-length argument to "", which would otherwise
  # produce one spurious hash of " <salt>" for empty input.
  if (length(x) == 0L) {
    return(character(0))
  }

  salted <- paste(x, salt)
  hashes <- vapply(
    salted,
    function(value) digest(value, algo = "md5", serialize = FALSE),
    character(1),
    USE.NAMES = FALSE
  )

  # paste() turns NA into the text "NA"; keep missing values missing instead
  # of returning the hash of "NA <salt>".
  hashes[is.na(x)] <- NA_character_
  hashes
}

###############################################################################
#
# # CHILD SURVEY DE-IDENTIFICATION
#
###############################################################################

# De-identify the child survey - Jeff F
#
# Every column selected below is overwritten in temp_child with its salted
# hash (hashed_id() with salt "RISE"); missing values stay NA. The values to
# hash are always read from the untouched `child` data.
child_hash_cols <- child %>%
  select(
    # names and phone numbers
    devicephonenum, return_phone, name_surveyor_other, return_name,
    name1:name40,
    # free-text fields
    note_why_not_ready, explanatory_no_why, survey_status_other,
    interviewer_notes,
    # house numbers
    settlement_barcode
  ) %>%
  names()

for (col in child_hash_cols) {
  temp_child[[col]] <- ifelse(
    !is.na(child[[col]]),
    hashed_id(child[[col]], "RISE"),
    NA
  )
}

# The output path is passed by position because readr deprecated the `path`
# argument name in favour of `file`.
# NOTE(review): the output folder is spelled "ID_T1_child-sampling" (hyphen)
# while the raw-data folder is "ID_T1_child_sampling" (underscore) -- confirm
# this is intended.
write_csv(
  temp_child,
  "Z:/Data Files/Practice Data/Deidentification/ID_T1_child-sampling/O3_deidentified_child_sample_ID_v1.csv"
)

###############################################################################
#
# # CHILD.SAMPLE SURVEY DE-IDENTIFICATION
#
###############################################################################

# De-identify the child sample survey - Jeff F
#
# Every column selected below is overwritten in temp_child.sample with its
# salted hash (hashed_id() with salt "RISE"); missing values stay NA. The
# values to hash are always read from the untouched `child.sample` data.
child_sample_hash_cols <- child.sample %>%
  select(
    # names
    child_name2, child_name_text, child_name, hhdhead_pl, respondent_name2,
    respondent_name_text, respondent_name,
    # dates of birth
    dob_pl, child_dob1, child_dob, respondent_dob,
    # free-text fields
    feces_kit_barcode_note, weigh_problems_text, height_problems_text,
    height_no, blood_comment, blood_type_comment, blood_spot_comment
  ) %>%
  names()

for (col in child_sample_hash_cols) {
  temp_child.sample[[col]] <- ifelse(
    !is.na(child.sample[[col]]),
    hashed_id(child.sample[[col]], "RISE"),
    NA
  )
}

# The output path is passed by position because readr deprecated the `path`
# argument name in favour of `file`.
write_csv(
  temp_child.sample,
  "Z:/Data Files/Practice Data/Deidentification/ID_T1_child-sampling/O3_deidentified_child_sample_ID_v1-child_survey-sampling.csv"
)

###############################################################################
#
# # FECES SURVEY DE-IDENTIFICATION
#
###############################################################################

# De-identify the feces survey - Jeff F
#
# Every column selected below is overwritten in temp_feces with its salted
# hash (hashed_id() with salt "RISE"); missing values stay NA. The values to
# hash are always read from the untouched `feces` data.
feces_hash_cols <- feces %>%
  select(
    # names and phone numbers
    devicephonenum, name_surveyor_other, name1:name40,
    # free-text fields
    survey_status_other,
    # house numbers (settlement barcode) and the instance name
    settlement_barcode, instanceName
  ) %>%
  names()

for (col in feces_hash_cols) {
  temp_feces[[col]] <- ifelse(
    !is.na(feces[[col]]),
    hashed_id(feces[[col]], "RISE"),
    NA
  )
}

# The output path is passed by position because readr deprecated the `path`
# argument name in favour of `file`.
write_csv(
  temp_feces,
  "Z:/Data Files/Practice Data/Deidentification/ID_T1_child-sampling/O3_deidentified_feces_ID_v1.csv"
)


###############################################################################
#
# # FECES.SAMPLE SURVEY DE-IDENTIFICATION
#
###############################################################################

# De-identify the feces sample survey - Jeff F
#
# Every column selected below is overwritten in temp_feces.sample with its
# salted hash (hashed_id() with salt "RISE"); missing values stay NA. The
# values to hash are always read from the untouched `feces.sample` data.
feces_sample_hash_cols <- feces.sample %>%
  select(
    # names and phone numbers
    respondent_name2, respondent_name_text, respondent_name, return_name,
    return_phone,
    # dates of birth
    respondent_dob,
    # free-text fields
    feces_comments
  ) %>%
  names()

for (col in feces_sample_hash_cols) {
  temp_feces.sample[[col]] <- ifelse(
    !is.na(feces.sample[[col]]),
    hashed_id(feces.sample[[col]], "RISE"),
    NA
  )
}

# The output path is passed by position because readr deprecated the `path`
# argument name in favour of `file`.
write_csv(
  temp_feces.sample,
  "Z:/Data Files/Practice Data/Deidentification/ID_T1_child-sampling/O3_deidentified_feces_ID_v1-feces_collection.csv"
)
# Monash-RISE/riseR documentation built on Dec. 11, 2019, 9:49 a.m.