# riseR

# RISE consents for Suva, Fiji

# DATA SOURCES
# 1. HOUSE NUMBERING AND CONSENT (up to 13 June 2019)
# 2. O3/4 BASELINE SURVEY (20 June 2019 - ?)

# HOUSEHOLD CONSENT ITEMS
# consent1_1	a) Participation in the 5-year research program, which will include an engineering intervention either during the research program (intervention group - tranche 1) or 2 years later at the end of the study ('control' group - tranche 2).
# consent1_3	c) My household taking part in surveys to be administered 3-monthly at my home over the period 2018 - 2022.

# CHILD CONSENT ITEMS
# consent items:
# consent3_1	a) Up to quarterly collection of faecal samples from my child and subsequent analysis of samples.
# consent3_2	b) Collection of venous blood samples once per year and subsequent analysis of samples.
# consent3_3	c) Yearly measurement of height and weight.
# consent3_4	d) The faecal and blood samples provided during this research, and associated data, may be used by named researchers in this program in future research programs.
# consent3_5	e) Researchers accessing local health centre records for vaccination, medical presentations and growth measurements since the birth of my child.


# 1. HOUSE NUMBERING AND CONSENT (2019)
# Every input is read through an explicit path built with file.path() rather
# than by calling setwd() before each read, so a read can never pick up a file
# from whichever directory an earlier setwd() happened to leave behind.
# Shared-drive location of the reports folder:
#   "S:/R-MNHS-SPHPM-EPM-IDEpi/RISE/4. Surveys/2.Consent and House ID/1. FJ/2. Data/4. reports/"
o3_data_dir <- "C:/Users/RISE Fiji/Desktop/Jeff/Data Files Objective 3"
o3_reports_dir <- file.path(o3_data_dir, "Reports")

consent_list <- read_csv(file = file.path(o3_reports_dir, "consentfjhhdimport.csv")) #771
child_consent_list <- read_csv(file = file.path(o3_reports_dir, "child_consent_items.csv")) #248

# 2. O3/4 BASELINE SURVEY (20 June 2019 - ) -
# Shared-drive location of the raw survey exports:
#   "S:/R-MNHS-SPHPM-EPM-IDEpi/RISE/4. Surveys/3. Objectives/1. FJ/3/20190624_baseline/3. Data/1. raw data"
consent <- read_csv(file = file.path(o3_data_dir, "consent_FJ_v1.csv"))
consent.form3 <- read_csv(file = file.path(o3_data_dir, "consent_FJ_v1-consent_form3.csv"))
consent.childname <- read_csv(file = file.path(o3_data_dir, "consent_FJ_v1-consent3_childname.csv"))

# Shared-drive location of the summary folder:
#   "S:/R-MNHS-SPHPM-EPM-IDEpi/RISE/4. Surveys/3. Objectives/1. FJ/3/20190624_baseline/3. Data"
# The working directory still ends up in the summary folder, exactly as before,
# so anything that relies on it after this point is unaffected.
setwd(file.path(o3_data_dir, "Summary"))

# FIX ALL DATES *****************************
# Parse date-time values that may arrive in mixed layouts into one POSIXct
# vector.
#
# Each element is tried as year-month-day, then day-month-year, then
# month-day-year (each followed by hours, minutes and seconds); the first
# layout that parses wins.
#
# Args:
#   x_date: vector of date-time values (typically character), any length.
#
# Returns:
#   A POSIXct vector in UTC with the same length as x_date. Elements that match
#   none of the three layouts are NA; a single warning reports how many
#   non-missing inputs ended up that way.
fix_date <- function(x_date) {
  seconds <- rep(NA_real_, length(x_date))

  for (parse_fun in list(ymd_hms, dmy_hms, mdy_hms)) {
    unresolved <- is.na(seconds)
    if (!any(unresolved)) {
      break
    }
    # quiet = TRUE: elements written in one of the other layouts are expected
    # to fail here, so the per-parser "failed to parse" warning is only noise.
    # The whole vector is handed to the parser (not just the unresolved part)
    # so each parser sees exactly the input it saw before; each parser now runs
    # once instead of twice.
    seconds[unresolved] <- as.numeric(parse_fun(x_date, quiet = TRUE))[unresolved]
  }

  # Replace the suppressed per-parser warnings with one that only fires for
  # values that are genuinely unparseable.
  unparsed <- is.na(seconds) & !is.na(x_date)
  if (any(unparsed)) {
    warning(sum(unparsed),
            " date-time value(s) matched none of the ymd/dmy/mdy hms layouts",
            call. = FALSE)
  }

  # The conversion is the last evaluated expression (not an assignment), so the
  # result is returned visibly.
  as.POSIXct(seconds, origin = "1970-01-01", tz = "UTC")
}

#############################################
#############################################
# Switch to the folder holding the Objective 3 R scripts and run the consent
# corrections script from there (source() resolves the bare file name against
# the working directory set on the line before it).
# NOTE(review): this runs before the household and child consent tables are
# combined further down, so presumably it edits the freshly loaded tables or
# only defines helpers - confirm against the script itself.
# Shared-drive location of the same script folder:
#setwd("S:/R-MNHS-SPHPM-EPM-IDEpi/RISE/4. Surveys/3. Objectives/1. FJ/3/20190624_baseline/3. Data/2. code")
setwd("C:/Users/RISE Fiji/Desktop/R Script Obj 3")
source("O3_T0_FJ-consent_corrections.R")

#############################################
#############################################
#HOUSEHOLD CONSENTS - COMBINE ALL CONSENTS

# 1. Original consent and house-numbering survey
# Keep only the household-level consent columns, force house.no to numeric and
# stamp every row with 11 June 2019, the last day of the consent survey.
consent_list <- mutate(                                   #771
  select(consent_list, settlement, house.no, hhd_id, signed.yn, study, surveys),
  house.no = as.numeric(house.no),
  date = as.Date("2019-06-11")
)

############################################
# CHECK: the same settlement + house.no + hhd_id should not appear twice.
# `id` is a scratch copy carrying the pasted key; it is removed afterwards.
id <- mutate(consent_list, id = paste0(settlement, house.no, hhd_id))
table(duplicated(id[["id"]])) # recorded result: 0
id[duplicated(id[["id"]]), ]  # recorded result: 0 duplicates
rm(id)

# 2. New consents collected during baseline (Nov 2018), child sampling (Feb/Mar 2019)
# Pull the household consent answers out of the survey export under the same
# column names as consent_list, sort them, and turn the 0/1 codes into
# "no"/"yes".
consent_house_new1 <- consent %>%
  select(
    settlement = settlement_name,
    house.no = house_no,
    hhd_id = hhd_head_name,
    signed.yn = signed_form1,
    study = consent1_1,
    surveys = consent1_3,
    date = today
  ) %>%
  arrange(settlement, house.no, hhd_id, date) %>%
  mutate(
    signed.yn = recode(signed.yn, "0" = "no", "1" = "yes"),
    study = recode(study, "0" = "no", "1" = "yes"),
    surveys = recode(surveys, "0" = "no", "1" = "yes")
  )

# Recorded by the original author: 5 rows have no head-of-household name;
# these appear to be visits with no consent.
table(is.na(consent_house_new1[["hhd_id"]]))

# Remove a nameless row when its house has exactly two entries in this table,
# i.e. when there is another entry for that house. The result stays grouped by
# settlement and house.no.
# NOTE(review): a house with three or more entries keeps its nameless row,
# because the rule tests for exactly 2 - confirm that is intended.
consent_house_new <- consent_house_new1 %>%
  group_by(settlement, house.no) %>%
  filter(!is.na(hhd_id) | n() != 2) #1
rm(consent_house_new1)

# 3. FINAL CONSENT LIST - need to collapse and use most recent
# Stack the original consent list on top of the newly collected consents, then
# clean out rows that cannot be attributed to a household. The result stays
# grouped by settlement and house.no.
# NOTE(review): the last filter removes every row whose hhd_id is NA (a
# comparison with NA is never TRUE), not only the literal "N/A" entries. That
# makes the two NA-aware filters before it redundant - confirm whether nameless
# rows for houses with no other entry were meant to survive.
consent_list_all2 <- bind_rows(consent_list, consent_house_new) %>%  #701
  group_by(settlement, house.no) %>%
  filter(!is.na(hhd_id) | n() != 2) %>%          #772
  filter(!is.na(hhd_id) | !is.na(house.no)) %>%  # remove blank surveys
  filter(!is.na(hhd_id), hhd_id != "N/A")
rm(consent_list, consent_house_new)

# CHECK: list keys (settlement + house.no + hhd_id) that occur more than once.
# `id` is a scratch copy carrying the pasted key; it is removed afterwards.
id <- mutate(consent_list_all2, id = paste0(settlement, house.no, hhd_id))
table(duplicated(id[["id"]])) # recorded result: 0
id[duplicated(id[["id"]]), ]  # recorded result: 6 duplicates; wainidinu 15 - this is a legit duplicate
# lobau 98 - why 2?
rm(id)

# Keep only the most recent consent per household.
# Rows are keyed by the pasted settlement + house.no + hhd_id string (kept as
# the `id` column) and the row with the latest date in each key is retained;
# the result stays grouped by id.
# which.max() returns integer(0) when every date in a group is NA, which would
# make slice() silently delete that household from the consent list. In that
# case the first row of the group is kept instead.
# NOTE(review): the key is pasted without a separator, so different
# house.no/hhd_id pairs can in principle produce the same string - verify if
# hhd_id can start with a digit.
consent_list_all <- consent_list_all2 %>%
  mutate(id = paste0(settlement, house.no, hhd_id)) %>%
  group_by(id) %>%
  slice(if (all(is.na(date))) 1L else which.max(date))  #removed the duplicate
rm(consent_list_all2)

# this will get final save in hhd_id corrections R script
# CHECK: after collapsing, no key should occur twice.
id <- consent_list_all %>%
  mutate(id2 = paste0(settlement, house.no, hhd_id))
table(duplicated(id$id2)) # recorded result: 0
id[duplicated(id$id2), ]
rm(id)


# 4. PREPARE .CSV TO BE USED FOR NEXT SURVEY
# DATA TO IMPORT INTO NEXT SURVEY - CHILD SAMPLING ????? 2019
# COMMUNITY, HOUSE NO, NAME OF HEAD OF HHD, CONSENTS
# consent1_1	a) Participation in the 5-year research program, which will include an engineering intervention either during the research program (intervention group - tranche 1) or 2 years later at the end of the study ('control' group - tranche 2).
# consent1_3	c) My household taking part in surveys to be administered 3-monthly at my home over the period 2018 - 2022.

# consent.id.import <- consent_list_all
# 
# table(duplicated(consent.id.import)) #no duplicates; 703
# 
# # write_csv(consent.id.import, path = "S:/R-MNHS-SPHPM-EPM-IDEpi/RISE/4. Surveys/3. Objectives/3/20181112_Baseline/2. ID/2. Data/4. reports/consent.id.import.csv")
# rm(consent.id.import)


#############################################
#############################################
#CHILD CONSENT

#PULL OUT DATA TO FIX ALL CONSENT INFO TO MATCH BASELINE DATA

#consent and house numbering data
# old <- child_consent_list %>% 
#   select (settlement, house.no, child.name, dob, gender) %>% 
#   arrange (settlement, house.no, dob)
# write_csv(old, path = "S:/R-MNHS-SPHPM-EPM-IDEpi/RISE/4. Surveys/3. Objectives/1. FJ/3/20190624_baseline/3. Data/4. reports/old_child_consents.csv")
# rm(old)

# #new consents
# # 2. new consents collected during baseline survey (Nov 2018), child sampling (feb/mar 2019)
# consent.childname$dob <- dmy (consent.childname$dob)
# 
# # CHILD CONSENT
# colnames (consent.form3) <- paste (colnames (consent.form3), "3", sep = "_")
# names (consent.form3)[names (consent.form3) == "PARENT_KEY_3"] <- "PARENT_KEY"  # change column names
# names (consent)[names (consent) == "KEY"] <- "PARENT_KEY"  # change column names
# child.consent.merge <- merge (consent, consent.form3, by = "PARENT_KEY")  # MERGE using "PARENT_KEY"
# 
# # CHILD NAMES
# colnames (consent.childname) <- paste (colnames (consent.childname), "child", sep = "_")
# names (consent.childname)[names (consent.childname) == "PARENT_KEY_child"] <- "KEY_3"  # change column names
# childnames.merge <- merge (child.consent.merge, consent.childname, by = "KEY_3")
# 
# #child consents
# new <- childnames.merge %>%
#   select (settlement_name, house_no, child_name_child, gender_child, dob_child, today) %>%
#   rename (settlement = settlement_name, 
#           house.no = house_no,  
#           child.name = child_name_child, gender = gender_child) %>% 
#   mutate (gender = recode (gender, '0' = "female", '1' = "male")) %>% 
#   select (settlement, house.no, child.name, gender, dob_child) 
# rm(childnames.merge, child.consent.merge)
# 
# write_csv(new, path = "S:/R-MNHS-SPHPM-EPM-IDEpi/RISE/4. Surveys/3. Objectives/1. FJ/3/20190624_baseline/3. Data/4. reports/new_child_consents.csv")
# rm(new)



#- PULL ALL CONSENTS TOGETHER

# 1. Original consent and house-numbering survey
# Add the survey end date (11 June 2019, last day of the consent survey) and an
# empty hhd_id, put the columns in the order used for the combined child list,
# and keep only the first row for each settlement / house.no / dob /
# child.name / date combination (removes duplicate consents). The result stays
# grouped by those five columns.
child_consent_list <- child_consent_list %>%
  mutate(date = as.Date("2019-06-11"), hhd_id = "") %>%
  select(
    settlement, house.no, hhd_id, guardian.name, child.name, gender, dob,
    signed.yn, feces, blood, height.weight, samples.analysed, date
  ) %>%
  group_by(settlement, house.no, dob, child.name, date) %>%
  filter(row_number() == 1)

# CHECK: list children whose settlement + house.no + dob + child.name key
# occurs more than once. `id` is a scratch copy and is removed afterwards.
id <- mutate(child_consent_list, id = paste0(settlement, house.no, dob, child.name))
table(duplicated(id[["id"]])) # recorded result: 0
id[duplicated(id[["id"]]), ]  # recorded result: 1 duplicate;
# 2 entries for Nauluvatu #44
rm(id)

# 2. New consents collected during baseline survey (Nov 2018), child sampling (Feb/Mar 2019)
# The child's date of birth is parsed with dmy() once the tables are merged.

# CHILD CONSENT
# Suffix every form-3 column with "_3", restore the join key to its plain name,
# rename the parent table's KEY to match, and merge the two on PARENT_KEY.
names(consent.form3) <- paste0(names(consent.form3), "_3")
names(consent.form3) <- replace(
  names(consent.form3), names(consent.form3) == "PARENT_KEY_3", "PARENT_KEY"
)
names(consent) <- replace(names(consent), names(consent) == "KEY", "PARENT_KEY")
child.consent.merge <- merge(consent, consent.form3, by = "PARENT_KEY")

# CHILD NAMES
# Suffix every child-name column with "_child"; its parent key becomes KEY_3 so
# it lines up with the (suffixed) KEY column of the form-3 table.
names(consent.childname) <- paste0(names(consent.childname), "_child")
names(consent.childname) <- replace(
  names(consent.childname), names(consent.childname) == "PARENT_KEY_child", "KEY_3"
)

# Child consents: one row per child, with dob parsed from day-month-year text.
childnames.merge <- mutate(
  merge(child.consent.merge, consent.childname, by = "KEY_3"),
  dob = dmy(dob_child)
)

# Reduce the merged survey table to the child consent items, under the same
# column names and order as child_consent_list, with 0/1 codes spelled out.
# health.records is renamed and recoded like the other items but dropped at the
# end, so the result has the same columns as child_consent_list.
child.consent.items <- childnames.merge %>%
  select(
    settlement = settlement_name,
    house.no = house_no,
    hhd_id = hhd_head_name,
    guardian.name = guardian_form3_3,
    child.name = child_name_child,
    gender = gender_child,
    dob,
    signed.yn = signed_form3_3,
    feces = consent3_1_3,
    blood = consent3_2_3,
    height.weight = consent3_3_3,
    samples.analysed = consent3_4_3,
    health.records = consent3_5_3,
    date = today
  ) %>%
  mutate(
    gender = recode(gender, "0" = "female", "1" = "male"),
    signed.yn = recode(signed.yn, "0" = "no", "1" = "yes"),
    feces = recode(feces, "0" = "no", "1" = "yes"),
    blood = recode(blood, "0" = "no", "1" = "yes"),
    height.weight = recode(height.weight, "0" = "no", "1" = "yes"),
    samples.analysed = recode(samples.analysed, "0" = "no", "1" = "yes"),
    health.records = recode(health.records, "0" = "no", "1" = "yes")
  ) %>%
  select(-health.records)

# CHECK child.consent.items: list children whose settlement + house.no + dob +
# child.name key occurs more than once. `id` is a scratch copy and is removed
# afterwards.
id <- mutate(child.consent.items, id = paste0(settlement, house.no, dob, child.name))
table(duplicated(id[["id"]])) # recorded result: 0
id[duplicated(id[["id"]]), ]  # recorded result: 0
rm(id)


#3. COMBINE ALL CONSENTS
# Stack the original child consent list on top of the consents collected in the
# survey; both tables carry the same thirteen columns at this point.
# NOTE(review): child_consent_list appears to still be grouped from the
# de-duplication step (no ungroup() is called) - confirm whether the combined
# table is meant to stay grouped.
child_consent_list_all <- bind_rows (child_consent_list, child.consent.items) #455

#ensure house.no is integer
# NOTE(review): as.integer() turns any house number that is not purely numeric
# into NA (with a warning) - verify none exist in the inputs.
child_consent_list_all$house.no <- as.integer (child_consent_list_all$house.no)

#CHECK:
# List children whose settlement + house.no + dob + child.name key occurs more
# than once in the combined table. `id` is a scratch copy, removed afterwards.
id <- child_consent_list_all %>% 
  mutate (id = paste0(settlement, house.no, dob, child.name))
table(duplicated(id$id)) #0
id[duplicated(id$id),] #1 nauluvatu #44  duplicates; 
rm(id)

# Drop every intermediate table; only child_consent_list_all (and the household
# consent list built earlier) are needed from here on.
rm(child_consent_list, child.consent.items, child.consent.merge, 
   childnames.merge, consent, consent.childname, consent.form3)
#455 CONSENTS

# 4. PREPARE .CSV TO BE USED FOR NEXT SURVEY
# DATA TO IMPORT INTO NEXT SURVEY
# Row-sorted copy of the combined child consents (arrange() orders rows, not
# columns). The export itself is commented out, so the copy is created and
# discarded without being written anywhere.
child.consent.id.import <- arrange(
  child_consent_list_all, settlement, house.no, hhd_id, child.name
)

# write_csv(child.consent.id.import, path = "S:/R-MNHS-SPHPM-EPM-IDEpi/Current/RISE/4. Surveys/3. Objectives/3/20181112_Baseline/2. ID/2. Data/4. reports/child.consent.id.import.csv")
rm(child.consent.id.import)

# Mark the two name columns as coming from the consent forms.
child_consent_list_all <- rename(
  child_consent_list_all,
  guardian.name.consent = guardian.name,
  child.name.consent = child.name
)

# will be saved in hhd_id_corrections R script

# PREPARE LIST OF ALL CHILDREN WITH CONSENTS TO BE USED BY FIELD TEAM
# MERGE WITH LIST OF ALL CHILDREN FROM BASELINE - 
# this should be done in O3_T0_ID-summary.R
# setwd("S:/R-MNHS-SPHPM-EPM-IDEpi/Current/RISE/4. Surveys/3. Objectives/3/20181112_Baseline/2. ID/2. Data/4. reports/")
# children_under5 <- read_csv (file = "children_under5.csv") #277 kids

# children_under5 <- children_under5 %>% 
#   rename (child.name = name) %>% 
#   select (settlement, house.no, gender, dob, hhd_name, child.name)
# 
# child.list <- full_join (children_under5, child_consent_list_all, 
#                          by = c("settlement" = "settlement", 
#                                 "house.no" = "house.no", 
#                                 "gender" = "gender", 
#                                 "dob" = "dob")) %>% 
#   select (settlement, house.no, gender, dob, hhd_name, guardian.name.consent, hhd_head_name, 
#           child.name, child.name.consent, signed.yn, feces, blood, height.weight, samples.analysed, date) %>% 
#   arrange (settlement, house.no, dob, child.name, child.name.consent)   

# write_csv(child.list, path = "S:/R-MNHS-SPHPM-EPM-IDEpi/Current/RISE/4. Surveys/3. Objectives/3/20181112_Baseline/2. ID/2. Data/4. reports/child.list.id.import.csv")