# RISE_FJ/T0_baseline/O3_T0_FJ-person_list.R

# PULL OUT A LIST OF ALL PEOPLE INVOLVED IN THE BASELINE SURVEY COMBINED WITH CHILD CONSENT DATA
# VARIABLES NEEDED: settlement, house.no, hhd_id, name, dob, gender, signed.yn, feces, blood, 
# height.weight, samples.analysed, date_consented

# 1) compile full list of hhd_id
# 2) compile full list of people 

#######################################
#######################################
#######################################
#1) FULL LIST OF hhd_id - both from house and hhd surveys
#######################################
#######################################
#######################################
# Household identifiers recorded in the house survey.
# Where hhd_id is missing, hhd_name is used in its place; rows that still have
# no hhd_id are dropped. adult_respondent_name is added as NA so the columns
# line up with the household-survey list for rbind() below.
house_hhd_id <- house.merge %>%
  select(settlement_barcode, extract_house_no, hhd_id, hhd_name) %>%
  mutate(hhd_id = ifelse(is.na(hhd_id), hhd_name, hhd_id)) %>%
  select(-hhd_name) %>%
  filter(!is.na(hhd_id)) %>%
  unique() %>%
  mutate(adult_respondent_name = NA) # 770 (row count noted by original author)

# Household identifiers recorded in the household survey, with the same
# hhd_name fallback, plus the adult respondent's name.
hhd_hhd_id <- hhd %>%
  select(settlement_barcode, extract_house_no, hhd_id, hhd_name,
         adult_respondent_name) %>%
  mutate(hhd_id = ifelse(is.na(hhd_id), hhd_name, hhd_id)) %>%
  select(-hhd_name) %>%
  filter(!is.na(hhd_id)) %>%
  unique() # 769

# Stack both lists and, per settlement/house/household, keep the first row
# plus any other row that carries a respondent name. arrange() puts NA
# respondent names last, so the NA-only house-survey row is dropped whenever
# a household-survey row exists for the same household.
# ungroup() at the end so the grouping does not leak into later joins and
# checks that start from final_hhd_id.
final_hhd_id <- rbind(house_hhd_id, hhd_hhd_id) %>%
  arrange(settlement_barcode, extract_house_no, hhd_id,
          adult_respondent_name) %>%
  group_by(settlement_barcode, extract_house_no, hhd_id) %>%
  mutate(number = row_number()) %>%
  filter(number == 1 | (!is.na(adult_respondent_name))) %>%
  select(-number) %>%
  ungroup() # 771

# check: settlement + house + household should identify one row each.
# The key is built with an explicit separator so that different combinations
# of fields (e.g. house "1" + hhd "12" vs house "11" + hhd "2") cannot
# collapse to the same string and show up as false duplicates.
id <- final_hhd_id %>%
  mutate(id = paste(settlement_barcode, extract_house_no, hhd_id, sep = "|"))
table(duplicated(id$id)) # 0 duplicates
id[duplicated(id$id), ] # list any duplicated rows for manual review
rm(id)

# write_csv(final_hhd_id, path = "S:/R-MNHS-SPHPM-EPM-IDEpi/RISE/4. Surveys/3. Objectives/1. FJ/3/20190624_baseline/3. Data/4. reports/final_hhd_id.csv")

# check: houses that contain more than one household.
# `count` is the number of distinct hhd_id values within each
# settlement/house, repeated on every row of that house.
check <- final_hhd_id %>%
  select(settlement_barcode, extract_house_no, hhd_id) %>%
  distinct() %>%
  group_by(settlement_barcode, extract_house_no) %>%
  mutate(count = n())
table(check[["count"]]) # 2 with 2
rm(check)

#######################################
#FULL LIST OF hhd_id - both from house and hhd surveys - and add household consent 
#######################################

# Household consents: keep only rows that have a house number, a hhd_id and a
# signed.yn value (if not signed, it isn't a consent), then number the rows
# within each settlement/house/household so repeats can be spotted.
consent <- consent_list_all %>% # 840
  select(settlement, house.no, hhd_id, signed.yn, study, surveys, date) %>%
  filter(
    !is.na(house.no),
    !is.na(hhd_id),
    !is.na(signed.yn)
  ) %>%
  group_by(settlement, house.no, hhd_id) %>%
  mutate(number = row_number()) # 833
table(consent[["number"]]) # no duplicates yet

# Attach household consent to the household list. A full join keeps
# households without a consent row and consent rows without a household.
final_hhd_id_consent <- final_hhd_id %>%
  full_join(
    consent,
    by = c(
      "settlement_barcode" = "settlement",
      "extract_house_no" = "house.no",
      "hhd_id" = "hhd_id"
    )
  ) # 837

# manually check - households with a survey respondent but no signed consent
no.hhd.consent <- final_hhd_id_consent %>%
  filter(!is.na(adult_respondent_name), is.na(signed.yn))

# they are still to get hhd consent for Muanivatu #17:Anitekini Sivo and Maisasi
rm(consent)

# check: the consent join should not have duplicated any household.
# The key uses an explicit separator so that different field combinations
# cannot concatenate to the same string and be reported as false duplicates.
id <- final_hhd_id_consent %>%
  mutate(id = paste(settlement_barcode, extract_house_no, hhd_id, sep = "|"))
table(duplicated(id$id)) # dup removed above
id[duplicated(id$id), ] # 0 dup
rm(id)

# write_csv(final_hhd_id_consent, path = "S:/R-MNHS-SPHPM-EPM-IDEpi/RISE/4. Surveys/3. Objectives/1. FJ/3/20190624_baseline/3. Data/4. reports/final_hhd_id_consent.csv")

# # CHECK: but what about hhd with only 1 person? - need to check these
# person <- hhd.merge %>%
#   select (settlement_barcode, extract_house_no, hhd_id, concat_name_age,
#           person_name, person_dob) %>%
#   group_by(settlement_barcode, extract_house_no, hhd_id) %>%
#   summarize (count = n()) %>%
#   group_by(settlement_barcode, extract_house_no) %>%
#   mutate (count_hhd = n()) 
# table(person$count) #38 with only 1 person! check with Ateca -we think ok
# x <- person %>% 
#   filter(count==1) %>% 
#   filter(!is.na(hhd_id)) #35 with data
# rm(person, x)

#######################################
#2) FULL LIST OF people, hhd consents and child consents
#######################################
# This will then be used to generate person_id in *** participants_list.R

# adult_respondent and caregiver and dob 

# define baseline_yn = 1 if !is.na(person_relationship) - 
# i.e. if there is a response to this question, then baseline survey was done
#note - where children in the house - use caregiver_name
#otherwise, use adult_respondent_name as starting point for respondent *****

# One row per named person in the household survey, with display labels.
#   name        - first name, plus last name when one is recorded
#   name_dob    - first name with date of birth appended
#   age         - (today - person_dob) / 365, rounded to 1 decimal place
#   name_age    - first name with that age appended
#   baseline_yn - 1 when person_relationship was answered, otherwise 0
people1 <- hhd.merge %>% # 4291
  select(
    settlement_barcode, extract_house_no, extract_settlement,
    person_name, person_name_last, person_dob, person_gender,
    person_relationship, hhd_id, today
  ) %>%
  filter(!is.na(person_name)) %>% # removed 4 = 4277
  mutate(
    name = ifelse(
      is.na(person_name_last),
      person_name,
      paste(person_name, person_name_last)
    ),
    name_dob = paste0(person_name, " (dob=", person_dob, ")"),
    age = round((today - person_dob) / 365, 1),
    name_age = paste0(person_name, " (age ", age, ")"),
    first_name = person_name,
    baseline_yn = ifelse(is.na(person_relationship), 0, 1)
  ) %>%
  select(-person_name, -person_name_last, -person_relationship) %>%
  rename(settlement = settlement_barcode, house.no = extract_house_no) %>%
  arrange(settlement, house.no, hhd_id) # 4287

# check: the same name + dob should not appear twice in one house
# (but remember there could be twins!).
# The key uses an explicit separator so that different field combinations
# cannot concatenate to the same string and be reported as false duplicates.
id <- people1 %>%
  mutate(id = paste(settlement, house.no, name, person_dob, sep = "|"))
table(duplicated(id$id)) # 0 duplicates with name and dob
id[duplicated(id$id), ]
rm(id)

###############
#make full list of people and hhd_id 
# INCLUDE ALL HHD_ID, even if no match to baseline data
# infill name = hhd_id
###############

# Combine people with the household/consent list. A full join keeps
# households that have no baseline people; those appear as rows with no
# name or dob (hhd_id placeholders).
person_list_baseline1 <- full_join(people1, final_hhd_id_consent,
                                   by = c("settlement" = "settlement_barcode",
                                          "house.no" = "extract_house_no",
                                          "hhd_id" = "hhd_id")) # include hhds with no baseline

# check: duplicated name + dob within a house (but remember there could be
# twins!). The key uses an explicit separator so that different field
# combinations cannot concatenate to the same string.
id <- person_list_baseline1 %>%
  mutate(id = paste(settlement, house.no, name, person_dob, sep = "|"))
table(duplicated(id$id)) # 2 - but these are hhd_id placeholders with no name and dob! ok
id[duplicated(id$id), ]
rm(id)

table(is.na(person_list_baseline1$name)) # 66 with no name - hhd_id placeholders
rm(people1)

##################################
#EXPORT LIST OF CHILDREN TO MATCH TO CONSENT DATA
# children <- person_list_baseline1 %>% 
#   select(settlement, house.no, hhd_id, name, person_dob, person_gender, age) %>%
#   filter(age<6.0) %>% 
#   arrange (settlement, house.no, person_dob)
# # mutate (age_calc = (today - person_dob)/365) %>% 
# #   mutate (check = age - age_calc) 
# # max(children$check) #- age looks fine
# 
# # write_csv(children, path = "S:/R-MNHS-SPHPM-EPM-IDEpi/RISE/4. Surveys/3. Objectives/1. FJ/3/20190624_baseline/3. Data/4. reports/children.csv")
# 
# rm(children)

##########################
#add child consents

# 1) rename variables in child consent form so they do not clash with the
#    household consent / baseline columns when joined
child_consent2 <- child_consent_list_all %>%
  rename(child.signed.yn = signed.yn,
         date_child_consent = date,
         gender_child = gender,
         child_dob = dob) # 455

# check: duplicated child name + dob within a house (but remember there could
# be twins!). The key uses an explicit separator so that different field
# combinations cannot concatenate to the same string.
id <- child_consent2 %>%
  mutate(id = paste(settlement, house.no, child.name.consent, child_dob,
                    sep = "|"))
table(duplicated(id$id))
id[duplicated(id$id), ]
rm(id)


# 2) merge - twins (or children sharing a dob) must stay distinct, so the
#    join key includes both name and dob - only 3 matches!
person_list_baseline2 <- person_list_baseline1 %>%
  full_join(
    child_consent2,
    by = c(
      "settlement" = "settlement",
      "house.no" = "house.no",
      "person_dob" = "child_dob",
      "name" = "child.name.consent"
    )
  ) # 4168 - only matched 3

# hhd_id and gender come from the baseline side when present,
# otherwise from the child consent side.
person_list_baseline2 <- person_list_baseline2 %>%
  mutate(
    hhd_id = ifelse(is.na(hhd_id.x), hhd_id.y, hhd_id.x),
    gender = ifelse(is.na(person_gender), gender_child, person_gender)
  ) %>%
  select(-c(hhd_id.x, hhd_id.y, person_gender, gender_child))

# consents that didn't match baseline data: a guardian name is recorded but
# there is no baseline flag
not.matched <- person_list_baseline2 %>%
  filter(!is.na(guardian.name.consent), is.na(baseline_yn)) # 50

# children in baseline with no consent
no.child.consent <- person_list_baseline2 %>%
  filter(age < 5.0, is.na(child.signed.yn)) # 139

# write_csv(not.matched, path = "S:/R-MNHS-SPHPM-EPM-IDEpi/RISE/4. Surveys/3. Objectives/1. FJ/3/20190624_baseline/3. Data/4. reports/not.matched.csv")


####################
# 1. household consent but no baseline data
#    (no respondent name, but signed.yn is present)
check1 <- final_hhd_id_consent %>%
  filter(is.na(adult_respondent_name), !is.na(signed.yn))
# 67 hhd consents with no baseline data
# **keep these in

# 2. baseline data but no household consent
#    (respondent name present, but no signed.yn)
check2 <- final_hhd_id_consent %>%
  filter(!is.na(adult_respondent_name), is.na(signed.yn))
# they are still to get hhd consent for Muanivatu #17:Anitekini Sivo and Maisasi

# 3. child baseline data but no child consent
no.child.consent %>% count()
# 139 child entries with no child consent

# 4. child consent but no baseline survey
#    (child.signed.yn present, but no age was computed)
check3 <- person_list_baseline2 %>%
  filter(!is.na(child.signed.yn), is.na(age))
# 50, remove these from list

##############
#REMOVE CHILD CONSENTS WITH NO BASELINE DATA
#FIELD TEAM HAVE ADVISED THAT THESE CHILDREN HAVE MOVED OUT OF THE SETTLEMENT
# Drop rows that have a child consent but no age, i.e. child consents that
# did not match any baseline person.
person_list_baseline3 <- person_list_baseline2 %>%
  filter(!(!is.na(child.signed.yn) & is.na(age)))
rm(person_list_baseline1, person_list_baseline2)

# check: duplicated person within a house. The key uses an explicit separator
# so that different field combinations cannot concatenate to the same string
# and be reported as false duplicates.
id <- person_list_baseline3 %>%
  mutate(id = paste(settlement, house.no, person_dob, name, sep = "|"))
table(duplicated(id$id)) # 2 dup; these are hhd_id placeholders - so ok
id[duplicated(id$id), ]
rm(id)

# Export the final person list. The output file is passed positionally because
# readr >= 1.4 deprecated the `path =` argument in favour of `file =`; the
# positional form works on both old and new readr versions.
write_csv(person_list_baseline3, "Z:/Data Files/Data Files Objective 3/Reports/person_list_baseline_FJ.csv")
# # this is the list that will be used to generate person_id for baseline
# # printed on ??????
# 



###############################
###############################
# # go to FJ_201908_O3_4_participants_list.R ***********************
###############################
###############################

# Remove the temporary check objects created above
rm(list = c("not.matched", "check1", "check2", "check3"))
# Monash-RISE/riseR documentation built on Dec. 11, 2019, 9:49 a.m.