# PULL OUT A LIST OF ALL PEOPLE INVOLVED IN THE BASELINE SURVEY COMBINED WITH CHILD CONSENT DATA
# VARIABLES NEEDED: settlement, house.no, hhd_id, name, dob, gender, signed.yn, feces, blood,
# height.weight, samples.analysed, date_consented
# 1) compile full list of hhd_id
# 2) compile full list of people
#######################################
#######################################
#######################################
#1) FULL LIST OF hhd_id - both from house and hhd surveys
#######################################
#######################################
#######################################
# Household identifiers recorded in the house survey. Where hhd_id is missing
# the household name (hhd_name) stands in for it; rows with neither are
# dropped and exact repeats are collapsed. adult_respondent_name is added as
# an all-NA column so the columns line up with the household-survey list that
# this table is row-bound with. (Author's note: 770 rows.)
house_hhd_id <- house.merge %>%
  select(settlement_barcode, extract_house_no, hhd_id, hhd_name) %>%
  mutate(hhd_id = ifelse(!is.na(hhd_id), hhd_id, hhd_name)) %>%
  select(settlement_barcode, extract_house_no, hhd_id) %>%
  filter(!is.na(hhd_id)) %>%
  unique() %>%
  mutate(adult_respondent_name = NA)
# Household identifiers recorded in the household survey, together with the
# adult respondent's name. As for the house survey, hhd_name is used when
# hhd_id is missing, rows with no identifier are dropped and exact repeats
# are collapsed. (Author's note: 769 rows.)
hhd_hhd_id <- hhd %>%
  select(
    settlement_barcode, extract_house_no, hhd_id, hhd_name,
    adult_respondent_name
  ) %>%
  mutate(hhd_id = ifelse(!is.na(hhd_id), hhd_id, hhd_name)) %>%
  select(
    settlement_barcode, extract_house_no, hhd_id, adult_respondent_name
  ) %>%
  filter(!is.na(hhd_id)) %>%
  unique()
# Combined household list from both surveys. Within each settlement / house /
# household the rows are ordered by respondent name, and arrange() places NA
# last, so a named respondent always precedes the NA placeholder contributed
# by the house survey. The first row of each group is always kept; any further
# row is kept only when it carries a respondent name.
# (Author's note: 771 rows.)
final_hhd_id <- rbind(house_hhd_id, hhd_hhd_id) %>%
  arrange(
    settlement_barcode, extract_house_no, hhd_id, adult_respondent_name
  ) %>%
  group_by(settlement_barcode, extract_house_no, hhd_id) %>%
  filter(row_number() == 1 | !is.na(adult_respondent_name)) %>%
  # Release the grouping so it does not leak into the checks and joins that
  # consume final_hhd_id further down.
  ungroup()
# Check for repeated settlement + house + household keys in final_hhd_id:
# tabulate the duplicate flag, then show any offending rows.
# (Author's note: 0 duplicates.)
id <- final_hhd_id %>%
  mutate(id = paste0(settlement_barcode, extract_house_no, hhd_id))
table(duplicated(id[["id"]]))
id[duplicated(id[["id"]]), ]
rm(id)
# write_csv(final_hhd_id, path = "S:/R-MNHS-SPHPM-EPM-IDEpi/RISE/4. Surveys/3. Objectives/1. FJ/3/20190624_baseline/3. Data/4. reports/final_hhd_id.csv")
# Houses holding more than one household: reduce to distinct
# settlement / house / household rows, count the rows per house, and tabulate
# that count. (Author's note on the result: "2 with 2".)
check <- final_hhd_id %>%
  select(settlement_barcode, extract_house_no, hhd_id) %>%
  unique() %>%
  group_by(settlement_barcode, extract_house_no) %>%
  mutate(count = n())
table(check[["count"]])
rm(check)
#######################################
#FULL LIST OF hhd_id - both from house and hhd surveys - and add household consent
#######################################
# Household consents that can be linked to a household. Records are dropped
# when they lack a house number, lack an hhd_id, or have no signature (an
# unsigned form is not a consent).
# (Author's notes: 840 rows in, 833 rows out.)
consent <- consent_list_all %>%
  select(settlement, house.no, hhd_id, signed.yn, study, surveys, date) %>%
  filter(!is.na(house.no), !is.na(hhd_id), !is.na(signed.yn)) %>%
  # Running index within each settlement / house / household; any value above
  # 1 flags a repeated consent. The column is kept, so it travels with
  # consent into the join that follows.
  group_by(settlement, house.no, hhd_id) %>%
  mutate(number = row_number()) %>%
  # Release the grouping once the index has been computed.
  ungroup()
table(consent$number) # author's note: no duplicates yet
# Attach household consent to the household list. The full join keeps
# households that have no consent record as well as consents that have no
# surveyed household. (Author's note: 837 rows.)
final_hhd_id_consent <- final_hhd_id %>%
  full_join(
    consent,
    by = c(
      settlement_barcode = "settlement",
      extract_house_no = "house.no",
      hhd_id = "hhd_id"
    )
  )
# Households that have a survey respondent but no signed household consent,
# listed for manual follow-up.
# Author's note: hhd consent is still to be obtained for Muanivatu #17
# (Anitekini Sivo and Maisasi).
no.hhd.consent <- final_hhd_id_consent %>%
  filter(!is.na(adult_respondent_name), is.na(signed.yn))
# The filtered consent table is no longer needed once it has been joined.
rm(consent)
# Check for repeated settlement + house + household keys after the consent
# join: tabulate the duplicate flag, then show any offending rows.
# (Author's notes: duplicates were removed above; 0 duplicates found.)
id <- final_hhd_id_consent %>%
  mutate(id = paste0(settlement_barcode, extract_house_no, hhd_id))
table(duplicated(id[["id"]]))
id[duplicated(id[["id"]]), ]
rm(id)
# write_csv(final_hhd_id_consent, path = "S:/R-MNHS-SPHPM-EPM-IDEpi/RISE/4. Surveys/3. Objectives/1. FJ/3/20190624_baseline/3. Data/4. reports/final_hhd_id_consent.csv")
# # CHECK: but what about hhd with only 1 person? - need to check these
# person <- hhd.merge %>%
# select (settlement_barcode, extract_house_no, hhd_id, concat_name_age,
# person_name, person_dob) %>%
# group_by(settlement_barcode, extract_house_no, hhd_id) %>%
# summarize (count = n()) %>%
# group_by(settlement_barcode, extract_house_no) %>%
# mutate (count_hhd = n())
# table(person$count) #38 with only 1 person! check with Ateca -we think ok
# x <- person %>%
# filter(count==1) %>%
# filter(!is.na(hhd_id)) #35 with data
# rm(person, x)
#######################################
#2) FULL LIST OF people, hhd consents and child consents
#######################################
# This will then be used to generate person_id in *** participants_list.R
# adult_respondent and caregiver and dob
# define baseline_yn = 1 if !is.na(person_relationship) -
# i.e. if there is a response to this question, then baseline survey was done
#note - where children in the house - use caregiver_name
#otherwise, use adult_respondent_name as starting point for respondent *****
# One row per person listed in hhd.merge, skipping rows with no person_name.
# Derived columns:
#   name        - first name, plus the last name when one is recorded
#   name_dob    - first name with date of birth, for display
#   age         - (today - person_dob) / 365, rounded to one decimal
#   name_age    - first name with age, for display
#   first_name  - copy of person_name
#   baseline_yn - 1 when person_relationship was answered, otherwise 0
# NOTE(review): age is in years only if today - person_dob is measured in
# days (true when both columns are Date) - confirm the column types.
# (Author's row-count notes: 4291 before filtering, 4287 after.)
people1 <- hhd.merge %>%
  select(
    settlement = settlement_barcode,
    house.no = extract_house_no,
    extract_settlement,
    person_name, person_name_last, person_dob, person_gender,
    person_relationship, hhd_id, today
  ) %>%
  filter(!is.na(person_name)) %>%
  mutate(
    name = ifelse(
      !is.na(person_name_last),
      paste(person_name, person_name_last),
      person_name
    ),
    name_dob = paste0(person_name, " (dob=", person_dob, ")"),
    age = round((today - person_dob) / 365, 1),
    name_age = paste0(person_name, " (age ", age, ")"),
    first_name = person_name,
    baseline_yn = ifelse(is.na(person_relationship), 0, 1)
  ) %>%
  select(-c(person_name, person_name_last, person_relationship)) %>%
  arrange(settlement, house.no, hhd_id)
# Check for repeated people using settlement + house + name + date of birth
# as the key. Author's reminder: there could be twins (same date of birth),
# which is why the name is part of the key.
# (Author's note: 0 duplicates with name and dob.)
id <- people1 %>%
  mutate(id = paste0(settlement, house.no, name, person_dob))
table(duplicated(id[["id"]]))
id[duplicated(id[["id"]]), ]
rm(id)
###############
# Full list of people and hhd_id.
# Every hhd_id is included, even when it has no matching baseline data: the
# full join keeps such households as rows with no person details.
# Author's note: infill name = hhd_id (no infill is performed in this step).
###############
person_list_baseline1 <- people1 %>%
  full_join(
    final_hhd_id_consent,
    by = c(
      settlement = "settlement_barcode",
      house.no = "extract_house_no",
      hhd_id = "hhd_id"
    )
  )
# Repeat the settlement + house + name + dob duplicate check on the joined
# list (author's reminder: there could be twins).
# Author's note: 2 duplicates, both hhd_id placeholders with no name and dob,
# so acceptable.
id <- person_list_baseline1 %>%
  mutate(id = paste0(settlement, house.no, name, person_dob))
table(duplicated(id[["id"]]))
id[duplicated(id[["id"]]), ]
rm(id)
# Count rows without a name. (Author's note: 66, the hhd_id placeholders.)
table(is.na(person_list_baseline1[["name"]]))
rm(people1)
##################################
#EXPORT LIST OF CHILDREN TO MATCH TO CONSENT DATA
# children <- person_list_baseline1 %>%
# select(settlement, house.no, hhd_id, name, person_dob, person_gender, age) %>%
# filter(age<6.0) %>%
# arrange (settlement, house.no, person_dob)
# # mutate (age_calc = (today - person_dob)/365) %>%
# # mutate (check = age - age_calc)
# # max(children$check) #- age looks fine
#
# # write_csv(children, path = "S:/R-MNHS-SPHPM-EPM-IDEpi/RISE/4. Surveys/3. Objectives/1. FJ/3/20190624_baseline/3. Data/4. reports/children.csv")
#
# rm(children)
##########################
#add child consents
#1) rename variables in child consent form
# Child consent records with the signature, date, gender and date-of-birth
# columns renamed ahead of the merge with the person list.
# (Author's note: 455 rows.)
child_consent2 <- child_consent_list_all %>%
  rename(
    child_dob = dob,
    gender_child = gender,
    date_child_consent = date,
    child.signed.yn = signed.yn
  )
# Check for repeated child consents using settlement + house + child name +
# date of birth as the key (author's reminder: there could be twins).
id <- child_consent2 %>%
  mutate(id = paste0(settlement, house.no, child.name.consent, child_dob))
table(duplicated(id[["id"]]))
id[duplicated(id[["id"]]), ]
rm(id)
# 2) Merge child consents onto the person list. To allow for twins (or any
# shared date of birth) the match requires settlement, house, date of birth
# and name to agree. (Author's notes: 4168 rows; only 3 matches.)
# Both inputs carry hhd_id, so the join yields hhd_id.x / hhd_id.y; the
# person-list value is preferred, with the consent value as fallback. Gender
# is resolved the same way, and the four source columns are then dropped.
person_list_baseline2 <- person_list_baseline1 %>%
  full_join(
    child_consent2,
    by = c(
      settlement = "settlement",
      house.no = "house.no",
      person_dob = "child_dob",
      name = "child.name.consent"
    )
  ) %>%
  mutate(
    hhd_id = ifelse(is.na(hhd_id.x), hhd_id.y, hhd_id.x),
    gender = ifelse(is.na(person_gender), gender_child, person_gender)
  ) %>%
  select(-c(hhd_id.x, hhd_id.y, person_gender, gender_child))
# Child consents that did not match baseline data: a consent guardian name is
# present but baseline_yn is missing. (Author's note: 50 rows.)
not.matched <- person_list_baseline2 %>%
  filter(is.na(baseline_yn), !is.na(guardian.name.consent))
# Children in the baseline data (age below 5) with no child consent.
# (Author's note: 139 rows.)
no.child.consent <- person_list_baseline2 %>%
  filter(age < 5.0, is.na(child.signed.yn))
# write_csv(not.matched, path = "S:/R-MNHS-SPHPM-EPM-IDEpi/RISE/4. Surveys/3. Objectives/1. FJ/3/20190624_baseline/3. Data/4. reports/not.matched.csv")
####################
# Reconciliation checks between consent records and baseline data.
# 1. Household consent but no baseline data (no adult respondent name).
#    Author's note: 67 such consents; these are kept in.
check1 <- final_hhd_id_consent %>%
  filter(!is.na(signed.yn), is.na(adult_respondent_name))
# 2. Baseline data but no household consent.
#    Author's note: hhd consent is still to be obtained for Muanivatu #17
#    (Anitekini Sivo and Maisasi).
check2 <- final_hhd_id_consent %>%
  filter(is.na(signed.yn), !is.na(adult_respondent_name))
# 3. Child baseline data but no child consent.
#    Author's note: 139 child entries.
count(no.child.consent)
# 4. Child consent but no baseline survey (age is missing).
#    Author's note: 50 rows; these are removed from the list.
check3 <- person_list_baseline2 %>%
  filter(!is.na(child.signed.yn), is.na(age))
##############
# Remove child consents with no baseline data, i.e. rows where a child
# consent is signed but age is missing. Author's note: the field team have
# advised that these children have moved out of the settlement.
person_list_baseline3 <- person_list_baseline2 %>%
  filter(is.na(child.signed.yn) | !is.na(age))
rm(person_list_baseline1, person_list_baseline2)
# Final duplicate check on settlement + house + dob + name.
# Author's note: 2 duplicates, both hhd_id placeholders, so acceptable.
id <- person_list_baseline3 %>%
  mutate(id = paste0(settlement, house.no, person_dob, name))
table(duplicated(id[["id"]]))
id[duplicated(id[["id"]]), ]
rm(id)
#
# Export the person list that the next script uses to generate person_id.
# The destination is passed positionally: readr 1.4.0 renamed this argument
# from `path` to `file`, so `path =` raises a deprecation warning on current
# readr while `file =` fails on older readr. Positional works on both.
write_csv(
  person_list_baseline3,
  "Z:/Data Files/Data Files Objective 3/Reports/person_list_baseline_FJ.csv"
)
# # this is the list that will be used to generate person_id for baseline
# # printed on ??????
#
###############################
###############################
# # go to FJ_201908_O3_4_participants_list.R ***********************
###############################
###############################
# Remove the temporary check tables created above.
rm(list = c("not.matched", "check1", "check2", "check3"))
# (Web-page residue, not part of the script; kept as comments so the file parses.)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.