# GENERATE FILE TO BE USED IN THE NEXT SURVEY (CHILD SAMPLING)
# Get the final list of person_ids from baseline. # run on XXXXX 2019
#
# The participant file is read through its full path rather than by changing
# the working directory just to resolve one relative file name.
baseline_FJ <- read_csv(
  file = file.path(
    "S:/R-MNHS-SPHPM-EPM-IDEpi/RISE/4. Surveys/4. Participants tracking/1. FJ/4. reports",
    "participants_FJ_201908_baseline.csv"
  )
) # xxx people
# Working directory used from here on (kept so the session ends up in the same
# directory as before; every output path below is absolute).
setwd("S:/R-MNHS-SPHPM-EPM-IDEpi/RISE/4. Surveys/3. Objectives/1. FJ/3/20190624_baseline/3. Data")
# Infill the one-letter settlement code.
# A named lookup replaces the 12-deep nested ifelse(): a known settlement maps
# to its letter, an unknown settlement name maps to "" and a missing settlement
# stays NA (the same values the nested ifelse() produced). The result is always
# a character vector. Note the codes skip the letter "I".
baseline_FJ$extract_settlement <- local({
  settlement_codes <- c(
    Wainivokai = "A", Lobau = "B", Komave = "C", Matata = "D",
    Nauluvatu = "E", Muanikoso = "F", Wailea = "G", Newtown = "H",
    Muanivatu = "J", Kinoya = "K", Maravu = "L", Wainidinu = "M"
  )
  settlement <- as.character(baseline_FJ$settlement)
  code <- unname(settlement_codes[match(settlement, names(settlement_codes))])
  # match() returns NA both for unknown names and for NA input; only the
  # unknown (non-missing) names fall through to the "" default.
  code[is.na(code) & !is.na(settlement)] <- ""
  code
})
#############
# DATA REQUIRED FOR SAMPLING SURVEY:
# SETTLEMENT, HOUSE.NO, HHD_ID, HHD_ID_NO
# HHD CONSENT VARIABLES X 3
# name, dob, person_id, gender,
# child consent: child.signed.yn, feces, blood, height
#
# people1 adds to the baseline list:
#   number_hhd - sequence number of the household within its house
#   code2      - <settlement code>-<house.no>-<number_hhd>            (household)
#   code1      - <settlement code>-<house.no>-<person no. in house>   (person)
#   code       - <settlement code>-<house.no>-<number_hhd>-<person no. in hhd>
people1 <- baseline_FJ %>%
  select(
    settlement, house.no, hhd_id, extract_settlement,
    name, dob, person_id, gender,
    signed.yn, study, surveys, child.signed.yn, feces, blood, height.weight
  ) %>%
  # position of each person within their household
  group_by(settlement, house.no, hhd_id) %>%
  mutate(person_in_hhd = row_number()) %>%
  arrange(settlement, house.no, person_in_hhd) %>%
  # number the rows that share the same within-household position in a house;
  # for the first person of each household this numbers the households
  group_by(settlement, house.no, person_in_hhd) %>%
  mutate(hhd_seq = row_number()) %>%
  arrange(settlement, house.no, hhd_id) %>%
  mutate(hhd_seq_first = ifelse(person_in_hhd == 1, hhd_seq, 0)) %>%
  # copy the household number from its first person to every member
  group_by(settlement, house.no, hhd_id) %>%
  mutate(number_hhd = max(hhd_seq_first)) %>%
  ungroup() %>%
  select(-person_in_hhd, -hhd_seq, -hhd_seq_first) %>%
  group_by(settlement, house.no) %>%
  mutate(
    code2 = paste0(extract_settlement, "-", house.no, "-", number_hhd),
    person_in_house = row_number(),
    code1 = paste0(extract_settlement, "-", house.no, "-", person_in_house)
  ) %>%
  select(-person_in_house) %>%
  group_by(settlement, house.no, number_hhd) %>%
  mutate(
    person_in_hhd = row_number(),
    code = paste0(extract_settlement, "-", house.no, "-", number_hhd, "-", person_in_hhd)
  ) %>%
  ungroup() %>%
  select(-person_in_hhd, -extract_settlement)
#
# The target file is passed positionally: the `path` argument name is
# deprecated in readr >= 1.4 (renamed `file`); positional works in both.
write_csv(
  people1,
  "S:/R-MNHS-SPHPM-EPM-IDEpi/RISE/4. Surveys/3. Objectives/1. FJ/3/20190624_baseline/3. Data/4. reports/person_list_FJ_201908.csv"
)
# #run on xx
#############
# pull out what Data Team needs for supervisor log sheet
#############
# One row per child under 5 (name and dob shown). A household with no child
# under 5 is kept as a single row with name and dob blanked.
supervisor_logsheet <- people1 %>%
  select(settlement, house.no, hhd_id, name, dob, gender) %>%
  mutate(
    # Exact age in calendar years. (today() - dob) / 365 ignores leap days and
    # gives exactly 5.0 for a child one day short of their 5th birthday.
    age = time_length(interval(dob, today()), unit = "years"),
    under5 = ifelse(age < 5.00, 1, 0)
  ) %>%
  arrange(settlement, house.no, hhd_id, desc(under5)) %>%
  group_by(settlement, house.no, hhd_id) %>%
  mutate(count = row_number()) %>%
  filter(under5 == 1 | count == 1) %>%
  mutate(
    # if_else() keeps the Date class of dob; base ifelse() strips it and the
    # log sheet would show day counts instead of dates.
    name = if_else(under5 == 1, name, NA_character_),
    dob = if_else(under5 == 1, dob, as.Date(NA))
  ) %>%
  ungroup() %>%
  select(-gender, -age, -under5, -count)
# Target passed positionally: `path =` is deprecated in readr >= 1.4.
write_csv(
  supervisor_logsheet,
  "S:/R-MNHS-SPHPM-EPM-IDEpi/RISE/4. Surveys/3. Objectives/1. FJ/3/20190624_baseline/3. Data/4. reports/supervisor_logsheet.csv"
)
rm(supervisor_logsheet)
#############
#############
# list of children under 5 - for a set date (30 September 2019)
#############
# `age` is the exact age in calendar years on the reference date. Dividing a
# day count by 365 gives exactly 5.0 for a child born 1 October 2014 (one day
# short of 5 on 30 September 2019) and wrongly drops them from the list.
children_under5 <- people1 %>%
  select(settlement, house.no, hhd_id, name, dob, gender) %>%
  mutate(
    age = time_length(interval(dob, dmy("30/09/2019")), unit = "years"),
    under5 = ifelse(age < 5.00, 1, 0)
  ) %>%
  filter(under5 == 1)
# Target passed positionally: `path =` is deprecated in readr >= 1.4.
write_csv(
  children_under5,
  "S:/R-MNHS-SPHPM-EPM-IDEpi/RISE/4. Surveys/3. Objectives/1. FJ/3/20190624_baseline/3. Data/4. reports/children_under5.csv"
)
# Full settlement list for the Field Supervisor to use for assistance.
# can be run each day to calculate ages?
settlement_list <- select(people1, settlement, house.no, hhd_id, name, dob, gender)
# Target passed positionally: `path =` is deprecated in readr >= 1.4.
write_csv(
  settlement_list,
  "S:/R-MNHS-SPHPM-EPM-IDEpi/RISE/4. Surveys/3. Objectives/1. FJ/3/20190624_baseline/3. Data/4. reports/settlement_list.csv"
)
rm(settlement_list)
#############
############# NOT DONE YET - script below is old and from Makassar???????????????????????????????????
#############
# data needed for 6-monthly survey
# includes:
# 1) respondent, caregivers, their dob and person id
# 2) codes
# settlement house.no hhd_id
# name dob gender person_id
# signed.yn study surveys date_consent
# adult_respondent_name adult_respondent_dob and person_id
# caregiver1 caregiver1_dob caregiver2 caregiver2_dob and person_id
# child.signed.yn feces blood height.weight samples.analysed date_child_consent
# code code2
# baseline_yn child.baseline.yn
############################
############################
# 1) RESPONDENTS AND CARERS - get person_id
############################
############################
# then match respondent name & caregiver name to hhd survey to extract dob to recalc current age
# person1: the baseline person columns plus `name2`, the first name with every
# space removed; `name2` is the key the respondent/caregiver names are joined on.
person1 <- baseline_ID %>%
  select(settlement, house.no, hhd_id, name, dob, gender, first_name, name_age, person_id) %>%
  # `replacement` is written out in full; `repl =` only worked through R's
  # partial argument matching.
  mutate(name2 = str_replace_all(string = first_name, pattern = " ", replacement = ""))
# 1A) ADULT RESPONDENT - from HOUSEHOLD SURVEY
# Builds one row per household with the adult respondent's name, then looks up
# that person's dob / gender / person_id in person1 by space-stripped name.
respondent <- hhd %>%
  select(settlement_barcode, extract_house_no, hhd_id, adult_respondent_name) %>% # 598
  filter(!is.na(hhd_id)) %>% # 596
  unique() %>% # 596
  filter(!is.na(adult_respondent_name)) %>% # 594 - 2 with no adult respondent
  # split adult_respondent_name at the text "age" and keep what precedes it
  # NOTE(review): a name that itself contains "age" would be cut short - confirm
  separate(adult_respondent_name,
    into = c("name1", "name2"),
    extra = "merge", sep = "age"
  ) %>%
  # drop the last two characters of the leading part
  mutate(adult_respondent_name = str_sub(name1, end = nchar(name1) - 2)) %>% # this should now just be first name!
  select(-name2, -name1) %>%
  # gets rid of all spaces (`replacement` spelled out; `repl =` relied on
  # partial argument matching)
  mutate(adult_respondent_name2 = str_replace_all(
    string = adult_respondent_name, pattern = " ", replacement = ""
  )) %>%
  filter(!(settlement_barcode == "Kg Baru, Antang" & extract_house_no == 92 &
    adult_respondent_name == "Petta Aka")) # decided to remove Antang #92, respondent = Petta
# 593
respondent2 <- left_join(respondent, person1,
  by = c(
    "settlement_barcode" = "settlement",
    "extract_house_no" = "house.no",
    "hhd_id" = "hhd_id",
    "adult_respondent_name2" = "name2"
  )
) %>% # all match
  select(-adult_respondent_name2, -first_name, -name) %>%
  rename(
    adult_respondent_dob = dob,
    adult_respondent_name_age = name_age,
    adult_respondent_gender = gender,
    adult_respondent_person_id = person_id
  ) %>%
  group_by(settlement_barcode, extract_house_no, hhd_id) %>%
  mutate(number = row_number()) %>% # use this to check for duplicates (see below notes)
  filter(!(settlement_barcode == "Kg Baru, Antang" & extract_house_no == 36 &
    adult_respondent_dob == "1994-04-18")) %>%
  filter(!(settlement_barcode == "Kg Cedde" & extract_house_no == 43 &
    adult_respondent_dob == "1968-02-10")) %>%
  # drop the grouping so it does not leak into later steps
  ungroup() %>%
  select(-number) # 593
# there were 2 cases where in one household there were 2 people with the same first name:
# Antang house #36 - respondent is Muh (age 49) - delete the second
# Cedde house #43 - respondent is Daeng (age 44.5) - delete the second
rm(respondent)
# check no dup in hhd_id
id <- respondent2 %>%
  mutate(key = paste0(settlement_barcode, extract_house_no, hhd_id))
table(duplicated(id$key)) # no duplicates
id[duplicated(id$key), ]
rm(id)
# 1B) CAREGIVER - from HOUSEHOLD SURVEY - there are 2 of them
# CAREGIVER NAME: need to get this from hhd.merge
# there are 6 cases where more than one caregiver per hhd_id - so will need to make wide
# Distinct caregiver names per household, numbered caregiver1, caregiver2, ...
caregiver <- hhd.merge %>%
  select(settlement_barcode, extract_house_no, hhd_id, caregiver_name, name_child) %>%
  filter(!is.na(caregiver_name)) %>%
  group_by(settlement_barcode, extract_house_no, hhd_id, caregiver_name) %>%
  summarise(count = n()) %>%
  group_by(settlement_barcode, extract_house_no, hhd_id) %>%
  mutate(
    no = row_number(),
    no2 = paste0("caregiver", no)
  ) %>%
  select(-count, -no)
# long -> wide: one column per caregiver slot
caregiver2 <- spread(caregiver, no2, caregiver_name) # 424
rm(caregiver)
# pull out the names and remove spaces
# For each caregiver column: split at the text "age", keep the leading part
# minus its last two characters, then build a space-free join key.
# `replacement` is spelled out; `repl =` relied on partial argument matching.
caregiver3 <- caregiver2 %>%
  separate(caregiver1,
    into = c("caregiver1_1", "caregiver1_2"), # split apart name
    extra = "merge", sep = "age"
  ) %>%
  mutate(caregiver1 = str_sub(caregiver1_1, end = nchar(caregiver1_1) - 2)) %>% # this should now just be first name!
  select(-caregiver1_2, -caregiver1_1) %>%
  mutate(caregiver1_1 = str_replace_all(string = caregiver1, pattern = " ", replacement = "")) %>% # gets rid of all spaces
  separate(caregiver2,
    into = c("caregiver2_1", "caregiver2_2"), # split apart name
    extra = "merge", sep = "age"
  ) %>%
  mutate(caregiver2 = str_sub(caregiver2_1, end = nchar(caregiver2_1) - 2)) %>% # this should now just be first name!
  select(-caregiver2_2, -caregiver2_1) %>%
  mutate(caregiver2_1 = str_replace_all(string = caregiver2, pattern = " ", replacement = "")) # 424
rm(caregiver2)
# look up caregiver 1 in the person list
caregiver_match1 <- left_join(caregiver3, person1,
  by = c(
    "settlement_barcode" = "settlement",
    "extract_house_no" = "house.no",
    "hhd_id" = "hhd_id",
    "caregiver1_1" = "name2"
  )
) %>%
  select(-caregiver1_1, -first_name, -name) %>%
  rename(
    caregiver1_dob = dob,
    caregiver1_name_age = name_age,
    caregiver1_gender = gender,
    caregiver1_person_id = person_id
  )
# look up caregiver 2 in the person list
caregiver_match2 <- left_join(caregiver_match1, person1,
  by = c(
    "settlement_barcode" = "settlement",
    "extract_house_no" = "house.no",
    "hhd_id" = "hhd_id",
    "caregiver2_1" = "name2"
  )
) %>%
  select(-caregiver2_1, -first_name, -name) %>%
  rename(
    caregiver2_dob = dob,
    caregiver2_name_age = name_age,
    caregiver2_gender = gender,
    caregiver2_person_id = person_id
  )
rm(caregiver3, caregiver_match1)
# check no dup in hhd_id
id <- caregiver_match2 %>%
  mutate(key = paste0(settlement_barcode, extract_house_no, hhd_id))
table(duplicated(id$key)) # no duplicates
rm(id)
# THEN ADD RESPONDENT AND CAREGIVER NAMES BACK INTO FULL LIST FROM BASELINE
# Both joins are household-level: every person row of a household receives
# that household's respondent and caregiver columns.
# RESPONDENT
baseline2 <- baseline_ID %>%
  left_join(
    respondent2,
    by = c(
      "settlement" = "settlement_barcode",
      "house.no" = "extract_house_no",
      "hhd_id" = "hhd_id"
    )
  )
# CAREGIVERS
baseline3 <- baseline2 %>%
  left_join(
    caregiver_match2,
    by = c(
      "settlement" = "settlement_barcode",
      "house.no" = "extract_house_no",
      "hhd_id" = "hhd_id"
    )
  )
rm(respondent2, baseline2, caregiver_match2)
# check no dup: one key per person (settlement + house + household + person)
id <- mutate(baseline3, key = paste0(settlement, house.no, hhd_id, person_id))
table(duplicated(id$key)) # no duplicates
rm(id)
############################
############################
# 1) children who were surveyed at baseline
############################
############################
# CHILD NAME - FOR WHOM BASELINE SURVEY WAS DONE
# Rows of hhd.merge with a non-missing name_child; "name_child" indicates they
# started the child loop in the baseline survey (669).
child_baseline1 <- hhd.merge %>%
  filter(!is.na(name_child)) %>%
  select(settlement_barcode, extract_house_no, hhd_id, person_name, name_child, dob) %>%
  mutate(name = person_name)
# get child person_id: match on household, dob and first name, then flag the child
child_baseline2 <- child_baseline1 %>%
  left_join(
    person1,
    by = c(
      "settlement_barcode" = "settlement",
      "extract_house_no" = "house.no",
      "hhd_id" = "hhd_id",
      "dob" = "dob",
      "name" = "first_name"
    )
  ) %>%
  mutate(child.baseline.yn = 1) %>%
  select(
    settlement_barcode, extract_house_no, hhd_id,
    name_child, person_id, child.baseline.yn
  )
rm(child_baseline1)
# THEN ADD CHILD BACK INTO FULL LIST FROM BASELINE
baseline4 <- baseline3 %>%
  left_join(
    child_baseline2,
    by = c(
      "settlement" = "settlement_barcode",
      "house.no" = "extract_house_no",
      "hhd_id" = "hhd_id",
      "person_id" = "person_id"
    )
  ) %>%
  select(-name_child)
rm(child_baseline2, baseline3)
sum(baseline4$child.baseline.yn, na.rm = TRUE) # 669 - so they are all merged!
# check
table(duplicated(baseline4$code)) # 0
table(duplicated(baseline4$person_id)) # 0
baseline4[duplicated(baseline4$code), ]
# write_csv(baseline4, path = "S:/R-MNHS-SPHPM-EPM-IDEpi/RISE/4. Surveys/3. Objectives/3/20181112_Baseline/2. ID/2. Data/4. reports/person_list_baseline_input.csv")
# *************up to here 25 May 2019
########################################
########################################
########################################
########################################
# original script below
########################################
########################################
# CREATE A LIST OF ALL PEOPLE IN EACH HOUSEHOLD TO PULL IN TO NEXT SURVEY
# settlement, HOUSE NO, NAME OF HEAD OF HHD, person name, dob, calc age
# vdigest <- Vectorize(digest) #vectorise makes it generate hash for each item!
demog.id.import <- hhd.merge %>% # 2768
  select(
    settlement_barcode, extract_settlement, extract_house_no, hhd_name,
    person_name, person_name_last, dob, gender, KEY, Number, today, hhd_id
  ) %>%
  filter(!is.na(person_name)) %>% # removed 4 = 2764
  mutate(
    KEY_1 = paste(KEY, Number),
    name = paste0(person_name, " ", person_name_last),
    name_dob = paste0(person_name, " ", person_name_last, " (dob=", dob, ")")
  ) %>%
  rename(
    settlement = settlement_barcode, house.no = extract_house_no,
    hhd.head = hhd_name
  ) %>%
  select(-person_name, -person_name_last, -KEY, -Number) %>% # 20190319 - also remove concat_name_age
  arrange(settlement, house.no, hhd.head)
# Running index of each person within their house. In ave() the first argument
# is only the vector being overwritten; the remaining arguments are the groups.
demog.id.import$index_house <- as.numeric(with(
  demog.id.import,
  ave(extract_settlement, extract_settlement, house.no, FUN = seq_along)
))
# Running index of each person within their household (house + head of hhd).
# The settlement is now part of the grouping: without it, households in
# different settlements sharing a house number and head name were numbered as
# one household. Rows with a missing hhd.head are not numbered and become NA.
demog.id.import$index_hhd <- as.numeric(with(
  demog.id.import,
  ave(extract_settlement, extract_settlement, house.no, hhd.head, FUN = seq_along)
))
demog.id.import <- demog.id.import %>%
  mutate(person.id = paste0(extract_settlement, "-", house.no, "-", index_house))
table(duplicated(demog.id.import$person.id)) # no duplicates 2780; now 2764
# - got rid of a few duplicate surveys
# NOTE(review): the figures quoted in the comments below were recorded before
# index_hhd included the settlement in its grouping - re-check when re-running.
max(demog.id.import$index_house) # this is the biggest house size: 28; NOW 13
max(demog.id.import$index_hhd, na.rm = TRUE) # this is the biggest hhd size: 15; NOW 13
table(demog.id.import$index_house)
mean(demog.id.import$index_hhd, na.rm = TRUE) # avg # people per household = 3.2
mean(demog.id.import$index_house, na.rm = TRUE) # avg # people per house = 3.4
min(demog.id.import$index_hhd, na.rm = TRUE) # 1
min(demog.id.import$index_house, na.rm = TRUE) # 1
table(is.na(demog.id.import$person.id)) # no NA
a <- demog.id.import$person.id # 2764
a <- unique(a) # 2764 - so no duplicates!!!
rm(a)
table(demog.id.import$gender) # 1354 female; 1410 male
# write_csv(demog.id.import, path = "S:/R-MNHS-SPHPM-EPM-IDEpi/RISE/4. Surveys/3. Objectives/3/20181112_Baseline/2. ID/2. Data/4. reports/demog.id.import.csv")
# index = gsub("[^0-9.]", "", Number), - to pull out just numbers
# make full list of people and add child consents
# Full join keeps baseline people without a consent and consents without a
# baseline match; rows are paired on settlement, house, gender and dob.
demog.id.import2 <- demog.id.import %>%
  full_join(
    child_consent_list_all,
    by = c("settlement", "house.no", "gender", "dob")
  ) %>%
  select(-hhd_head_name) %>%
  arrange(settlement, house.no, dob, name) %>% # 2808/2823
  # prefer the baseline hhd_id, fall back to the one from the consent list
  mutate(hhd_id = ifelse(is.na(hhd_id.x), hhd_id.y, hhd_id.x)) %>%
  select(-hhd_id.x, -hhd_id.y)
table(is.na(demog.id.import2$person.id))
# xx are NA because they are consents not matched to baseline data
a <- demog.id.import2$person.id # 2807
a <- unique(a) # 2765 - so there are duplicates - not unique!!! we won't be using this person_id
rm(a)
# ***************
# 20190303 - FIX - need to amend person.id to include all consents that didn't match baseline
# infill name from consent form
demog.id.import2$name <- ifelse(
  is.na(demog.id.import2$name),
  demog.id.import2$child.name.consent,
  demog.id.import2$name
)
# infill settlement code
# An existing code is kept. Otherwise the code is looked up from the settlement
# name: an unknown name gives "" and a missing settlement stays NA. The named
# lookup replaces a 12-deep nested ifelse() with the same mapping.
demog.id.import2$extract_settlement <- local({
  settlement_codes <- c(
    "Kg Lempangang" = "N",
    "Kawasan Untia" = "P",
    "Kg Nelayan, Barombong" = "Q",
    "Kg Bonelengga" = "R",
    "Kg Tunas Jaya" = "S",
    "Jl Barawaja 2, Pampang" = "T",
    "Kg Cedde" = "U",
    "Kg Gampangcayya, Tallo" = "V",
    "Kg Bambu-Bambu, Jl Birta" = "W",
    "Kg Baru, Antang" = "X",
    "Jl Borong Raya Baru" = "Y",
    "Kg Alla-Alla" = "Z"
  )
  existing <- demog.id.import2$extract_settlement
  settlement <- as.character(demog.id.import2$settlement)
  looked_up <- unname(settlement_codes[match(settlement, names(settlement_codes))])
  looked_up[is.na(looked_up) & !is.na(settlement)] <- ""
  ifelse(!is.na(existing), existing, looked_up)
})
# Rebuild person.id over the full (baseline + unmatched consent) list.
demog.id.import2 <- demog.id.import2 %>%
  rename(person.id.original = person.id) %>% # this is the first id - but is now replaced
  select(-index_house) %>%
  arrange(settlement, house.no, hhd.head, name) %>%
  mutate(index_house = as.numeric(ave(settlement, settlement, house.no, FUN = seq_along))) %>%
  mutate(person.id = paste0(extract_settlement, "-", house.no, "-", index_house)) %>%
  select(-extract_settlement)
# sort by settlement, house #, head, name - to order the people in a way that is reproducible
a <- demog.id.import2 %>%
  select(person.id) %>% # 2807
  unique() # 2807 no dups
rm(a)
# ***************
table(demog.id.import2$index_house) # just a check - max of 13
# a <- demog.id.import2 %>%
# select (settlement, house.no, hhd.head, name, index_house, person.id)
vdigest <- Vectorize(digest) # vectorise makes it generate hash for each item!
demog.id.import2$name_hex <- vdigest(demog.id.import2$name, algo = c("md5"))
#
# write_csv(demog.id.import2, path = "S:/R-MNHS-SPHPM-EPM-IDEpi/RISE/4. Surveys/3. Objectives/3/20181112_Baseline/2. ID/2. Data/4. reports/demog.id.import2.csv")
############################################
# BASELINE SURVEY HHD SURVEY RESPONDENTS
############################################
# Rows with a recorded caregiver_yn (to select only those children who were
# part of the survey); `check` is 1 when the adult respondent and the caregiver
# have the same name, 0 when they differ.
respondents <- hhd.merge %>%
  filter(!is.na(caregiver_yn)) %>%
  select(
    settlement_barcode, extract_house_no, age_final,
    adult_respondent_name, caregiver_yn, caregiver_name
  ) %>%
  mutate(check = ifelse(adult_respondent_name == caregiver_name, 1, 0))
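# The two lines below are web-page embed boilerplate, not R code; they are
# commented out so that the script parses.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.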