####################################################
## MERGE ##
## This file merges the survey files together ##
## before corrections and analyses are conducted. ##
####################################################
#############################################
# MERGE HOUSE
# remove useless columns
house <- house %>%
select (-SubmissionDate, -starttime, -endtime, -deviceid, -subscriberid, -simid,
-devicephonenum, -username, -caseid, -length1, -length2, -check_locn,
-check_no, -signedyn_pl,
-study_pl, -surveys_pl, -instanceID, -instanceName, -formdef_version) %>%
arrange(KEY)
house.water <- house.water %>%
rename (Number = KEY) %>%
select (-`SET-OF-water_repeat`) %>%
arrange (PARENT_KEY)
# SORT DATA BY KEYS
# house[order(house$KEY), ]
# house.water[order(house.water$PARENT_KEY), ]
# MERGE BOTH HOUSE SURVEY FILES ON KEY AND PARENT KEY
house.merge <- merge(x = house, y = house.water, by.x = "KEY", by.y = "PARENT_KEY", all.x = TRUE)
# xx <- full_join (house, house.water, by = c("KEY" = "PARENT_KEY")) #this was a double check - all good
house.merge <- house.merge %>%
select (settlement_barcode, extract_house_no, hhd_name, everything()) %>%
arrange (settlement_barcode, extract_house_no, hhd_name)
#############################################
# MERGE PERSON-LEVEL DATA FROM HOUSEHOLD SURVEY
# need to merge based on PARENT_KEY and number in household (end of KEY)
#these are all blank
a <- hhd.person %>%
filter(is.na(hhd.person$person_gender)) #68 of 427
b <- hhd.person %>%
filter(is.na(hhd.person$person_name)) #68
rm(a, b)
# FUNCTION TO GET JUST NUMBER FROM "KEY" - this now works regardless of the number
# ADD NUMBER COLUMN
substrRight <- function(x){
Number <- x %>%
mutate (KEY2 = KEY) %>%
select (KEY2, KEY) %>%
separate (KEY2, c("x", "Number"), "\\[|\\]") %>%
select (-x) %>%
mutate (Number = as.numeric(Number))
}
Number <- substrRight(hhd.person)
hhd.person <- full_join (hhd.person, Number, by = c("KEY" = "KEY"))
rm(Number)
Number <- substrRight(hhd.ethnicity)
hhd.ethnicity <- full_join (hhd.ethnicity, Number, by = c("KEY" = "KEY"))
rm(Number)
Number <- substrRight(hhd.religion)
hhd.religion <- full_join (hhd.religion, Number, by = c("KEY" = "KEY"))
rm(Number)
Number <- substrRight(hhd.marital)
hhd.marital <- full_join (hhd.marital, Number, by = c("KEY" = "KEY"))
rm(Number)
Number <- substrRight(hhd.daycare)
hhd.daycare <- full_join (hhd.daycare, Number, by = c("KEY" = "KEY"))
rm(Number)
Number <- substrRight(hhd.school)
hhd.school <- full_join (hhd.school, Number, by = c("KEY" = "KEY"))
rm(Number)
Number <- substrRight(hhd.read)
hhd.read <- full_join (hhd.read, Number, by = c("KEY" = "KEY"))
rm(Number)
Number <- substrRight(hhd.activity)
hhd.activity <- full_join (hhd.activity, Number, by = c("KEY" = "KEY"))
rm(Number)
#hhd.child is different. name_id == Number in other loops
#so nust use name_id
#remove rows of empty data
# (these are generated by the survey during the looping - checked and ok;
# so "Number" won't be chronological and some will be missing)
hhd.activity <- hhd.activity[!is.na(hhd.activity$person_name_activity), ]
hhd.child <- hhd.child[!is.na(hhd.child$name_child), ]
hhd.daycare <- hhd.daycare[!is.na(hhd.daycare$person_name_daycare), ]
hhd.ethnicity <- hhd.ethnicity[!is.na(hhd.ethnicity$person_name_ethnicity), ]
hhd.marital <- hhd.marital[!is.na(hhd.marital$person_name_marital), ]
hhd.person <- hhd.person[!is.na(hhd.person$person_name), ]
hhd.read <- hhd.read[!is.na(hhd.read$person_name_read), ]
hhd.religion <- hhd.religion[!is.na(hhd.religion$person_name_religion), ]
hhd.school <- hhd.school[!is.na(hhd.school$person_name_school), ]
# remove useless columns
hhd.person <- hhd.person %>%
select (person_name, person_name_first, person_name_last, child_name_first_final, child_name_last1, child_name_last2,
person_gender, child_gender_final, child_gender_pull, child_gender_pull_text, child_gender1, child_gender2,
child_age1, child_dob_final, child_age_calc, child_age_final1, child_age_final2,
dob, age, age_calc, new_age_final1, person_dob, age_final, age_final2,
person_relationship, concat_name_age, PARENT_KEY, KEY, Number)
hhd.activity <- hhd.activity %>%
select (-(`SET-OF-activity`), -KEY)
hhd.child <- hhd.child %>%
select (-(`SET-OF-child_loop`), -KEY)
hhd.daycare <- hhd.daycare %>%
select (-(`SET-OF-daycare`), -KEY)
hhd.ethnicity <- hhd.ethnicity %>%
select (-(`SET-OF-ethnicity_repeat`), -KEY)
hhd.marital <- hhd.marital %>%
select (-(`SET-OF-marital_status1`), -KEY)
hhd.read <- hhd.read %>%
select (-(`SET-OF-read`), -KEY)
hhd.religion <- hhd.religion %>%
select (-(`SET-OF-religion_repeat`), -KEY)
hhd.school <- hhd.school %>%
select (-(`SET-OF-school`), -KEY)
# pull settlement and house # from hhd survey
x <- hhd %>%
select (settlement_barcode, extract_settlement, extract_house_no, hhd_id, KEY, survey_status, today, adult_respondent_name,
ethnicity_q, ethnicity_others, ethnicity_hhd_yn, religion_q, religion_others, religion_hhd_yn, list_5plus, no_children)
# SORT DATA BY KEYS
x[order(x$KEY), ]
hhd.person[order(hhd.person$PARENT_KEY, hhd.person$Number), ]
hhd.ethnicity[order(hhd.ethnicity$PARENT_KEY, hhd.ethnicity$Number), ]
hhd.religion[order(hhd.religion$PARENT_KEY, hhd.religion$Number), ]
hhd.marital[order(hhd.marital$PARENT_KEY, hhd.marital$Number), ]
hhd.daycare[order(hhd.daycare$PARENT_KEY, hhd.daycare$Number), ]
hhd.school[order(hhd.school$PARENT_KEY, hhd.school$Number), ]
hhd.read[order(hhd.read$PARENT_KEY, hhd.read$Number), ]
hhd.activity[order(hhd.activity$PARENT_KEY, hhd.activity$Number), ]
hhd.child[order(hhd.child$PARENT_KEY, hhd.child$name_id), ]
#merge together ON KEY AND PARENT KEY
a <- merge(x = x, y = hhd.person, by.x = "KEY", by.y = "PARENT_KEY", all.x = TRUE)
b <- merge(x = a, y = hhd.ethnicity, by.x = c("KEY", "Number"), by.y = c("PARENT_KEY", "Number"), all = TRUE)
c <- merge(x = b, y = hhd.religion, by.x = c("KEY", "Number"), by.y = c("PARENT_KEY", "Number"), all = TRUE)
d <- merge(x = c, y = hhd.marital, by.x = c("KEY", "Number"), by.y = c("PARENT_KEY", "Number"), all = TRUE)
e <- merge(x = d, y = hhd.daycare, by.x = c("KEY", "Number"), by.y = c("PARENT_KEY", "Number"), all = TRUE)
f <- merge(x = e, y = hhd.school, by.x = c("KEY", "Number"), by.y = c("PARENT_KEY", "Number"), all = TRUE)
g <- merge(x = f, y = hhd.read, by.x = c("KEY", "Number"), by.y = c("PARENT_KEY", "Number"), all = TRUE) %>%
rename(KEY.y1 = KEY.y)
h <- merge(x = g, y = hhd.activity, by.x = c("KEY", "Number"), by.y = c("PARENT_KEY", "Number"), all = TRUE)
hhd.merge <- full_join (h, hhd.child, by = c("KEY" = "PARENT_KEY",
"Number" = "name_id"))
rm(x, a, b, c, d, e, f, g, h)
rm(hhd.activity, hhd.child, hhd.daycare, hhd.ethnicity, hhd.marital, hhd.person, hhd.read,
hhd.religion, hhd.school, house, house.water)
#############
# FIX DOB - WILL HAVE TO ESTIMATE FOR THOSE THAT ONLY ENTERED AGE
#############
#DOB and age - remember that DOB was optional ***
check <- hhd.merge %>%
select (settlement_barcode, extract_house_no, hhd_id, today, person_name,
child_age1, child_dob_final, child_age_calc, child_age_final1, child_age_final2,
dob, age, age_calc, new_age_final1, person_dob, age_final, age_final2) %>% # age_child, dob_child - these are just pulled through
filter (age<=5 | child_age1<=5) %>%
filter (person_dob == "2000-02-29") #these still need to be manually fixed if possible
rm(check)
#now I need to "estimate" DOB for those where age was entered
hhd.merge <- hhd.merge %>%
rename (person_dob1 = person_dob) %>%
mutate (person_dob2 = today - 365*age_final) %>%
mutate (person_dob = if_else(!is.na(child_age1) | !is.na(age), person_dob2, person_dob1)) %>%
select (-person_dob1, -person_dob2)
#############
#FIX ETHNICITY AND RELIGION - infill
# hhd.merge
# a <- hhd.merge %>%
# mutate (ethnic = as.numeric(if_else(ethnicity_others == 1 & is.na(ethnicity), ethnicity_q, ethnicity))) %>%
# mutate (relig = if_else(religion_others == 1 & is.na(religion), religion_q, religion)) %>%
# select (-ethnicity, -ethnicity_q, -ethnicity_others, -ethnicity_hhd_yn,
# -religion, -religion_q, -religion_others, -religion_hhd_yn) %>%
# rename (religion = relig, ethnicity = ethnic)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.