RISE_ID/T4_annual_survey/O3_T4_ID-merge.R

####################################################
##            MERGE                               ##
## This file merges the survey files together     ##
## before corrections and analyses are conducted. ##
####################################################

#############################################
# PULL OUT HOUSEHOLD INFO TO ADD TO OTHER SURVEYS
hhd_info <- hhd %>% 
  select (today, extract_house_no, settlement_barcode, house_status, hhd_id_final, revisit_yn, 
          respondent_name_text, respondent_yes, respondent_person.id, survey_status, hhd_outstanding, KEY)

#############################################
# CHILD LOOP DATA - health survey
child_loop <- right_join(hhd_info, hhd_child_loop, by = c("KEY" = "PARENT_KEY")) 
rm(hhd_info)

#############################################
# PERSON INFO
hhd_info <- hhd %>% 
  select (today, extract_house_no, settlement_barcode, house_status, hhd_id_final, 
          survey_yes, respondent_name_text, respondent_yes, respondent_person.id, gift_yn, 
          survey_status, hhd_outstanding, KEY)

hhd_person_details2 <- right_join(hhd_info, hhd_person_details, by = c("KEY" = "PARENT_KEY")) 
rm(hhd_info)

#############################################
#FECES KIT HANDOUT - CHILD
hhd_info2 <- hhd %>% 
  select (today, extract_house_no, settlement_barcode, house_status, hhd_id_final, 
          respondent_name_text, respondent_yes, respondent_person.id,
          survey_status, hhd_outstanding, KEY,   feces_participate)          

feces_kit2 <- right_join(hhd_info2, hhd_child_feces_kit, by = c("KEY" = "PARENT_KEY")) %>% 
  filter(!is.na(name_feces)) %>% 
  mutate (feces_barcode_final = ifelse(is.na(feces_kit_barcode), feces_kit_barcode_note, feces_kit_barcode)) %>% 
  filter (!is.na(feces_barcode_final))
feces_kit2[duplicated(feces_kit2$feces_barcode_final),] #0 dup 
rm(hhd_info2)

#############################################
# LIST OF DEPARTED HOUSEHOLDS (full households ***) WITH REASONS WHY
hhd_info <- hhd %>% 
  select (today, extract_house_no, settlement_barcode, house_status, hhd_id_final, 
          respondent_name_text, respondent_yes, respondent_person.id, hhd_id_no_depart, survey_status, 
          hhd_outstanding, KEY) %>% 
  mutate (no_removed = str_count(hhd_id_no_depart, '\\d+')) #count # hhds removed from each house

departed.hhds <- right_join(hhd_info, hhd.depart, by = c("KEY" = "PARENT_KEY")) %>%  #32
  select (settlement_barcode, extract_house_no, name_hhd_id_departed, everything())
departed.hhds[duplicated(departed.hhds$name_hhd_id_departed),] #no duplicate - 
rm(hhd_info)

#############################################
# LIST OF DEPARTED HOUSEHOLD MEMBERS WITH REASONS WHY
library(stringr)
hhd_info <- hhd %>% 
  select (today, extract_house_no, settlement_barcode, house_status, hhd_id_final, 
          respondent_name_text, respondent_yes, respondent_person.id, hhd_members_departed, survey_status, 
          hhd_outstanding, KEY) %>% 
  mutate (no_removed = str_count(hhd_members_departed, '\\d+')) #count # people removed from each hhd?

departed <- right_join(hhd_info, hhd.members.reason, by = c("KEY" = "PARENT_KEY")) %>%  #32
  select (settlement_barcode, extract_house_no, name_departed2, everything())
departed[duplicated(departed$person.id_departed2),] #no duplicate - 

table(departed$move_settlement_yn) #- move within the settlement; 1=yes
table(departed$move_house_no) #moved to 
x <- departed %>% 
  filter (!is.na(move_house_no)) #12; 
y <- departed %>% 
  filter (move_settlement_yn==1) #15
rm(x, y)
  
table(departed$reason_departed_select)
table(departed$reason_departed_select__66)
rm(hhd_info)

#**********************
#fixes for departed:
# do manual fixes in person_list.R

#############################################
#COMBINE ALL NEW PEOPLE - FROM WITHIN THE SETTLEMENT AND FROM OUTSIDE

hhd_info <- hhd %>%
  select (today, extract_house_no, settlement_barcode, house_status, hhd_id_final,
          new_people_yn, new_people_move1, new_people_move2,
          old_house_no, new_hhd_id_no, new_hhd_id,
          respondent_name_text, respondent_yes, respondent_person.id, survey_status, hhd_outstanding, KEY)

hhd_info2 <- hhd %>%
  select (extract_house_no, settlement_barcode, hhd_id_final, KEY)

new.people.info2 <- right_join(hhd_info2, new.people.info, by = c("KEY" = "PARENT_KEY")) %>% #86
  select (extract_house_no, settlement_barcode, hhd_id_final, 
          new_person_name, new_person_person.id, when_move_in, from_where)

#from outside the settlement
new_people_outside <- right_join(hhd_info, new.people.2, by = c("KEY" = "PARENT_KEY")) #82
new_people_outside2 <- left_join(new_people_outside, new.people.info2, 
                                 by = c("settlement_barcode" = "settlement_barcode", 
                                        "extract_house_no" = "extract_house_no",
                                        "hhd_id_final" = "hhd_id_final",
                                        "person_name_age" = "new_person_name")) %>% #82
  rename(name = person_name_age, 
         person.id = person.id_new) %>% 
  mutate (dob = dmy(person_dob),
          gender = as.character(person_gender), 
          date_start = dmy(when_move_in)) %>% 
  select (today, extract_house_no, settlement_barcode, house_status, hhd_id_final, 
          name, dob, gender, person.id, date_start)
 
#from inside the settlement (from another house)
new_people_inside <- right_join(hhd_info, new.people.3, by = c("KEY" = "PARENT_KEY")) #5
new_people_inside2 <- left_join(new_people_inside, new.people.info2, 
                                 by = c("settlement_barcode" = "settlement_barcode", 
                                        "extract_house_no" = "extract_house_no",
                                        "hhd_id_final" = "hhd_id_final",
                                        "new_all_names" = "new_person_name"))  %>% 
  rename(name = new_all_names, 
         dob = new_all_dob, 
         gender = new_all_gender, 
         person.id = new_all_person.id, 
         house.no_left = old_house_no, 
         hhd_id_left = new_hhd_id) %>% 
  mutate (date_start = dmy(when_move_in)) %>% 
  select (today, extract_house_no, settlement_barcode, house_status, hhd_id_final, 
          name, dob, gender, person.id, date_start, 
          house.no_left, hhd_id_left) %>% 
  filter (!is.na(name)) #4

new_people_all <-bind_rows(new_people_outside2, new_people_inside2)
rm(hhd_info, hhd_info2, new.people.info2, new_people_outside, new_people_outside2, 
   new_people_inside, new_people_inside2)



#############################################
#FECES SAMPLE COLLECTION - 
feces_pickup_merge <- full_join(feces, feces_collect, by = c("KEY" = "PARENT_KEY")) %>% 
  mutate (feces_barcode_final = ifelse(is.na(barcode_feces), barcode_feces_text, barcode_feces))
#check for duplicates
check <- feces_pickup_merge %>% 
  filter (!is.na(feces_barcode_final))
  check[duplicated(check$feces_barcode_final),] #0  duplicates
rm(check)

#############################################
#to get person_id if I forgot to include it
hhd_info <- hhd %>%
  select (today, extract_house_no, settlement_barcode, house_status, hhd_id_final,  KEY)

person_id <-right_join(hhd_info, final.people.names, by = c("KEY" = "PARENT_KEY")) 
#does not include new people IDs yet - add this in summary.R
rm(hhd_info)
# 
# 
# #then merge with LIMS - has not been done
# library(readxl)
# LIMS_reception <- read_excel("S:/R-MNHS-SPHPM-EPM-IDEpi/RISE/2. Laboratory QA/LIMS/20190916_ID_O3_stool_survey/Obj3_Data.xlsx",
#                         sheet = "Sample Reception") %>%
#   select (Barcode_sample, Date_receipt, Time_receipt)
# LIMS_KK1 <- read_excel("S:/R-MNHS-SPHPM-EPM-IDEpi/RISE/2. Laboratory QA/LIMS/20190916_ID_O3_stool_survey/Obj3_Data.xlsx",
#                              sheet = "Feces_KK1") %>%
#   rename (date_KK1 = Date_process,
#           time_KK1 = Time_process) %>%
#   select (-Lab_tech, -Comments)
# LIMS_KK2 <- read_excel("S:/R-MNHS-SPHPM-EPM-IDEpi/RISE/2. Laboratory QA/LIMS/20190916_ID_O3_stool_survey/Obj3_Data.xlsx",
#                              sheet = "Feces_KK2") %>%
#   rename(date_KK2 = Date_process,
#          time_start_KK2 = Start_time,
#          time_end_KK2 = End_time) %>%
#   select (date_KK2, time_start_KK2, time_end_KK2, Barcode_KKslide)
# 
# #merge with sample reception
# feces_pickup_maternal <- feces %>% 
#   select (settlement_barcode, extract_house_no, today,  
#           maternal_barcode_feces, maternal_barcode_feces_text) %>% 
#   mutate (feces_barcode_final = ifelse(is.na(maternal_barcode_feces), maternal_barcode_feces_text, 
#                                        maternal_barcode_feces)) %>% 
#   select (-maternal_barcode_feces, -maternal_barcode_feces_text) %>% 
#   filter (!is.na(feces_barcode_final))
# 
# feces_merge2 <- full_join (feces_pickup_maternal, LIMS_reception, 
#                            by = c("feces_barcode_final" = "Barcode_sample"))
# #
# # #merge KK1 and KK2
# KK_samples <- full_join (LIMS_KK1, LIMS_KK2, by = c("Barcode_kk_slide" = "Barcode_KKslide")) %>%
#   mutate (days_to_KK2 = date(date_KK2) - date(date_KK1))
# #
# # #merge with KK
# KK_samples2 <- KK_samples %>%
#   group_by(Barcode_sample) %>%
#   summarise (date_KK1 = max(date_KK1),
#              Barcode_kk_slide = max(Barcode_kk_slide),
#              max_days_to_KK2 = max(days_to_KK2),
#              reads = n())
# #
# feces_merge3 <- full_join (feces_merge2, KK_samples2, by = c("feces_barcode_final" = "Barcode_sample"))
# #check for duplicates
# feces_merge3[duplicated(feces_merge3$feces_barcode_final),] #0  duplicates
# 
# rm(LIMS_reception, LIMS_KK1, LIMS_KK2, feces_merge2, KK_samples, KK_samples2)
# 
# 
# # write_csv (feces_merge3, path = "S:/R-MNHS-SPHPM-EPM-IDEpi/RISE/2. Laboratory QA/LIMS/20190916_ID_O3_stool_survey/20191002_feces_merge.csv")
# 
Monash-RISE/riseR documentation built on Dec. 11, 2019, 9:49 a.m.