# RISE_FJ/T0_baseline/O3_T0_FJ-data_report.R

#############################################
#############################################
## WEEKLY REPORT                           ##
#############################################
#############################################

# Reporting window: the earliest house-survey date through the most recent
# date recorded in either the house data or the household data.
start <- min(house.merge$today)
end <- max(c(max(house.merge$today), max(hhd$today)))


# New consents: household and child consent records dated after 2019-06-11.
new.hhd.consents <- filter(consent_list_all, date > ymd("2019-06-11"))

new.child.consents <- filter(child_consent_list_all, date > ymd("2019-06-11"))

#########################
# house surveys completed
#########################

# Per-house visit summary: how many visits each house received and whether it
# was ever recorded as vacant / demolished / flooded / home.
# Visits are counted from the house data, including visits with no survey.
house.status <- house.merge %>%
  select(settlement_barcode, extract_house_no, house_status, today) %>%
  # the merge with house.water duplicates rows; keep one per house/status/day
  unique() %>%
  mutate(
    vacant = as.numeric(house_status == "vacant"),
    demolished = as.numeric(house_status == "demolished"),
    flooded = as.numeric(house_status == "flooded"),
    # NOTE(review): presumably "1" is the code for "someone home" - confirm
    home = as.numeric(house_status == "1")
  ) %>%
  group_by(settlement_barcode, extract_house_no, today) %>%
  # rows recorded for the same house on the same day
  mutate(check = n()) %>%
  # when a house has exactly two rows on one day, drop the "not_home" one
  filter(check != 2 | house_status != "not_home") %>%
  select(-check) %>%
  group_by(settlement_barcode, extract_house_no) %>%
  summarise(
    vacant = max(vacant, na.rm = TRUE),
    demolished = max(demolished, na.rm = TRUE),
    flooded = max(flooded, na.rm = TRUE),
    home = max(home, na.rm = TRUE),
    visits = n()
  )
# total visits across all houses, and the distribution of visits per house
sum(house.status$visits)
summary(house.status$visits)

# House survey completed
# A house survey counts as started when tenure1 is not missing.
table(house.merge$tenure1, exclude = NULL)

# One row per house / status / date / tenure combination with a started survey.
house.survey <- house.merge %>%
  select(settlement_barcode, extract_house_no, house_status, today, tenure1) %>%
  unique() %>%
  filter(!is.na(tenure1))
table(house.survey$house_status) #CHECK********

# Distinct houses with a survey; compare its row count with house.survey to
# spot houses that appear more than once.
house.unique <- house.survey %>%
  select(settlement_barcode, extract_house_no) %>%
  unique()

# Find duplicate house surveys.
# The key is built with an explicit separator: without one, different
# (settlement, house number) pairs can paste to the same string (for example
# "A1" + "12" and "A11" + "2") and be reported as false duplicates.
id <- house.survey %>%
  mutate(id = paste(settlement_barcode, extract_house_no, sep = "_"))
table(duplicated(id$id))
id[duplicated(id$id), ]
rm(id)

# Completed house surveys per settlement, and the overall total.
house.survey.comm <- house.survey %>%
  group_by(settlement_barcode) %>%
  summarise(house.survey.completed = n())
sum(house.survey.comm$house.survey.completed)
  
#########################
# household surveys completed
#########################
# A household survey counts as started when gift_yn was answered (0 or 1);
# rows with a missing gift_yn are dropped by the filter.
hhd.survey <- hhd %>%
  filter(gift_yn %in% c(0, 1)) %>%
  select(today, settlement_barcode, extract_house_no, hhd_id,
         house_status, comm_harmo, survey_status)

table(hhd.survey$house_status, exclude = NULL)
table(hhd.survey$survey_status, exclude = NULL)

# Check for duplicates.
# The key uses an explicit separator: pasting the three fields back to back
# lets different households collide (house 1 + hhd 12 vs house 11 + hhd 2)
# and show up as false duplicates.
id <- hhd.survey %>%
  mutate(id = paste(settlement_barcode, extract_house_no, hhd_id, sep = "_"))
table(duplicated(id$id))
id[duplicated(id$id), ]
rm(id)
#

# Started household surveys per settlement, and the overall total.
hhd.survey.comm <- hhd.survey %>%
  group_by(settlement_barcode) %>%
  summarise(hhd.survey.completed = n())
sum(hhd.survey.comm$hhd.survey.completed)

#####################
#total number of visits to each house - both house and household survey
#where a visit must be on a different day (more than one survey attempt in a day = 1 visit)
#####################
# Distinct (house, day) pairs seen in the house data ...
house.days <- unique(
  select(house.merge, settlement_barcode, extract_house_no, today)
)
# ... and in the household data (two household surveys in the same house on
# the same day collapse to a single visit).
hhd.days <- unique(
  select(hhd, settlement_barcode, extract_house_no, today)
)

# Stack both sources and de-duplicate again so a day on which both surveys
# were done counts once; every remaining row is one visit.
house.visits.byhouse <- rbind(house.days, hhd.days) %>%
  group_by(settlement_barcode, extract_house_no, today) %>%
  unique() %>%
  mutate(count = 1)
rm(house.days, hhd.days)

# Visits per settlement; the grand total should equal the number of rows in
# house.visits.byhouse.
house.visits.summ <- house.visits.byhouse %>%
  group_by(settlement_barcode) %>%
  summarise(visits = sum(count))
sum(house.visits.summ$visits)


#############################################
#############################################
##  REFUSALS - this is not correct         ##
#############################################
#############################################
# From data officer log sheet:
# 1)  Matata #50 - refused hhd consent
# 2) Wailea #13 - refusal to consent and baseline
# 3) Wailea # 58 - refused hhd consent


# Frequency checks on the status fields of both datasets (auto-printed).
table(house.merge$house_status)
# withdrawn_activity=1
# withdrawn_RISE=1
table(house.merge$survey_status) #all 1=completed
table(house.merge$survey_status, house.merge$house_status, exclude = NULL) #

table(hhd$house_status)   #4 not at home
table(hhd$survey_status) #3= not home, 1=no one avail to complete survey
table(hhd$survey_status, hhd$house_status, exclude = NULL) # fine

# House-level refusals: a house counts as a refusal when its status is one of
# the two "withdrawn" codes.
# %in% is used instead of == so a missing house_status yields 0 rather than
# NA; with == a single NA would turn the sum below into NA.
house.refuse <- house.merge %>%
  select(settlement_barcode, extract_house_no, hhd_id, house_status,
         new_consent_yn, consent_prev_no, consent_new_complete,
         consent_prev_yes, respondent_house, home_yn) %>%
  unique() %>%
  mutate(refuse = as.numeric(
    house_status %in% c("withdrawn_activity", "withdrawn_RISE")
  ))
sum(house.refuse$refuse) #wailea #13; Missing Matata #50 (they didn't do survey)

# Household-level refusals: a household counts as a refusal when any of the
# four consent questions was answered 0 (= no). `x %in% 0` is TRUE only for a
# non-missing 0, i.e. the same test as `x == 0 & !is.na(x)`.
hhd.refuse <- hhd %>%
  select(settlement_barcode, extract_house_no, hhd_id, house_status,
         new_consent_yn, consent_prev_no, consent_new_complete,
         consent_prev_yes, survey_continue_yes) %>%
  mutate(refusal = as.numeric(
    new_consent_yn %in% 0 |
      consent_prev_no %in% 0 |
      consent_new_complete %in% 0 |
      consent_prev_yes %in% 0
  ))
sum(hhd.refuse$refusal)
#wailea #58

# no = 0
#   new_consent_yn -= new hhd, do they wish to consent? no = refusal, 
# consent_prev_no - did not consent before; do they wish to consent? no = refusal
# consent_new_complete, did they consent now?no = refusal
#   consent_prev_yes = previously consented; happy to continue? no  = refusal, 
# survey_continue_yes



#############################################
#############################################
##  CHECK PERSON-LEVEL DATA                ##
#############################################
#############################################
#age and DOB are a bit confusing
#Final variables to use:
# person_dob
# person_name
# person_gender
# person_dob
# age_final
# age_final2
# person_relationship
# concat_name_age

# How many person rows have no date of birth.
table(is.na(hhd.merge$person_dob))

# Age category variable, built from age_final2.
# NOTE(review): the surrounding notes disagree on whether age_final2 is the
# age as of today or at the survey date - confirm which one is intended.
#
# Intervals are left-closed with breaks at the real cut points, so the labels
# hold for fractional ages too: [.., 5) -> "0-<5", [5, 15) -> "5-<15", etc.
# The previous breaks (4.99, 14.99, 17.99) put an age such as 4.995 into the
# next band up. The lower bound stays at -1 so slightly negative computed
# ages are still binned into the first band.
hhd.merge <- hhd.merge %>%
  mutate(age_today = as.numeric(age_final2),
         age_cat = cut(age_today,
                       breaks = c(-1, 5, 15, 18, Inf),
                       right = FALSE,
                       labels = c("0-<5", "5-<15", "15-<18", "18+")))
table(hhd.merge$age_cat)
###################
#PEOPLE
###################
#final variables to use: person_name, person_gender, person_dob, age_final,age_final2

# Everyone with a recorded name, keeping relationship, gender and age_final.
people <- select(
  filter(hhd.merge, !is.na(person_name)),
  person_relationship, person_gender, age_final
)
# gender counts, then gender proportions
table(people$person_gender)
gender.all.prop <- prop.table(table(people$person_gender))
gender.all.prop

# Number of named people per house.
# NOTE(review): grouped by settlement and house number, not by hhd_id, so
# this is people per house rather than per household - confirm intent.
people.hhd <- hhd.merge %>%
  filter(!is.na(person_name)) %>% # drop rows with no person data
  group_by(settlement_barcode, extract_house_no) %>%
  summarise(no_people = n())
sum(people.hhd$no_people)

# Age-category counts and proportions over all rows of hhd.merge; the first
# proportion is the under-5 share.
agecat.all <- table(hhd.merge$age_cat)
agecat.all.prop <- prop.table(agecat.all)
agecat.all.prop[1]

# Named people in the under-5 age category, with their gender split.
under5 <- filter(hhd.merge, !is.na(person_name), age_cat == "0-<5")
table(under5$person_gender)
gender.under5.prop <- prop.table(table(under5$person_gender))
gender.under5.prop


#############################################
# table of # houses and #hhds by settlement
# Household records that carry a household id.
households <- select(
  filter(hhd, !is.na(hhd_id)),
  settlement_barcode, extract_house_no, hhd_id
)

# Number of household records per house, and the distribution of that count.
hhds.summ <- households %>%
  group_by(settlement_barcode, extract_house_no) %>%
  summarise(no.hhds = n())
table(hhds.summ$no.hhds)

# One row per distinct house in the house data (no.houses is therefore 1).
houses.summ <- house.merge %>%
  select(settlement_barcode, extract_house_no) %>%
  unique() %>%
  group_by(settlement_barcode, extract_house_no) %>%
  summarise(no.houses = n())

# House-level table holding both counts; houses present in only one source
# get NA in the other count.
summ.settlements1 <- full_join(
  houses.summ, hhds.summ,
  by = c("settlement_barcode", "extract_house_no")
)

# House survey with no matching hhd survey - missing hhd survey.
# Count surveyed houses and started household surveys per house, join them,
# and keep the houses that have no household-survey count.
house.counts <- house.survey %>%
  group_by(settlement_barcode, extract_house_no) %>%
  summarize(no.houses = n())
hhd.counts <- hhd.survey %>%
  unique() %>%
  group_by(settlement_barcode, extract_house_no) %>%
  summarize(no.hhds = n())

missing_hhd_survey <- full_join(house.counts, hhd.counts,
                                by = c("settlement_barcode",
                                       "extract_house_no")) %>%
  filter(is.na(no.hhds)) %>%
  mutate(missing.hhd.survey = 1) %>%
  select(-no.houses, -no.hhds)

################
# Second approach: flag, per house, whether a house survey and a household
# survey exist, then list the houses where either flag is not 1.
house.done <- house.merge %>%
  filter(!is.na(tenure1)) %>%
  select(settlement_barcode, extract_house_no, hhd_id) %>%
  unique() %>%
  mutate(house.survey = 1)
hhd.done <- hhd.survey %>%
  unique() %>%
  select(settlement_barcode, extract_house_no, hhd_id) %>%
  mutate(hhd.survey = 1)

# After stacking, each row carries only one of the two flags (the other is
# NA). any(... == 1, na.rm = TRUE) gives a clean 1/0 per house. The earlier
# max(..., na.rm = TRUE) returned -Inf, with a warning, for houses that had
# no rows for one of the surveys.
survey.flags <- bind_rows(house.done, hhd.done) %>%
  group_by(settlement_barcode, extract_house_no) %>%
  summarize(house.survey = as.numeric(any(house.survey == 1, na.rm = TRUE)),
            hhd.survey = as.numeric(any(hhd.survey == 1, na.rm = TRUE)))

missing_surveys <- survey.flags %>%
  filter(house.survey != 1 | hhd.survey != 1)
missing_hhd_survey1 <- missing_surveys %>%
  filter(hhd.survey != 1)
rm(house.counts, hhd.counts, house.done, hhd.done, survey.flags)

# Roll the house-level table up to one row per settlement; missing counts
# are ignored in the sums.
summ.settlements <- summ.settlements1 %>%
  group_by(settlement = settlement_barcode) %>%
  summarise(no.houses = sum(no.houses, na.rm = TRUE),
            no.hhds = sum(no.hhds, na.rm = TRUE))

# Grand-total row: relabel every settlement as "Total" and sum the counts.
totals <- summ.settlements %>%
  group_by(settlement = "Total") %>%
  summarise(no.houses = sum(no.houses),
            no.hhds = sum(no.hhds))
# append the total row underneath the settlement rows
summ.settlements <- rbind(summ.settlements, totals)

# Export the settlement summary and the list of houses missing a household
# survey - these are for Ateca to check each day ****
setwd("Z:/Data Files/Data Files Objective 3/Summary/Settlement")
write_csv(
  summ.settlements,
  "Z:/Data Files/Data Files Objective 3/Summary/Settlement/settlement.no.csv"
)
write_csv(
  missing_hhd_survey,
  "Z:/Data Files/Data Files Objective 3/Summary/Settlement/missing_hhd_survey.csv"
)



###################################
#NEW CONSENT DATA
##################################

# Consent records dated after 2019-06-11.
# The cutoff is parsed with ymd(), as in the consent filters earlier in this
# script. date() is a date-component accessor in lubridate, not a parser, and
# if base::date() is the one in scope it takes no argument and errors.
hhd.consent.new <- consent_list_all %>%
  filter(date > ymd("2019-06-11"))

child.consent.new <- child_consent_list_all %>%
  filter(date > ymd("2019-06-11"))

###################################
#TRACKING PROGRESS - CONSENT LIST VS ALL VISITS
##################################

# VISITS TO HOUSES FOR HOUSE AND/OR HOUSEHOLD SURVEY
# Distinct (house, day) pairs from both datasets; one row = one visit day.
house.days <- house.merge %>%
  select(settlement_barcode, extract_house_no, today) %>%
  unique()
hhd.days <- hhd %>%
  select(settlement_barcode, extract_house_no, today) %>%
  unique()
visits.all <- bind_rows(house.days, hhd.days) %>%
  unique() %>%
  group_by(settlement_barcode, extract_house_no) %>%
  summarize(house.visits = n())
rm(house.days, hhd.days)

# FULL LIST OF HHD_ID
# Every non-missing household id seen in the house data or in the started
# household surveys.
house.ids <- house.merge %>%
  select(settlement_barcode, extract_house_no, hhd_id) %>%
  unique()
hhd.ids <- hhd.survey %>%
  select(settlement_barcode, extract_house_no, hhd_id) %>%
  unique()
all.hhd_id <- bind_rows(house.ids, hhd.ids) %>%
  unique() %>%
  filter(!is.na(hhd_id))
rm(house.ids, hhd.ids)

# FULL LIST OF SURVEYS: HOUSE=YES, HHD=YES
house.done <- house.merge %>%
  filter(!is.na(tenure1)) %>%
  select(settlement_barcode, extract_house_no, hhd_id) %>%
  unique() %>%
  mutate(house.survey = 1)
hhd.done <- hhd.survey %>%
  unique() %>%
  select(settlement_barcode, extract_house_no, hhd_id) %>%
  mutate(hhd.survey = 1)
# One row per household id with 1/0 flags for each survey. The flags are
# built with any(... == 1, na.rm = TRUE): the earlier max(..., na.rm = TRUE)
# returned -Inf (with a warning) when a household had only one of the two
# surveys, and that -Inf ended up in the per-settlement CSV files.
all.surveys <- bind_rows(house.done, hhd.done) %>%
  group_by(settlement_barcode, extract_house_no, hhd_id) %>%
  summarize(house.survey = as.numeric(any(house.survey == 1, na.rm = TRUE)),
            hhd.survey = as.numeric(any(hhd.survey == 1, na.rm = TRUE)))
rm(house.done, hhd.done)

# Now merge with consent data: full joins keep households that appear on only
# one side (consented but never visited, or visited without a consent row).
z <- full_join(consent_list_all, visits.all,
               by = c("settlement" = "settlement_barcode",
                      "house.no" = "extract_house_no"))
z1 <- full_join(z, all.hhd_id,
                by = c("settlement" = "settlement_barcode",
                       "house.no" = "extract_house_no",
                       "hhd_id" = "hhd_id"))
z2 <- full_join(z1, all.surveys,
                by = c("settlement" = "settlement_barcode",
                       "house.no" = "extract_house_no",
                       "hhd_id" = "hhd_id")) %>%
  rename(date.consent = date)

# ************************
# Write one CSV per settlement for Ateca to cross check and pass on to the
# Field Supervisor for revisits.
# ************************

for (i in c(
  "Wainivokai", "Lobau", "Komave", "Matata",
  "Nauluvatu", "Muanikoso", "Wailea", "Newtown",
  "Muanivatu", "Kinoya", "Maravu", "Wainidinu"
)) {
  # rows of the merged tracking table that belong to this settlement
  settlement_file <- filter(z2, settlement == i)
  filename <- paste0("Z:/Data Files/Data Files Objective 3/Summary/Settlement/", i, ".csv")
  print(filename)
  write_csv(settlement_file, filename)
}


# drop the intermediate tracking tables
rm(visits.all, all.hhd_id, all.surveys, z, z1, z2)
# Monash-RISE/riseR documentation built on Dec. 11, 2019, 9:49 a.m.