riseR:

# OBJECTIVE 3 CHILD SAMPLING IN FIJI - 26 SEPTEMBER 2019 TO 7 Nov 2019

library (tidyverse)
library (lubridate)
library (stringr)
library (readxl)
library (zscorer) #something from jeff

rm(list = ls())
# setwd("Z:/Data Files/Data Files Objective 3")
setwd("S:/R-MNHS-SPHPM-EPM-IDEpi/RISE/4. Surveys/3. Objectives/1. FJ/3/20190930_child sampling/3. Data/1. raw data")

# CHILD SAMPLING
child <- read_csv (file="O3_child_sample_FJ_v1.csv")

# FECES COLLECTION
feces <- read_csv (file="O3_feces_FJ_v1.csv")
feces.sample <- read_csv (file="O3_feces_FJ_v1-feces_collection.csv")

# COMBINED CONSENT LISTS FROM CONSENT AND BASELINE -
# last saved on 24 SEPT 2019
setwd("S:/R-MNHS-SPHPM-EPM-IDEpi/RISE/4. Surveys/3. Objectives/1. FJ/3/20190624_baseline/3. Data/4. reports")
# setwd("Z:/Data Files/Data Files Objective 3/Reports/Child Sampling/Data Input")
consent1 <- read_csv (file = "consent.fj.import.csv") #833
child_consent1 <- read_csv (file = "child.consent.fj.import.csv") #454

#LIMS Data
setwd("S:/R-MNHS-SPHPM-EPM-IDEpi/RISE/2. Laboratory QA/LIMS/20190926_FJ_child sampling")
# setwd("Z:/Data Files/Data Files Objective 3/LIMS Data")

sample_reception <- read_excel("Obj3_Data.xlsx", "Sample Reception")

blood_lab <- sample_reception %>%
  filter (Sample_type == "Blood-Both" | Sample_type == "Blood-EDTA" | Sample_type == "Blood-SST") %>%
  select (Barcode_sample, Date_receipt)
feces_lab <- sample_reception %>%
  filter (Sample_type == "Human Feces") %>%
  select (Barcode_sample, Date_receipt)
bloodspot_lab <- sample_reception %>%
  filter (Sample_type == "Filter Paper") %>%
  select (Barcode_sample, Date_receipt)

#load more data! - Jeff
blood_EDTA <- read_excel("Obj3_Data.xlsx", "EDTA_Whole Blood")
blood_EDTA_aliq <- read_excel("Obj3_Data.xlsx", "EDTA_Aliquots")
blood_centrifuge <- read_excel("Obj3_Data.xlsx", "Blood_Centrifuge")
blood_SST_aliq <- read_excel("Obj3_Data.xlsx", "SST_Aliquots")
blood_filter <- read_excel("Obj3_Data.xlsx", "Blood_Filter")

KK1 <- read_excel("Obj3_Data.xlsx", "Feces_KK1")
feces_aliq <- read_excel("Obj3_Data.xlsx", "Feces_Aliquots")
feces_mac1 <- read_excel("Obj3_Data.xlsx", "Feces_Macconkey1")
feces_mac2 <- read_excel("Obj3_Data.xlsx", "Feces_Macconkey2")
KK2 <- read_excel("Obj3_Data.xlsx", " Feces_KK2")





setwd("S:/R-MNHS-SPHPM-EPM-IDEpi/RISE/4. Surveys/Code/RISE-R/RISE_FJ/T1_child_sampling")

#############################################
# coding fixes and data checks
#############################################

##########################
#infill gender 
table(child$child_gender)
child$child_gender <- ifelse (child$child_gender == "0", "female", 
                                     ifelse (child$child_gender == "1", "male", child$child_gender))
table(child$child_gender, exclude = TRUE) #fixed; 188 with no gender????

##########################
#check for missing or weird data
table(child$hhd_consent_yes, exclude = TRUE) #458 with consent; 188 no consent
table(child$hhd_id_no, exclude = TRUE) #441 that picked a hhd + 17 that said new hhd; 188 NA
table(child$caregiver_present==1 | child$home_yn==1) #188 false
table(child$child_gender, exclude = TRUE) #188 with no gender

table(is.na(child$respondent_name)) #189 NA - why is one missing respondent?
check <- child %>% #646
  filter (hhd_consent_yes==1)  %>%  #458
  filter(is.na(respondent_name)) #1 - this child was over 5, so not sampled
rm(check)

table(child$survey_yes, exclude = TRUE) #188NA; 450=yes to surveying
table(child$sample_yes, exclude = TRUE) #188NA; 450=yes to sampling

table(child$survey_check2, exclude = TRUE) #196 NA (188+8); 409 agreed to do survey;28 did not; 13 didn't ask question
table (is.na(child$cough)) #409 answered - good!

table(child$survey_check3, exclude = TRUE) #of the 28 that said no to survey; 9 said yes to sampling; 18 said no
table(child$feces_check2, exclude = TRUE) #406 ok to do feces sampling

table(child$child_photo_yn, exclude = TRUE) #392 ok with photo; 3 no; 37 did not ask; 214 NA

table(child$height_check2, exclude = TRUE) #390 yes; 3 no; 39 did not ask; 214 NA

table(child$gift_yn, exclude = TRUE) #214 NA; 384 yes, 8 no, 40 did not ask
table (child$blood_check2, exclude = TRUE) #214 NA; 317 yes, 55 no, 33 did not ask
table (child$survey_check2, child$cough, exclude = TRUE)

##########################
#BARCODE SCANNING

#HOUSE BARCODE - still big problem!!
table (is.na(child$barcode_scan)) #more than 50% are NOT scanning
table (is.na(child$barcode_scan_text))
table (is.na(child$barcode_scan_text), child$caregiver_present==1 | child$home_yn==1 )

#SAMPLE BARCODE - all good
table (is.na(child$feces_kit_barcode)) #these appear to be scanned = 401
table (is.na(child$feces_kit_barcode_note)) #- no text entries

table (is.na(child$barcode_edta)) 
table (is.na(child$barcode_edta_text)) #- no text entries

table (is.na(child$barcode_serum))
table (is.na(child$barcode_serum_text)) #- no text entries

table (is.na(child$barcode_bloodspot))
table (is.na(child$barcode_bloodspot_text)) #- no text entries


#FIX TIMES
child$starttime <- as.Date(child$starttime, format = '%B %d, %Y')
feces$starttime <- as.Date(feces$starttime, format = '%B %d, %Y')
summary(child$today) #26 Sep 2019 to 7 Nov 2019

#############################################
# CORRECTIONS
source("O3_T1_FJ-corrections.R")
#############################################
#NOTE - MANY DATE OF BIRTH CORRECTIONS - NEED TO CONSIDER HOW THESE ARE HANDLED ************???????

#############################################
# MERGE
#############################################

#MERGE CHILD SAMPLING - don't need merge for Fiji as only one child per survey
child.merge <- child %>%
  select (settlement_barcode, extract_house_no, hhd_id, today, starttime,
          caregiver_present,	home_yn, child_name, child_dob, child_gender,
          child_name2, child_age, 
          respondent_name, respondent_id, respondent_dob, gift_yn,child_photo_yn, 
          signedyn_pl, feces_pl, blood_pl, height_pl, blood_check2, height_check2,
          survey_check2, survey_status, endtime, barcode_edta, barcode_edta_text,
          barcode_serum,barcode_serum_text,	barcode_bloodspot,
          barcode_bloodspot_text, blood_check2, survey_check2, feces_kit_barcode,
          feces_kit_barcode_note) %>%
  mutate (edta = ifelse (is.na(barcode_edta), barcode_edta_text, barcode_edta),
          serum = ifelse (is.na(barcode_serum), barcode_serum_text, barcode_serum),
          bloodspot = ifelse (is.na(barcode_bloodspot), barcode_bloodspot_text, barcode_bloodspot),
          feces = ifelse (is.na(feces_kit_barcode), feces_kit_barcode_note, feces_kit_barcode)) %>%
  select (-barcode_edta,	-barcode_edta_text, -barcode_serum, -barcode_serum_text,
          -barcode_bloodspot,	-barcode_bloodspot_text, -feces_kit_barcode, -feces_kit_barcode_note)

#FECES PICKUP
feces.merge <- merge(x = feces, y = feces.sample, by.x = "KEY", by.y = "PARENT_KEY")  %>%
  mutate (barcode = ifelse (is.na(barcode_feces), barcode_feces_text, barcode_feces)) %>%
  select (-barcode_feces, -barcode_feces_text) #merges only those with a sample picked up

feces.merge2 <- full_join (feces, feces.sample, by = c("KEY" = "PARENT_KEY")) %>%
  mutate (barcode = ifelse (is.na(barcode_feces), barcode_feces_text, barcode_feces)) %>%
  select (-barcode_feces, -barcode_feces_text) # merges all feces surveys started

#20191119 - a few stats for randomisation

# #stool samples collected
# feces.recd <- feces.merge %>%
#   select (settlement_barcode, extract_house_no, barcode, today) %>%
#   rename (feces.recd = barcode,
#           date.pickup = today) %>%
#   filter (!is.na(feces.recd))
# 
# #blood samples
# child_blood <- child %>%
#   mutate (edta = ifelse (is.na(barcode_edta), barcode_edta_text, barcode_edta),
#           serum = ifelse (is.na(barcode_serum), barcode_serum_text, barcode_serum),
#           bloodspot = ifelse (is.na(barcode_bloodspot), barcode_bloodspot_text, barcode_bloodspot)) %>%
#   select (edta, serum, bloodspot, today, child_name) %>% 
#   mutate (blood.any = ifelse(!is.na(edta) | !is.na(serum) |!is.na(bloodspot), 1, 0))
# table(child_blood$blood.any)
# 
# #anthropometry
# child_anthro <- child %>% 
#   select (weight_record1, weight_avg, ht_r1, height_avg, today, child_name) 
# table(is.na(child_anthro$weight_avg))
# table(is.na(child_anthro$height_avg))
# 
# # with worm infection
# setwd("S:/R-MNHS-SPHPM-EPM-IDEpi/RISE/4. Surveys/3. Objectives/1. FJ/3/20190930_child sampling/3. Data/1. raw data")
# helminths <- read_excel("Obj3_Data20191118.xls", sheet = " Feces_KK2") %>% 
#   mutate(helminth_count = AscarisLumbricoides + Hookworm + TrichurisTrichura + StrongyloidesStercoralis + TaeniaSpss)
# table(helminths$helminth_count>0)

#############################################
# PULL TOGETHER CONSENT LISTS
#############################################
# PULL TOGETHER FINAL LIST OF CONSENTS
source("O3_T1_FJ_consent_update.R")
#############################################

#############################################
##  DATA CHECKING FOR WEEKLY REPORT ##
#############################################
source("O3_T1_FJ_data_check.R")

#############################################
##  RUN LIMS CHECKS ##
#############################################
source("O3_T1_FJ-LIMS_QC.R")
# then run weekly report .Rmd - Jeff to do this end of each week


#############################################
## generate person ids - no new people yet (FIONA WILL DO THIS) ??????????  ##
#############################################
source("O3_T1_FJ-person_list.R")
# this feeds into R script that generates person_ids ***not done yet
# FJ_201903_O3_participants_list.R??????????????


#############################################
## create data files for users / analysis ##
# and for use in next survey              ##
#############################################
# source("O3_T1_ID-data_extract.R")



#################################
#29 NOv 2019 - stats for Karin for annual meeting

# anthro measurements below threshold
table(child$wlz, exclude = NULL) #390 with a measurement; 1 below

# low hemoglobin
table(child$blood_hb, exclude = NULL) #367 NAs
table(child$blood_hb<7.0) #4 out of 279 with blood tested