library("knitr") opts_chunk$set( warning = FALSE, collapse = TRUE, comment = "#>", message = FALSE, fig.width = 8, fig.height = 6, dpi = 300, out.width = "80%") wd <- rprojroot::find_rstudio_root_file() opts_knit$set(root.dir = wd)
Connect to database:
library(dplyr) library(L2TDatabase) # Work relative to RStudio project wd <- rprojroot::find_rstudio_root_file() dir_here <- file.path(wd, "inst", "analyses", "ci_matching") cnf_file <- file.path(wd, "inst", "l2t_db.cnf") l2t_main <- l2t_connect(cnf_file, "l2t") l2t_backend <- l2t_connect(cnf_file, "backend") l2t_eyetracking <- l2t_connect(cnf_file, "eyetracking")
Starting pool for matching: Children at TimePoint1 who
We also recruited some non-longitudinal participants to serve as age-matches for the children with cochlear implants in our CochlearMatching study, so we gather those participants as well and filter them with the same criteria.
apply_nh_matching_criteria <- . %>% filter(!AAE, !CImplant, !LateTalker, !is.na(EVT_Standard), !is.na(Maternal_Education)) nh_set1 <- tbl(l2t_main, "Scores_TimePoint1") %>% apply_nh_matching_criteria() %>% collect() nh_set2 <- tbl(l2t_main, "Scores_CochlearMatching") %>% apply_nh_matching_criteria() %>% collect() nh_set <- bind_rows(nh_set1, nh_set2)
Post-stratify by maternal education. First, define low-mid-high medu groups. This coding scheme uses the unusual convention of lumping the two families with less than two years of college education in the low-education group.
medu_scheme <- data_frame( Maternal_Education_Level = c(NA, 1:7), Maternal_Education_Group = c(NA, "Low", "Low", "Low", "Low", "Mid", "Mid", "High") ) nh_set <- left_join(nh_set, medu_scheme, by = "Maternal_Education_Level") nh_set %>% count(Maternal_Education, Maternal_Education_Level, Maternal_Education_Group) %>% arrange(Maternal_Education_Level)
Find the vocabulary scores that would define the 10-90% range for each maternal education group.
nh_vocab_limits <- nh_set %>% group_by(Maternal_Education_Group) %>% summarise( n = n(), EVT_Mean = mean(EVT_Standard), EVT_Min = min(EVT_Standard), EVT_Max = max(EVT_Standard), EVT_10 = quantile(EVT_Standard, .1), EVT_90 = quantile(EVT_Standard, .9), nInRange = sum(between(EVT_Standard, EVT_10, EVT_90))) %>% ungroup() nh_vocab_limits
Restrict pool to just children with scores in their range.
# N before nrow(nh_set) nh_filtered <- nh_set %>% left_join(nh_vocab_limits) %>% rowwise() %>% mutate(InRange = between(EVT_Standard, EVT_10, EVT_90)) %>% ungroup() %>% filter(InRange) # N after nrow(nh_filtered)
Children who can be matches who were tested at Timepoint1 should not have received the first bad version of the eyetracking experiment with the timing glitch. Identify children to exclude from matching because they had a nonstandard version of an eyetracking experiment.
children_with_bad_blocks <- tbl(l2t_eyetracking, "q_BlocksByStudy") %>% filter(Version != "Standard") %>% distinct(Study, ResearchID) %>% collect() children_with_bad_blocks
Also take into account missing data for each eyetracking task.
missing_data_stats_raw <- tbl(l2t_eyetracking, "q_MissingDataByBlock") %>% collect() missing_data_stats <- missing_data_stats_raw %>% filter(Version == "Standard") %>% group_by(Study, ResearchID, Task) %>% summarise(MinMissing = min(ProportionMissing), NUseableBlocks = sum(ProportionMissing < .5)) %>% ungroup() missing_data_stats
Get the participation of children across multiple studies.
childstudy <- tbl(l2t_backend, "ChildStudy") %>% left_join(tbl(l2t_backend, "Study")) %>% select(ChildID, ChildStudyID, ResearchID = ShortResearchID, Study) %>% collect() childstudy
Get study participation for children in the NH matching pool.
nh_childstudy <- childstudy %>% semi_join(select(nh_filtered, ChildID)) nh_childstudy
Remove the child-study pairs for children with the bad eyetracking.
no_useable_blocks <- missing_data_stats %>% group_by(Study, ResearchID) %>% filter(sum(NUseableBlocks) == 0) %>% ungroup() no_useable_blocks nh_childstudy <- nh_childstudy %>% anti_join(children_with_bad_blocks) %>% anti_join(no_useable_blocks) nh_childstudy
Get the information for matching.
d_tp1 <- tbl(l2t_main, "Scores_TimePoint1") %>% collect() d_tp2 <- tbl(l2t_main, "Scores_TimePoint2") %>% collect() d_tp3 <- tbl(l2t_main, "Scores_TimePoint3") %>% collect() d_matching <- tbl(l2t_main, "Scores_CochlearMatching") %>% collect() d_ci1 <- tbl(l2t_main, "Scores_CochlearV1") %>% collect() d_ci2 <- tbl(l2t_main, "Scores_CochlearV2") %>% collect() # Get age, Sex, Medu for everyone matching_vars <- bind_rows(d_tp1, d_tp2, d_tp3, d_ci1, d_ci2, d_matching) %>% mutate(Age = EVT_Age, Age = ifelse(is.na(Age), PPVT_Age, Age), Age = ifelse(is.na(Age), FruitStroop_Age, Age)) %>% select(ChildID, ChildStudyID, Study, Age, Female, CImplant, AAE, LateTalker, ResearchID, Maternal_Education, Maternal_Education_Level) %>% left_join(medu_scheme, by = "Maternal_Education_Level") # Number the study visits nh_matching <- matching_vars %>% semi_join(nh_childstudy) %>% group_by(ChildID) %>% arrange(Age) %>% mutate(NumVisits = length(ChildStudyID), VisitNum = seq_along(Age)) %>% ungroup() %>% arrange(ChildID, VisitNum) %>% select(-ChildStudyID) %>% select(ChildID, ResearchID, Study, Age, VisitNum, NumVisits, everything()) # Also include information about eyetracking data useable_blocks_per_childstudy <- missing_data_stats %>% tidyr::gather(Variable, Value, MinMissing:NUseableBlocks) %>% filter(Variable == "NUseableBlocks") %>% tidyr::complete(tidyr::nesting(Study, ResearchID), Task, Variable, fill = list(Value = 0)) %>% tidyr::unite(Variable, Task, Variable) %>% tidyr::spread(Variable, Value) nh_matching <- nh_matching %>% left_join(useable_blocks_per_childstudy) nh_matching ci_matching <- matching_vars %>% # Exclude 079L's Timepoint1 visit because they were moved from the # longitudinal study tract to the CochlearV1/CochlearV2 tract. filter(CImplant == 1, ResearchID != "079L") %>% group_by(ChildID) %>% arrange(Age) %>% mutate(NumVisits = length(ChildStudyID), VisitNum = seq_along(Age)) %>% ungroup() %>% arrange(ChildID, VisitNum) %>% select(-ChildStudyID) %>% select(ChildID, ResearchID, Study, Age, VisitNum, NumVisits, everything()) %>% left_join(useable_blocks_per_childstudy) ci_matching readr::write_csv(nh_matching, "nh_matching_info.csv") readr::write_csv(ci_matching, "ci_matching_info.csv")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.