library("knitr")
opts_chunk$set(
  warning = FALSE,
  collapse = TRUE, 
  comment = "#>", 
  message = FALSE,
  fig.width = 8,
  fig.height = 6,
  dpi = 300,
  out.width = "80%")

wd <- rprojroot::find_rstudio_root_file()
opts_knit$set(root.dir = wd)

Find children who need matches

Connect to database:

library(dplyr)
library(L2TDatabase)

# Work relative to RStudio project
wd <- rprojroot::find_rstudio_root_file()
dir_here <- file.path(wd, "inst", "analyses", "ci_matching")
cnf_file <- file.path(wd, "inst", "l2t_db.cnf")

l2t_main <- l2t_connect(cnf_file, "l2t")
l2t_backend <- l2t_connect(cnf_file, "backend")
l2t_eyetracking <- l2t_connect(cnf_file, "eyetracking")

Starting pool for matching: Children at TimePoint1 who

We also recruited some non-longitudinal participants to serve as age-matches for the children with cochlear implants in our CochlearMatching study, so we gather those participants as well and filter them with the same criteria.

apply_nh_matching_criteria <- . %>% 
  filter(!AAE, !CImplant, !LateTalker, 
         !is.na(EVT_Standard), !is.na(Maternal_Education))

nh_set1 <- tbl(l2t_main, "Scores_TimePoint1") %>% 
  apply_nh_matching_criteria() %>% 
  collect()

nh_set2 <- tbl(l2t_main, "Scores_CochlearMatching") %>% 
  apply_nh_matching_criteria() %>% 
  collect()

nh_set <- bind_rows(nh_set1, nh_set2)

Post-stratify by maternal education. First, define low-mid-high medu groups. This coding scheme uses the unusual convention of lumping the two families with less than two years of college education in the low-education group.

medu_scheme <- data_frame(
  Maternal_Education_Level = c(NA, 1:7),
  Maternal_Education_Group = c(NA, "Low", "Low", "Low", "Low", 
                               "Mid", "Mid", "High")
)

nh_set <- left_join(nh_set, medu_scheme, by = "Maternal_Education_Level")

nh_set %>% 
  count(Maternal_Education, Maternal_Education_Level, 
        Maternal_Education_Group) %>% 
  arrange(Maternal_Education_Level)

Find the vocabulary scores that would define the 10-90% range for each maternal education group.

nh_vocab_limits <- nh_set %>% 
  group_by(Maternal_Education_Group) %>% 
  summarise(
    n = n(),
    EVT_Mean = mean(EVT_Standard),
    EVT_Min = min(EVT_Standard), 
    EVT_Max = max(EVT_Standard),
    EVT_10 = quantile(EVT_Standard, .1),
    EVT_90 = quantile(EVT_Standard, .9),
    nInRange = sum(between(EVT_Standard, EVT_10, EVT_90))) %>% 
  ungroup()

nh_vocab_limits

Restrict pool to just children with scores in their range.

# N before
nrow(nh_set)

nh_filtered <- nh_set %>% 
  left_join(nh_vocab_limits) %>% 
  rowwise() %>% 
  mutate(InRange = between(EVT_Standard, EVT_10, EVT_90)) %>% 
  ungroup() %>% 
  filter(InRange)

# N after
nrow(nh_filtered)

Children who can be matches who were tested at Timepoint1 should not have received the first bad version of the eyetracking experiment with the timing glitch. Identify children to exclude from matching because they had a nonstandard version of an eyetracking experiment.

children_with_bad_blocks <- tbl(l2t_eyetracking, "q_BlocksByStudy") %>% 
  filter(Version != "Standard") %>% 
  distinct(Study, ResearchID) %>% 
  collect()
children_with_bad_blocks

Also take into account missing data for each eyetracking task.

missing_data_stats_raw <- tbl(l2t_eyetracking, "q_MissingDataByBlock") %>% 
  collect()

missing_data_stats <- missing_data_stats_raw %>% 
  filter(Version == "Standard") %>% 
  group_by(Study, ResearchID, Task) %>% 
  summarise(MinMissing = min(ProportionMissing),
            NUseableBlocks = sum(ProportionMissing < .5)) %>% 
  ungroup()
missing_data_stats

Accounting for multiple visits

Get the participation of children across multiple studies.

childstudy <- tbl(l2t_backend, "ChildStudy") %>% 
  left_join(tbl(l2t_backend, "Study")) %>% 
  select(ChildID, ChildStudyID, ResearchID = ShortResearchID, Study) %>% 
  collect()

childstudy

Get study participation for children in the NH matching pool.

nh_childstudy <- childstudy %>% 
  semi_join(select(nh_filtered, ChildID))
nh_childstudy

Remove the child-study pairs for children with the bad eyetracking.

no_useable_blocks <- missing_data_stats %>% 
  group_by(Study, ResearchID) %>% 
  filter(sum(NUseableBlocks) == 0) %>% 
  ungroup()
no_useable_blocks

nh_childstudy <- nh_childstudy %>% 
  anti_join(children_with_bad_blocks) %>% 
  anti_join(no_useable_blocks)
nh_childstudy

Get the information for matching.

d_tp1 <- tbl(l2t_main, "Scores_TimePoint1") %>% collect()
d_tp2 <- tbl(l2t_main, "Scores_TimePoint2") %>% collect()
d_tp3 <- tbl(l2t_main, "Scores_TimePoint3") %>% collect()
d_matching <- tbl(l2t_main, "Scores_CochlearMatching") %>% collect()
d_ci1 <- tbl(l2t_main, "Scores_CochlearV1") %>% collect()
d_ci2 <- tbl(l2t_main, "Scores_CochlearV2") %>% collect()

# Get age, Sex, Medu for everyone
matching_vars <- bind_rows(d_tp1, d_tp2, d_tp3, d_ci1, d_ci2, d_matching) %>% 
  mutate(Age = EVT_Age,
         Age = ifelse(is.na(Age), PPVT_Age, Age), 
         Age = ifelse(is.na(Age), FruitStroop_Age, Age)) %>% 
  select(ChildID, ChildStudyID, Study, Age, 
         Female, CImplant, AAE, LateTalker, ResearchID, 
         Maternal_Education, Maternal_Education_Level) %>% 
  left_join(medu_scheme, by = "Maternal_Education_Level")

# Number the study visits
nh_matching <- matching_vars %>% 
  semi_join(nh_childstudy) %>% 
  group_by(ChildID) %>% 
  arrange(Age) %>% 
  mutate(NumVisits = length(ChildStudyID),
         VisitNum = seq_along(Age)) %>% 
  ungroup() %>% 
  arrange(ChildID, VisitNum) %>% 
  select(-ChildStudyID) %>% 
  select(ChildID, ResearchID, Study, Age, VisitNum, NumVisits, everything())

# Also include information about eyetracking data
useable_blocks_per_childstudy <- missing_data_stats %>% 
  tidyr::gather(Variable, Value, MinMissing:NUseableBlocks) %>% 
  filter(Variable == "NUseableBlocks") %>% 
  tidyr::complete(tidyr::nesting(Study, ResearchID), Task, Variable, 
                  fill = list(Value = 0)) %>% 
  tidyr::unite(Variable, Task, Variable) %>% 
  tidyr::spread(Variable, Value)

nh_matching <- nh_matching %>% 
  left_join(useable_blocks_per_childstudy)
nh_matching

ci_matching <- matching_vars %>% 
  # Exclude 079L's Timepoint1 visit because they were moved from the
  # longitudinal study tract to the CochlearV1/CochlearV2 tract.
  filter(CImplant == 1, ResearchID != "079L") %>% 
  group_by(ChildID) %>% 
  arrange(Age) %>% 
  mutate(NumVisits = length(ChildStudyID),
         VisitNum = seq_along(Age)) %>% 
  ungroup() %>% 
  arrange(ChildID, VisitNum) %>% 
  select(-ChildStudyID) %>% 
  select(ChildID, ResearchID, Study, Age, VisitNum, NumVisits, everything()) %>% 
  left_join(useable_blocks_per_childstudy)
ci_matching

readr::write_csv(nh_matching, "nh_matching_info.csv")
readr::write_csv(ci_matching, "ci_matching_info.csv")


LearningToTalk/L2TDatabase documentation built on June 24, 2020, 3:45 a.m.