In LearningToTalk/L2TDatabase: Helper Functions for the Learning-To-Talk 'MySQL' Database

library("knitr")
opts_chunk$set(
  warning = FALSE,
  collapse = TRUE, 
  comment = "#>", 
  message = FALSE,
  fig.width = 8,
  fig.height = 6,
  dpi = 300,
  out.width = "80%")

wd <- rprojroot::find_rstudio_root_file()
opts_knit$set(root.dir = wd)

Find children who need matches

Connect to database:

library(dplyr)
library(L2TDatabase)

# Work relative to RStudio project
wd <- rprojroot::find_rstudio_root_file()
dir_here <- file.path(wd, "inst", "analyses", "ci_matching")
cnf_file <- file.path(wd, "inst", "l2t_db.cnf")

l2t_main <- l2t_connect(cnf_file, "l2t")
l2t_backend <- l2t_connect(cnf_file, "backend")
l2t_eyetracking <- l2t_connect(cnf_file, "eyetracking")

Starting pool for matching: Children at TimePoint1 who

speak Mainstream American English at home
do not have cochlear implants
are not late talkers
have an EVT score
have a maternal education entry in the database

We also recruited some non-longitudinal participants to serve as age-matches for the children with cochlear implants in our CochlearMatching study, so we gather those participants as well and filter them with the same criteria.

apply_nh_matching_criteria <- . %>% 
  filter(!AAE, !CImplant, !LateTalker, 
         !is.na(EVT_Standard), !is.na(Maternal_Education))

nh_set1 <- tbl(l2t_main, "Scores_TimePoint1") %>% 
  apply_nh_matching_criteria() %>% 
  collect()

nh_set2 <- tbl(l2t_main, "Scores_CochlearMatching") %>% 
  apply_nh_matching_criteria() %>% 
  collect()

nh_set <- bind_rows(nh_set1, nh_set2)

Post-stratify by maternal education. First, define low-mid-high medu groups. This coding scheme uses the unusual convention of lumping the two families with less than two years of college education in the low-education group.

medu_scheme <- data_frame(
  Maternal_Education_Level = c(NA, 1:7),
  Maternal_Education_Group = c(NA, "Low", "Low", "Low", "Low", 
                               "Mid", "Mid", "High")
)

nh_set <- left_join(nh_set, medu_scheme, by = "Maternal_Education_Level")

nh_set %>% 
  count(Maternal_Education, Maternal_Education_Level, 
        Maternal_Education_Group) %>% 
  arrange(Maternal_Education_Level)

Find the vocabulary scores that would define the 10-90% range for each maternal education group.

nh_vocab_limits <- nh_set %>% 
  group_by(Maternal_Education_Group) %>% 
  summarise(
    n = n(),
    EVT_Mean = mean(EVT_Standard),
    EVT_Min = min(EVT_Standard), 
    EVT_Max = max(EVT_Standard),
    EVT_10 = quantile(EVT_Standard, .1),
    EVT_90 = quantile(EVT_Standard, .9),
    nInRange = sum(between(EVT_Standard, EVT_10, EVT_90))) %>% 
  ungroup()

nh_vocab_limits

Restrict pool to just children with scores in their range.

# N before
nrow(nh_set)

nh_filtered <- nh_set %>% 
  left_join(nh_vocab_limits) %>% 
  rowwise() %>% 
  mutate(InRange = between(EVT_Standard, EVT_10, EVT_90)) %>% 
  ungroup() %>% 
  filter(InRange)

# N after
nrow(nh_filtered)

Children who can be matches who were tested at Timepoint1 should not have received the first bad version of the eyetracking experiment with the timing glitch. Identify children to exclude from matching because they had a nonstandard version of an eyetracking experiment.

children_with_bad_blocks <- tbl(l2t_eyetracking, "q_BlocksByStudy") %>% 
  filter(Version != "Standard") %>% 
  distinct(Study, ResearchID) %>% 
  collect()
children_with_bad_blocks

Also take into account missing data for each eyetracking task.

missing_data_stats_raw <- tbl(l2t_eyetracking, "q_MissingDataByBlock") %>% 
  collect()

missing_data_stats <- missing_data_stats_raw %>% 
  filter(Version == "Standard") %>% 
  group_by(Study, ResearchID, Task) %>% 
  summarise(MinMissing = min(ProportionMissing),
            NUseableBlocks = sum(ProportionMissing < .5)) %>% 
  ungroup()
missing_data_stats

Accounting for multiple visits

Get the participation of children across multiple studies.

childstudy <- tbl(l2t_backend, "ChildStudy") %>% 
  left_join(tbl(l2t_backend, "Study")) %>% 
  select(ChildID, ChildStudyID, ResearchID = ShortResearchID, Study) %>% 
  collect()

childstudy

Get study participation for children in the NH matching pool.

nh_childstudy <- childstudy %>% 
  semi_join(select(nh_filtered, ChildID))
nh_childstudy

Remove the child-study pairs for children with the bad eyetracking.

no_useable_blocks <- missing_data_stats %>% 
  group_by(Study, ResearchID) %>% 
  filter(sum(NUseableBlocks) == 0) %>% 
  ungroup()
no_useable_blocks

nh_childstudy <- nh_childstudy %>% 
  anti_join(children_with_bad_blocks) %>% 
  anti_join(no_useable_blocks)
nh_childstudy

Get the information for matching.

d_tp1 <- tbl(l2t_main, "Scores_TimePoint1") %>% collect()
d_tp2 <- tbl(l2t_main, "Scores_TimePoint2") %>% collect()
d_tp3 <- tbl(l2t_main, "Scores_TimePoint3") %>% collect()
d_matching <- tbl(l2t_main, "Scores_CochlearMatching") %>% collect()
d_ci1 <- tbl(l2t_main, "Scores_CochlearV1") %>% collect()
d_ci2 <- tbl(l2t_main, "Scores_CochlearV2") %>% collect()

# Get age, Sex, Medu for everyone
matching_vars <- bind_rows(d_tp1, d_tp2, d_tp3, d_ci1, d_ci2, d_matching) %>% 
  mutate(Age = EVT_Age,
         Age = ifelse(is.na(Age), PPVT_Age, Age), 
         Age = ifelse(is.na(Age), FruitStroop_Age, Age)) %>% 
  select(ChildID, ChildStudyID, Study, Age, 
         Female, CImplant, AAE, LateTalker, ResearchID, 
         Maternal_Education, Maternal_Education_Level) %>% 
  left_join(medu_scheme, by = "Maternal_Education_Level")

# Number the study visits
nh_matching <- matching_vars %>% 
  semi_join(nh_childstudy) %>% 
  group_by(ChildID) %>% 
  arrange(Age) %>% 
  mutate(NumVisits = length(ChildStudyID),
         VisitNum = seq_along(Age)) %>% 
  ungroup() %>% 
  arrange(ChildID, VisitNum) %>% 
  select(-ChildStudyID) %>% 
  select(ChildID, ResearchID, Study, Age, VisitNum, NumVisits, everything())

# Also include information about eyetracking data
useable_blocks_per_childstudy <- missing_data_stats %>% 
  tidyr::gather(Variable, Value, MinMissing:NUseableBlocks) %>% 
  filter(Variable == "NUseableBlocks") %>% 
  tidyr::complete(tidyr::nesting(Study, ResearchID), Task, Variable, 
                  fill = list(Value = 0)) %>% 
  tidyr::unite(Variable, Task, Variable) %>% 
  tidyr::spread(Variable, Value)

nh_matching <- nh_matching %>% 
  left_join(useable_blocks_per_childstudy)
nh_matching

ci_matching <- matching_vars %>% 
  # Exclude 079L's Timepoint1 visit because they were moved from the
  # longitudinal study tract to the CochlearV1/CochlearV2 tract.
  filter(CImplant == 1, ResearchID != "079L") %>% 
  group_by(ChildID) %>% 
  arrange(Age) %>% 
  mutate(NumVisits = length(ChildStudyID),
         VisitNum = seq_along(Age)) %>% 
  ungroup() %>% 
  arrange(ChildID, VisitNum) %>% 
  select(-ChildStudyID) %>% 
  select(ChildID, ResearchID, Study, Age, VisitNum, NumVisits, everything()) %>% 
  left_join(useable_blocks_per_childstudy)
ci_matching

readr::write_csv(nh_matching, "nh_matching_info.csv")
readr::write_csv(ci_matching, "ci_matching_info.csv")

LearningToTalk/L2TDatabase documentation built on June 24, 2020, 3:45 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

LearningToTalk/L2TDatabase
Helper Functions for the Learning-To-Talk 'MySQL' Database

In LearningToTalk/L2TDatabase: Helper Functions for the Learning-To-Talk 'MySQL' Database

Find children who need matches

Accounting for multiple visits

R Package Documentation

Browse R Packages

We want your feedback!

LearningToTalk/L2TDatabase Helper Functions for the Learning-To-Talk 'MySQL' Database

In LearningToTalk/L2TDatabase: Helper Functions for the Learning-To-Talk 'MySQL' Database

Find children who need matches

Accounting for multiple visits

R Package Documentation

Browse R Packages

We want your feedback!

LearningToTalk/L2TDatabase
Helper Functions for the Learning-To-Talk 'MySQL' Database