library("knitr")
opts_chunk$set(
  warning = FALSE,
  collapse = TRUE, 
  comment = "#>", 
  message = FALSE,
  fig.width = 8,
  # fig.height = 6,
  fig.asp = 0.618,
  dpi = 300,
  out.width = "80%")

wd <- rprojroot::find_rstudio_root_file()
opts_knit$set(root.dir = wd)

Problem statement

ME would like to look at the data for the RWL task for participants in the Dialect Switch study. I will

Find the eyetracking blocks

Connect to database:

library(dplyr)
library(L2TDatabase)

# Work relative to RStudio project
wd <- rprojroot::find_rstudio_root_file()
dir_here <- file.path(wd, "inst", "analyses", "ci_matching")
cnf_file <- file.path(wd, "inst", "l2t_db.cnf")

# Connect to the individual databases
l2t_main <- l2t_connect(cnf_file, "l2t")
l2t_backend <- l2t_connect(cnf_file, "backend")
l2t_eyetracking <- l2t_connect(cnf_file, "eyetracking")

Determine which blocks of RWL used which stimuli

JE says that we should only use the experiments that used the re-recorded stimuli for TimePoint2. We modified the stimuli at TimePoint2 so that the two dialect versions had similar durations.

These are the tables in the eyetracking database.

src_tbls(l2t_eyetracking)

Tables with q_ at the front are queries which contain helpful information (like Study and ResearchID) alongside the main data in the table. We use the q_TrialAttributesByStudy table to get information about each eyetracking trial. I use q_ for queries that developed in these scripts as well.

# Dialects used in each block
q_block_dialects <- l2t_eyetracking %>% 
  tbl("q_BlockAttributesByStudy") %>% 
  filter(Task == "RWL", 
    BlockAttribute_Name == c("Dialect"), 
    Study %in% c("DialectSwitch", "MaternalEd")) %>% 
  select(BlockID, Dialect = BlockAttribute_Value)

q_block_ages <- l2t_eyetracking %>% 
  tbl("Blocks") %>% 
  select(BlockID, Block_Age)

# Find the blocks that used TP2 stimuli.
q_blocks_to_use <- l2t_eyetracking %>% 
  tbl("q_BlockAttributesByStudy") %>% 
  filter(
    Task == "RWL", 
    BlockAttribute_Name == c("StimulusSet"), 
    BlockAttribute_Value == c("TP2"),
    Study %in% c("DialectSwitch", "MaternalEd")) %>% 
  select(-BlockAttribute_Name, StimulusSet = BlockAttribute_Value) %>% 
  left_join(q_block_dialects) %>% 
  left_join(q_block_ages)

Count the number of blocks in the dialect-varying studies.

q_blocks_to_use %>% 
  count(Study, Dialect, StimulusSet) %>% 
  rename(nBlocks = n) %>% 
  ungroup() %>% 
  collect()

Download block and child demographic information

Download child-level information.

df_child_info <- tbl(l2t_backend, "Child") %>% 
  left_join(tbl(l2t_backend, "ChildStudy")) %>% 
  left_join(tbl(l2t_backend, "Study")) %>% 
  filter(Study %in% c("DialectSwitch", "MaternalEd")) %>% 
  rename(ResearchID = ShortResearchID) %>% 
  collect() %>% 
  left_join(collect(tbl(l2t_main, "Maternal_Education"))) %>% 
  select(Study, ResearchID, ChildID, ChildStudyID, 
         Female, AAE, LateTalker, CImplant, 
         Maternal_Education, Maternal_Education_Level) 

# Add some helper columns
df_medu_scheme <- data_frame(
  Maternal_Education_Level = c(NA, 1:7),
  Maternal_Education_Group = c(NA, "Low", "Low", "Low", "Mid", 
                               "Mid", "High", "High")
)

df_child_info <- df_child_info %>% 
  left_join(df_medu_scheme) %>%
  mutate(
    Dialect = ifelse(AAE, "AAE", "MAE"),
    Gender = ifelse(Female, "Girl", "Boy")) 

df_child_info$Maternal_Education_Group <- factor(
  df_child_info$Maternal_Education_Group, 
  levels = c("Low", "Mid", "High")
)

Attach research IDs and study names to the non-TP1 blocks.

df_rwl_blocks <- q_blocks_to_use %>% 
  rename(BlockDialect = Dialect) %>% 
  collect() %>% 
  left_join(df_child_info)

Now we count the number of dialects presented to each child to find out who received more than one version of the experiment.

df_children_who_got_multiple_dialects <- df_rwl_blocks %>% 
  select(ChildID, AAE, BlockDialect) %>% 
  distinct() %>% 
  # Count the dialects presented to each child. Find multiple dialect children
  count(AAE, ChildID) %>% 
  filter(n > 1) %>% 
  ungroup() %>% 
  select(ChildID)

df_blocks_to_keep <- df_rwl_blocks %>% 
  inner_join(df_children_who_got_multiple_dialects)

One child apparently got three blocks. Figure out which block to manually exclude.

q_blocks_to_use %>% 
  group_by(ResearchID) %>% 
  tally() %>% 
  collect() %>% 
  filter(n > 2)

q_blocks_to_use %>% 
  collect() %>% 
  filter(ResearchID == "436D")

# Skip the latest redundant plock
df_blocks_to_keep <- df_blocks_to_keep %>% 
  filter(DateTime != "2014-04-01 09:52:14")

# Update the server-side query to use the restricted subset of blocks
q_blocks_to_use <- q_blocks_to_use %>% 
  filter(BlockID %in% df_blocks_to_keep$BlockID)

Download the gaze data

Download the eyetracking data

# Keep just the trials in the blocks we want
q_rwl_trials <- tbl(l2t_eyetracking, "Trials") %>% 
  semi_join(q_blocks_to_use)

# Download the gazes in those trials
df_raw_rwl_looks <- tbl(l2t_eyetracking, "Looks") %>% 
  inner_join(q_rwl_trials) %>% 
  select(TrialID, Time, GazeByImageAOI) %>% 
  collect(n = Inf) 

df_rwl_looks <- df_raw_rwl_looks %>% 
  inner_join(collect(q_rwl_trials)) %>% 
  inner_join(df_blocks_to_keep) %>% 
  rename(TrialNo = Trial_TrialNo)



# rwl_looks <- rwl_looks %>% 
#   left_join(blocks_to_keep) %>% 
#   left_join(child_info) %>% 
#   select(Study, ResearchID, Dialect, BlockDialect, Block_Age,
#          BlockID, ChildID:Maternal_Education_Level, 
#          Maternal_Education_Group:PPVT_Standard,
#          TrialID, TrialNo, Time, GazeByImageAOI)
# 
# # rwl_looks %>% filter(is.na(Dialect))
# 
# rwl_looks <- rwl_looks %>% 

Data screening

Excessive missing data is defined as having more than 50% missing data between 0 and 2000 ms (relative to target onset).

Filter out participants who have an entire block with excessive missing data averaged across trials.

library(lookr)
library(ggplot2)

df_participants_to_drop <- df_rwl_looks %>% 
  filter(between(Time, -20, 2020)) %>% 
  AggregateLooks(Dialect + BlockID + ResearchID ~ GazeByImageAOI) %>%
  tibble::as_tibble() %>% 
  filter(PropNA > .5) %>% 
  select(NativeDialect = Dialect, ResearchID) %>% 
  print()

r nrow(df_participants_to_drop) participants are dropped for excessive missing data.

Filter out individual trials with excessive missing data.

df_trials_to_drop  <- df_rwl_looks %>% 
  anti_join(df_participants_to_drop) %>% 
  filter(between(Time, -20, 2020)) %>% 
  AggregateLooks(Dialect + TrialID + ResearchID ~ GazeByImageAOI) %>% 
  tibble::as_tibble() %>% 
  filter(PropNA > .5) %>% 
  select(NativeDialect = Dialect, ResearchID, TrialID)

df_trials_to_drop %>% 
  count(NativeDialect)

r nrow(df_trials_to_drop) additional trials are dropped.

Data quality stats for remaining children.

df_rwl_looks <- df_rwl_looks %>% 
  anti_join(df_participants_to_drop) %>% 
  anti_join(df_trials_to_drop) %>% 
  mutate(BlockDialect = ifelse(BlockDialect == "AAE", "AAE", "MAE"),
         HearsNativeDialect = Dialect == BlockDialect)

df_rwl_looks %>%
  filter(between(Time, -20, 2020)) %>% 
  AggregateLooks(Dialect + BlockDialect + ResearchID + TrialID ~ GazeByImageAOI) %>% 
  tibble::as_tibble() %>% 
  group_by(Dialect, BlockDialect, ResearchID) %>% 
  summarise(
    nGoodTrials = n(),
    Mean_Prop_NA = mean(PropNA)) %>% 
  summarise(
    nChildren = n(), 
    `Total Useable Trials` = sum(nGoodTrials),
    `Mean N of Useable Trials` = mean(nGoodTrials) %>% round(1), 
    `SD Trials` = sd(nGoodTrials) %>% round(1),
    `Min Trials` = min(nGoodTrials),
    `Max Trials` = max(nGoodTrials),
    `Mean Prop of Missing Data` = mean(Mean_Prop_NA) %>% round(3), 
    `SD Prop Missing` = sd(Mean_Prop_NA) %>% round(3),
    `Min Prop Missing` = min(Mean_Prop_NA) %>% round(3),
    `Max Prop Missing` = max(Mean_Prop_NA) %>% round(3)) %>% 
  knitr::kable()

df_participants <- df_rwl_looks %>% 
  distinct(Study, ResearchID, ChildStudyID)

df_child_info <- df_child_info %>% inner_join(df_participants)

Summarize child-level information

Now that we know which blocks to include, we can get the vocabulary scores for the children who contributed those blocks.

df_evt <- tbl(l2t_main, "EVT") %>% 
  select(Study, ResearchID, EVT_Age:EVT_Standard) %>% 
  collect()

df_ppvt <- tbl(l2t_main, "PPVT") %>% 
  select(Study, ResearchID, PPVT_Age:PPVT_Standard) %>% 
  collect()

df_child_vars <- df_child_info %>% 
  select(Study, ResearchID, ChildStudyID, Female:Gender) %>% 
  distinct() %>% 
  left_join(df_evt) %>% 
  left_join(df_ppvt)

Boys/girls by native dialect.

df_child_vars %>% 
  count(Dialect, Gender) %>% 
  ungroup() %>% 
  rename(`N Children` = n) %>% 
  knitr::kable()

Maternal education by native dialect.

df_child_vars %>% 
  count(Dialect, Maternal_Education_Group) %>% 
  ungroup() %>% 
  rename(`N Children` = n) %>%   
  knitr::kable() 

df_child_vars %>% 
  count(Dialect, Maternal_Education_Group) %>% 
  ungroup() %>% 
  tidyr::spread(Dialect, n) %>% 
  knitr::kable()

df_child_vars %>% 
  count(Dialect, Maternal_Education_Group, Maternal_Education) %>% 
  ungroup() %>% 
  tidyr::spread(Dialect, n) %>% 
  tidyr::replace_na(list(AAE = 0, MAE = 0)) %>% 
  knitr::kable()

Child level measures by dialect group.

narm_mean <- function(...) mean(..., na.rm = TRUE)
narm_sd <- function(...) sd(..., na.rm = TRUE)
narm_n <- function(...) sum(!is.na(...))

df_child_vars %>% 
  group_by(Dialect) %>% 
  summarise(
    `N Children` = n(), 
    sum(Female),
    `Mean Age (months)` = narm_mean(EVT_Age),
    `SD Age (months)` = narm_sd(EVT_Age),
    `N EVT` = narm_n(EVT_Raw), 
    `Mean EVT Standard` = narm_mean(EVT_Standard),
    `SD EVT Standard` = narm_sd(EVT_Standard),
    `N PPVT` = narm_n(PPVT_Raw), 
    `Mean PPVT Standard` = narm_mean(PPVT_Standard),
    `SD PPVT Standard` = narm_sd(PPVT_Standard)) %>% 
  knitr::kable(digits = 1)

df_child_vars %>% 
  group_by(Dialect, Maternal_Education_Group) %>% 
  summarise(
    `N Children` = n(), 
    `Mean Age (months)` = narm_mean(EVT_Age),
    `SD Age (months)` = narm_sd(EVT_Age),
    `N EVT` = narm_n(EVT_Raw), 
    `Mean EVT Standard` = narm_mean(EVT_Standard),
    `SD EVT Standard` = narm_sd(EVT_Standard),
    `N PPVT` = narm_n(PPVT_Raw), 
    `Mean PPVT Standard` = narm_mean(PPVT_Standard),
    `SD PPVT Standard` = narm_sd(PPVT_Standard)) %>% 
  knitr::kable(digits = 1)

Finally, plots of fixation patterns over time

Assign the individual frames into 50-ms bins.

df_bin_times <- df_rwl_looks %>%  
  filter(between(Time, -520, 2020)) %>% 
  distinct(Time) %>% 
  mutate(Bin = lookr::AssignBins(Time, bin_width = 3)) %>% 
  group_by(Bin) %>% 
  mutate(BinTime = Time %>% median() %>% round(-1)) %>% 
  ungroup()
df_bin_times

Aggregated looking data over Time across trials within Dialect x BlockDialect within Child.

df_looks <- df_rwl_looks %>%
  inner_join(df_bin_times) %>% 
  AggregateLooks(Maternal_Education_Group + Study + Dialect + BlockDialect + 
                   HearsNativeDialect + ResearchID + BinTime ~ GazeByImageAOI) %>% 
  tibble::as_tibble() %>% 
  rename(Time = BinTime) %>% 
  mutate(Looks_Images = Target + Others,
       Prop_Target = Target / Looks_Images,
       Prop_PhonologicalFoil = PhonologicalFoil / Looks_Images,
       Prop_SemanticFoil = SemanticFoil / Looks_Images,
       Prop_Unrelated = Unrelated / Looks_Images)

df_evt <- df_child_vars %>% 
  mutate(Vocab3tile = ntile(EVT_Standard, 3), 
         `Exp. vocab.` = factor(Vocab3tile, 1:3, 
                                c("Lower third", "Middle third", "Upper third"))) %>% 
  select(Study, ResearchID, Vocab3tile, `Exp. vocab.`)

df_looks <- df_looks %>% 
  mutate(`Child hears` = ifelse(HearsNativeDialect, "Native dialect", 
                                "Non-native dialect"),
         `Native dialect` = Dialect,
         `Maternal edu.` = Maternal_Education_Group) %>% 
  left_join(df_evt)
library(hrbrthemes)
plot_text <- list(
  x_time = "Time (ms) after target noun onset", 
  y_target = "Proportion of looks to named image",
  y_image = "Proportion of looks to image",
  caption_mean_se = "Mean ± SE"
)

legend_position <- theme(
  legend.position = "bottom", 
  legend.text = element_text(size = 10), 
  legend.justification = "left")

colors <- viridis::scale_color_viridis(end = .7, discrete = TRUE)

ggplot(df_looks) + 
  aes(x = Time, y = Proportion) +   
  geom_hline(yintercept = .25, size = 1.25, color = "#cccccc") + 
  geom_vline(xintercept = 0, size = 1.25, color = "#cccccc") + 
  stat_summary() + 
  theme_ipsum_rc(axis_title_size = 11) + 
  labs(x = plot_text$x_time, 
       y = plot_text$y_target, 
       caption = plot_text$caption_mean_se)

ggplot(df_looks) + 
  aes(x = Time, y = Proportion, color = `Child hears`) + 
  geom_hline(yintercept = .25, size = 1.25, color = "#cccccc") + 
  geom_vline(xintercept = 0, size = 1.25, color = "#cccccc") + 
  stat_summary() + 
  theme_ipsum_rc(axis_title_size = 11) + 
  legend_position + 
  labs(x = plot_text$x_time, 
       y = plot_text$y_target) + 
  colors

ggplot(df_looks) + 
  aes(x = Time, y = Proportion, color = `Child hears`) + 
  geom_hline(yintercept = .25, size = 1.25, color = "#cccccc") + 
  stat_summary() + 
  theme_ipsum_rc(axis_title_size = 11) + 
  legend_position + 
  labs(x = plot_text$x_time, 
       y = plot_text$y_target) +
  facet_wrap("`Native dialect`", labeller = label_both) + 
  xlim(0, 2000) + 
  colors

ggplot(df_looks %>% filter(!is.na(`Exp. vocab.`))) + 
  aes(x = Time, y = Proportion, color = `Child hears`) + 
  geom_hline(yintercept = .25, size = 1.25, color = "#cccccc") + 
  stat_summary() + 
  theme_ipsum_rc(axis_title_size = 11) + 
  legend_position + 
  colors +
  labs(x = plot_text$x_time, 
       y = plot_text$y_target) +
  facet_wrap("`Exp. vocab.`", labeller = label_both) + 
  xlim(0, 2000)

ggplot(df_looks %>% filter(!is.na(Maternal_Education_Group))) + 
  aes(x = Time, y = Proportion, color = `Child hears`) + 
  geom_hline(yintercept = .25, size = 1.25, color = "#cccccc") + 
  stat_summary() + 
  theme_ipsum_rc(axis_title_size = 11) + 
  theme(legend.position = "bottom", legend.text = element_text(size = 10)) + 
  labs(x = plot_text$x_time, 
       y = plot_text$y_target) +
  facet_wrap("`Maternal edu.`", labeller = label_both) + 
  xlim(0, 2000)

Plot looks to each competitor type.

df_looks_to_aois <- df_looks %>% 
  select(Study, ResearchID, `Child hears`:`Maternal edu.`, `Exp. vocab.`,
         Time, starts_with("Prop_")) %>% 
  tidyr::gather(AOI, Proportion, starts_with("Prop_")) %>% 
  mutate(AOI = AOI %>% 
           stringr::str_replace("Prop_", "") %>% 
           stringr::str_replace("Foil", "")) 

df_looks_to_aois$AOI <- factor(
  df_looks_to_aois$AOI, 
  levels = c("Target", "Phonological", "Semantic", "Unrelated"))

df_looks_to_aois$Image <- factor(
  df_looks_to_aois$AOI, 
  labels = c("Target word", "Phonological foil", "Semantic foil", "Unrelated word"),
  levels = c("Target", "Phonological", "Semantic", "Unrelated"))


ggplot(df_looks_to_aois) + 
  aes(x = Time, y = Proportion, color = Image) + 
  geom_hline(yintercept = .25, size = 1.25, color = "#cccccc") + 
  stat_summary() + 
  theme_ipsum_rc(axis_title_size = 11) + 
  theme(legend.position = "bottom", legend.text = element_text(size = 10)) + 
  labs(x = plot_text$x_time, 
       y = plot_text$y_image) + 
  xlim(0, 2000)

ggplot(df_looks_to_aois) + 
  aes(x = Time, y = Proportion, color = Image) + 
  geom_hline(yintercept = .25, size = 1.25, color = "#cccccc") + 
  stat_summary() + 
  facet_wrap("`Child hears`", labeller = label_both) + 
  theme_ipsum_rc(axis_title_size = 11) + 
  theme(legend.position = "bottom", legend.text = element_text(size = 10)) + 
  labs(x = plot_text$x_time, 
       y = plot_text$y_image) + 
  xlim(0, 2000)

By AOI

ggplot(df_looks_to_aois %>% filter(Image == "Phonological foil")) + 
  aes(x = Time, y = Proportion, color = `Child hears`) + 
  geom_hline(yintercept = .25, size = 1.25, color = "#cccccc") + 
  stat_summary() + 
  facet_wrap("Image") + 
  theme_ipsum_rc(axis_title_size = 11) + 
  theme(legend.position = "bottom", legend.text = element_text(size = 10)) + 
  labs(x = plot_text$x_time, 
       y = plot_text$y_image) + 
  xlim(0, 2000)

# Use %+% to replace the data-set used in a plot
last_plot() %+% 
  filter(df_looks_to_aois, Image == "Semantic foil")

last_plot() %+% 
  filter(df_looks_to_aois, Image == "Unrelated word")

last_plot() %+% 
  filter(df_looks_to_aois, Image == "Target word")

AOI by maternal edu

ggplot(df_looks_to_aois %>% 
         filter(Image == "Phonological foil", !is.na(`Maternal edu.`))) + 
  aes(x = Time, y = Proportion, color = `Maternal edu.`) + 
  geom_hline(yintercept = .25, size = 1.25, color = "#cccccc") + 
  stat_summary() + 
  facet_wrap("Image") + 
  theme_ipsum_rc(axis_title_size = 11) + 
  viridis::scale_color_viridis(discrete = TRUE, option = "inferno", end = .9) +
  theme(legend.position = "bottom", legend.text = element_text(size = 10)) + 
  labs(x = plot_text$x_time, 
       y = plot_text$y_image) + 
  xlim(0, 2000)

last_plot() %+% 
  filter(df_looks_to_aois, 
         Image == "Semantic foil", !is.na(`Maternal edu.`))

last_plot() %+% 
  filter(df_looks_to_aois, 
         Image == "Unrelated word", !is.na(`Maternal edu.`))

last_plot() %+% 
  filter(df_looks_to_aois, 
         Image == "Target word", !is.na(`Maternal edu.`))


ggplot(df_looks_to_aois %>% filter(!is.na(`Maternal edu.`))) + 
  aes(x = Time, y = Proportion, color = `Maternal edu.`) + 
  geom_hline(yintercept = .25, size = 1.25, color = "#cccccc") + 
  stat_summary(fun.y = "mean", geom = "line", size = 1.5) + 
  viridis::scale_color_viridis(discrete = TRUE, option = "inferno", end = .9) +
  facet_wrap("Image") + 
  theme_ipsum_rc(axis_title_size = 11) + 
  theme(legend.position = "bottom", legend.text = element_text(size = 10)) + 
  labs(x = plot_text$x_time, 
       y = plot_text$y_image) + 
  xlim(0, 2000)


ggplot(df_looks_to_aois) + 
  aes(x = Time, y = Proportion, color = `Native dialect`) + 
  geom_hline(yintercept = .25, size = 1.25, color = "#cccccc") + 
  stat_summary(fun.y = "mean", geom = "line", size = 1.5) + 
  viridis::scale_color_viridis(discrete = TRUE, option = "viridis", end = .9) +
  facet_wrap("Image") + 
  theme_ipsum_rc(axis_title_size = 11) + 
  theme(legend.position = "bottom", legend.text = element_text(size = 10)) + 
  labs(x = plot_text$x_time, 
       y = plot_text$y_image) + 
  xlim(0, 2000)

ggplot(df_looks_to_aois) + 
  aes(x = Time, y = Proportion, color = `Child hears`) + 
  geom_hline(yintercept = .25, size = 1.25, color = "#cccccc") + 
  stat_summary() + 
  facet_wrap("AOI", labeller = label_both) + 
  theme_ipsum_rc(axis_title_size = 11) + 
  theme(legend.position = "bottom", legend.text = element_text(size = 10)) + 
  labs(x = plot_text$x_time, 
       y = plot_text$y_target) + 
  xlim(0, 2000)

By vocabulary.

ggplot(df_looks_to_aois %>% 
         filter(Image == "Phonological foil", !is.na(`Exp. vocab.`))) + 
  aes(x = Time, y = Proportion, color = `Exp. vocab.`) + 
  geom_hline(yintercept = .25, size = 1.25, color = "#cccccc") + 
  stat_summary() + 
  facet_wrap("Image") + 
  theme_ipsum_rc(axis_title_size = 11) + 
  viridis::scale_color_viridis(discrete = TRUE, option = "plasma", end = .9) +
  theme(legend.position = "bottom", legend.text = element_text(size = 10)) + 
  labs(x = plot_text$x_time, 
       y = plot_text$y_image) + 
  xlim(0, 2000)

last_plot() %+% 
  filter(df_looks_to_aois, 
         Image == "Semantic foil", !is.na(`Exp. vocab.`))

last_plot() %+% 
  filter(df_looks_to_aois, 
         Image == "Unrelated word", !is.na(`Exp. vocab.`))

last_plot() %+% 
  filter(df_looks_to_aois, 
         Image == "Target word", !is.na(`Exp. vocab.`)) 


LearningToTalk/L2TDatabase documentation built on June 24, 2020, 3:45 a.m.