library("knitr") opts_chunk$set( warning = FALSE, collapse = TRUE, comment = "#>", message = FALSE, fig.width = 8, # fig.height = 6, fig.asp = 0.618, dpi = 300, out.width = "80%") wd <- rprojroot::find_rstudio_root_file() opts_knit$set(root.dir = wd)
ME would like to look at the data for the RWL task for participants in the Dialect Switch study. I will
Connect to database:
library(dplyr) library(L2TDatabase) # Work relative to RStudio project wd <- rprojroot::find_rstudio_root_file() dir_here <- file.path(wd, "inst", "analyses", "ci_matching") cnf_file <- file.path(wd, "inst", "l2t_db.cnf") # Connect to the individual databases l2t_main <- l2t_connect(cnf_file, "l2t") l2t_backend <- l2t_connect(cnf_file, "backend") l2t_eyetracking <- l2t_connect(cnf_file, "eyetracking")
JE says that we should only use the experiments that used the re-recorded stimuli for TimePoint2. We modified the stimuli at TimePoint2 so that the two dialect versions had similar durations.
These are the tables in the eyetracking database.
src_tbls(l2t_eyetracking)
Tables with q_
at the front are queries which contain helpful information
(like Study and ResearchID) alongside the main data in the table. We use the
q_TrialAttributesByStudy
table to get information about each eyetracking
trial. I use q_
for queries that developed in these scripts as well.
# Dialects used in each block q_block_dialects <- l2t_eyetracking %>% tbl("q_BlockAttributesByStudy") %>% filter(Task == "RWL", BlockAttribute_Name == c("Dialect"), Study %in% c("DialectSwitch", "MaternalEd")) %>% select(BlockID, Dialect = BlockAttribute_Value) q_block_ages <- l2t_eyetracking %>% tbl("Blocks") %>% select(BlockID, Block_Age) # Find the blocks that used TP2 stimuli. q_blocks_to_use <- l2t_eyetracking %>% tbl("q_BlockAttributesByStudy") %>% filter( Task == "RWL", BlockAttribute_Name == c("StimulusSet"), BlockAttribute_Value == c("TP2"), Study %in% c("DialectSwitch", "MaternalEd")) %>% select(-BlockAttribute_Name, StimulusSet = BlockAttribute_Value) %>% left_join(q_block_dialects) %>% left_join(q_block_ages)
Count the number of blocks in the dialect-varying studies.
q_blocks_to_use %>% count(Study, Dialect, StimulusSet) %>% rename(nBlocks = n) %>% ungroup() %>% collect()
Download child-level information.
df_child_info <- tbl(l2t_backend, "Child") %>% left_join(tbl(l2t_backend, "ChildStudy")) %>% left_join(tbl(l2t_backend, "Study")) %>% filter(Study %in% c("DialectSwitch", "MaternalEd")) %>% rename(ResearchID = ShortResearchID) %>% collect() %>% left_join(collect(tbl(l2t_main, "Maternal_Education"))) %>% select(Study, ResearchID, ChildID, ChildStudyID, Female, AAE, LateTalker, CImplant, Maternal_Education, Maternal_Education_Level) # Add some helper columns df_medu_scheme <- data_frame( Maternal_Education_Level = c(NA, 1:7), Maternal_Education_Group = c(NA, "Low", "Low", "Low", "Mid", "Mid", "High", "High") ) df_child_info <- df_child_info %>% left_join(df_medu_scheme) %>% mutate( Dialect = ifelse(AAE, "AAE", "MAE"), Gender = ifelse(Female, "Girl", "Boy")) df_child_info$Maternal_Education_Group <- factor( df_child_info$Maternal_Education_Group, levels = c("Low", "Mid", "High") )
Attach research IDs and study names to the non-TP1 blocks.
df_rwl_blocks <- q_blocks_to_use %>% rename(BlockDialect = Dialect) %>% collect() %>% left_join(df_child_info)
Now we count the number of dialects presented to each child to find out who received more than one version of the experiment.
df_children_who_got_multiple_dialects <- df_rwl_blocks %>% select(ChildID, AAE, BlockDialect) %>% distinct() %>% # Count the dialects presented to each child. Find multiple dialect children count(AAE, ChildID) %>% filter(n > 1) %>% ungroup() %>% select(ChildID) df_blocks_to_keep <- df_rwl_blocks %>% inner_join(df_children_who_got_multiple_dialects)
One child apparently got three blocks. Figure out which block to manually exclude.
q_blocks_to_use %>% group_by(ResearchID) %>% tally() %>% collect() %>% filter(n > 2) q_blocks_to_use %>% collect() %>% filter(ResearchID == "436D") # Skip the latest redundant plock df_blocks_to_keep <- df_blocks_to_keep %>% filter(DateTime != "2014-04-01 09:52:14") # Update the server-side query to use the restricted subset of blocks q_blocks_to_use <- q_blocks_to_use %>% filter(BlockID %in% df_blocks_to_keep$BlockID)
Download the eyetracking data
# Keep just the trials in the blocks we want q_rwl_trials <- tbl(l2t_eyetracking, "Trials") %>% semi_join(q_blocks_to_use) # Download the gazes in those trials df_raw_rwl_looks <- tbl(l2t_eyetracking, "Looks") %>% inner_join(q_rwl_trials) %>% select(TrialID, Time, GazeByImageAOI) %>% collect(n = Inf) df_rwl_looks <- df_raw_rwl_looks %>% inner_join(collect(q_rwl_trials)) %>% inner_join(df_blocks_to_keep) %>% rename(TrialNo = Trial_TrialNo) # rwl_looks <- rwl_looks %>% # left_join(blocks_to_keep) %>% # left_join(child_info) %>% # select(Study, ResearchID, Dialect, BlockDialect, Block_Age, # BlockID, ChildID:Maternal_Education_Level, # Maternal_Education_Group:PPVT_Standard, # TrialID, TrialNo, Time, GazeByImageAOI) # # # rwl_looks %>% filter(is.na(Dialect)) # # rwl_looks <- rwl_looks %>%
Excessive missing data is defined as having more than 50% missing data between 0 and 2000 ms (relative to target onset).
Filter out participants who have an entire block with excessive missing data averaged across trials.
library(lookr) library(ggplot2) df_participants_to_drop <- df_rwl_looks %>% filter(between(Time, -20, 2020)) %>% AggregateLooks(Dialect + BlockID + ResearchID ~ GazeByImageAOI) %>% tibble::as_tibble() %>% filter(PropNA > .5) %>% select(NativeDialect = Dialect, ResearchID) %>% print()
r nrow(df_participants_to_drop)
participants are dropped for excessive
missing data.
Filter out individual trials with excessive missing data.
df_trials_to_drop <- df_rwl_looks %>% anti_join(df_participants_to_drop) %>% filter(between(Time, -20, 2020)) %>% AggregateLooks(Dialect + TrialID + ResearchID ~ GazeByImageAOI) %>% tibble::as_tibble() %>% filter(PropNA > .5) %>% select(NativeDialect = Dialect, ResearchID, TrialID) df_trials_to_drop %>% count(NativeDialect)
r nrow(df_trials_to_drop)
additional trials are dropped.
Data quality stats for remaining children.
df_rwl_looks <- df_rwl_looks %>% anti_join(df_participants_to_drop) %>% anti_join(df_trials_to_drop) %>% mutate(BlockDialect = ifelse(BlockDialect == "AAE", "AAE", "MAE"), HearsNativeDialect = Dialect == BlockDialect) df_rwl_looks %>% filter(between(Time, -20, 2020)) %>% AggregateLooks(Dialect + BlockDialect + ResearchID + TrialID ~ GazeByImageAOI) %>% tibble::as_tibble() %>% group_by(Dialect, BlockDialect, ResearchID) %>% summarise( nGoodTrials = n(), Mean_Prop_NA = mean(PropNA)) %>% summarise( nChildren = n(), `Total Useable Trials` = sum(nGoodTrials), `Mean N of Useable Trials` = mean(nGoodTrials) %>% round(1), `SD Trials` = sd(nGoodTrials) %>% round(1), `Min Trials` = min(nGoodTrials), `Max Trials` = max(nGoodTrials), `Mean Prop of Missing Data` = mean(Mean_Prop_NA) %>% round(3), `SD Prop Missing` = sd(Mean_Prop_NA) %>% round(3), `Min Prop Missing` = min(Mean_Prop_NA) %>% round(3), `Max Prop Missing` = max(Mean_Prop_NA) %>% round(3)) %>% knitr::kable() df_participants <- df_rwl_looks %>% distinct(Study, ResearchID, ChildStudyID) df_child_info <- df_child_info %>% inner_join(df_participants)
Now that we know which blocks to include, we can get the vocabulary scores for the children who contributed those blocks.
df_evt <- tbl(l2t_main, "EVT") %>% select(Study, ResearchID, EVT_Age:EVT_Standard) %>% collect() df_ppvt <- tbl(l2t_main, "PPVT") %>% select(Study, ResearchID, PPVT_Age:PPVT_Standard) %>% collect() df_child_vars <- df_child_info %>% select(Study, ResearchID, ChildStudyID, Female:Gender) %>% distinct() %>% left_join(df_evt) %>% left_join(df_ppvt)
Boys/girls by native dialect.
df_child_vars %>% count(Dialect, Gender) %>% ungroup() %>% rename(`N Children` = n) %>% knitr::kable()
Maternal education by native dialect.
df_child_vars %>% count(Dialect, Maternal_Education_Group) %>% ungroup() %>% rename(`N Children` = n) %>% knitr::kable() df_child_vars %>% count(Dialect, Maternal_Education_Group) %>% ungroup() %>% tidyr::spread(Dialect, n) %>% knitr::kable() df_child_vars %>% count(Dialect, Maternal_Education_Group, Maternal_Education) %>% ungroup() %>% tidyr::spread(Dialect, n) %>% tidyr::replace_na(list(AAE = 0, MAE = 0)) %>% knitr::kable()
Child level measures by dialect group.
narm_mean <- function(...) mean(..., na.rm = TRUE) narm_sd <- function(...) sd(..., na.rm = TRUE) narm_n <- function(...) sum(!is.na(...)) df_child_vars %>% group_by(Dialect) %>% summarise( `N Children` = n(), sum(Female), `Mean Age (months)` = narm_mean(EVT_Age), `SD Age (months)` = narm_sd(EVT_Age), `N EVT` = narm_n(EVT_Raw), `Mean EVT Standard` = narm_mean(EVT_Standard), `SD EVT Standard` = narm_sd(EVT_Standard), `N PPVT` = narm_n(PPVT_Raw), `Mean PPVT Standard` = narm_mean(PPVT_Standard), `SD PPVT Standard` = narm_sd(PPVT_Standard)) %>% knitr::kable(digits = 1) df_child_vars %>% group_by(Dialect, Maternal_Education_Group) %>% summarise( `N Children` = n(), `Mean Age (months)` = narm_mean(EVT_Age), `SD Age (months)` = narm_sd(EVT_Age), `N EVT` = narm_n(EVT_Raw), `Mean EVT Standard` = narm_mean(EVT_Standard), `SD EVT Standard` = narm_sd(EVT_Standard), `N PPVT` = narm_n(PPVT_Raw), `Mean PPVT Standard` = narm_mean(PPVT_Standard), `SD PPVT Standard` = narm_sd(PPVT_Standard)) %>% knitr::kable(digits = 1)
Assign the individual frames into 50-ms bins.
df_bin_times <- df_rwl_looks %>% filter(between(Time, -520, 2020)) %>% distinct(Time) %>% mutate(Bin = lookr::AssignBins(Time, bin_width = 3)) %>% group_by(Bin) %>% mutate(BinTime = Time %>% median() %>% round(-1)) %>% ungroup() df_bin_times
Aggregated looking data over Time across trials within Dialect x BlockDialect within Child.
df_looks <- df_rwl_looks %>% inner_join(df_bin_times) %>% AggregateLooks(Maternal_Education_Group + Study + Dialect + BlockDialect + HearsNativeDialect + ResearchID + BinTime ~ GazeByImageAOI) %>% tibble::as_tibble() %>% rename(Time = BinTime) %>% mutate(Looks_Images = Target + Others, Prop_Target = Target / Looks_Images, Prop_PhonologicalFoil = PhonologicalFoil / Looks_Images, Prop_SemanticFoil = SemanticFoil / Looks_Images, Prop_Unrelated = Unrelated / Looks_Images) df_evt <- df_child_vars %>% mutate(Vocab3tile = ntile(EVT_Standard, 3), `Exp. vocab.` = factor(Vocab3tile, 1:3, c("Lower third", "Middle third", "Upper third"))) %>% select(Study, ResearchID, Vocab3tile, `Exp. vocab.`) df_looks <- df_looks %>% mutate(`Child hears` = ifelse(HearsNativeDialect, "Native dialect", "Non-native dialect"), `Native dialect` = Dialect, `Maternal edu.` = Maternal_Education_Group) %>% left_join(df_evt)
library(hrbrthemes) plot_text <- list( x_time = "Time (ms) after target noun onset", y_target = "Proportion of looks to named image", y_image = "Proportion of looks to image", caption_mean_se = "Mean ± SE" ) legend_position <- theme( legend.position = "bottom", legend.text = element_text(size = 10), legend.justification = "left") colors <- viridis::scale_color_viridis(end = .7, discrete = TRUE) ggplot(df_looks) + aes(x = Time, y = Proportion) + geom_hline(yintercept = .25, size = 1.25, color = "#cccccc") + geom_vline(xintercept = 0, size = 1.25, color = "#cccccc") + stat_summary() + theme_ipsum_rc(axis_title_size = 11) + labs(x = plot_text$x_time, y = plot_text$y_target, caption = plot_text$caption_mean_se) ggplot(df_looks) + aes(x = Time, y = Proportion, color = `Child hears`) + geom_hline(yintercept = .25, size = 1.25, color = "#cccccc") + geom_vline(xintercept = 0, size = 1.25, color = "#cccccc") + stat_summary() + theme_ipsum_rc(axis_title_size = 11) + legend_position + labs(x = plot_text$x_time, y = plot_text$y_target) + colors ggplot(df_looks) + aes(x = Time, y = Proportion, color = `Child hears`) + geom_hline(yintercept = .25, size = 1.25, color = "#cccccc") + stat_summary() + theme_ipsum_rc(axis_title_size = 11) + legend_position + labs(x = plot_text$x_time, y = plot_text$y_target) + facet_wrap("`Native dialect`", labeller = label_both) + xlim(0, 2000) + colors ggplot(df_looks %>% filter(!is.na(`Exp. vocab.`))) + aes(x = Time, y = Proportion, color = `Child hears`) + geom_hline(yintercept = .25, size = 1.25, color = "#cccccc") + stat_summary() + theme_ipsum_rc(axis_title_size = 11) + legend_position + colors + labs(x = plot_text$x_time, y = plot_text$y_target) + facet_wrap("`Exp. vocab.`", labeller = label_both) + xlim(0, 2000) ggplot(df_looks %>% filter(!is.na(Maternal_Education_Group))) + aes(x = Time, y = Proportion, color = `Child hears`) + geom_hline(yintercept = .25, size = 1.25, color = "#cccccc") + stat_summary() + theme_ipsum_rc(axis_title_size = 11) + theme(legend.position = "bottom", legend.text = element_text(size = 10)) + labs(x = plot_text$x_time, y = plot_text$y_target) + facet_wrap("`Maternal edu.`", labeller = label_both) + xlim(0, 2000)
df_looks_to_aois <- df_looks %>% select(Study, ResearchID, `Child hears`:`Maternal edu.`, `Exp. vocab.`, Time, starts_with("Prop_")) %>% tidyr::gather(AOI, Proportion, starts_with("Prop_")) %>% mutate(AOI = AOI %>% stringr::str_replace("Prop_", "") %>% stringr::str_replace("Foil", "")) df_looks_to_aois$AOI <- factor( df_looks_to_aois$AOI, levels = c("Target", "Phonological", "Semantic", "Unrelated")) df_looks_to_aois$Image <- factor( df_looks_to_aois$AOI, labels = c("Target word", "Phonological foil", "Semantic foil", "Unrelated word"), levels = c("Target", "Phonological", "Semantic", "Unrelated")) ggplot(df_looks_to_aois) + aes(x = Time, y = Proportion, color = Image) + geom_hline(yintercept = .25, size = 1.25, color = "#cccccc") + stat_summary() + theme_ipsum_rc(axis_title_size = 11) + theme(legend.position = "bottom", legend.text = element_text(size = 10)) + labs(x = plot_text$x_time, y = plot_text$y_image) + xlim(0, 2000) ggplot(df_looks_to_aois) + aes(x = Time, y = Proportion, color = Image) + geom_hline(yintercept = .25, size = 1.25, color = "#cccccc") + stat_summary() + facet_wrap("`Child hears`", labeller = label_both) + theme_ipsum_rc(axis_title_size = 11) + theme(legend.position = "bottom", legend.text = element_text(size = 10)) + labs(x = plot_text$x_time, y = plot_text$y_image) + xlim(0, 2000)
By AOI
ggplot(df_looks_to_aois %>% filter(Image == "Phonological foil")) + aes(x = Time, y = Proportion, color = `Child hears`) + geom_hline(yintercept = .25, size = 1.25, color = "#cccccc") + stat_summary() + facet_wrap("Image") + theme_ipsum_rc(axis_title_size = 11) + theme(legend.position = "bottom", legend.text = element_text(size = 10)) + labs(x = plot_text$x_time, y = plot_text$y_image) + xlim(0, 2000) # Use %+% to replace the data-set used in a plot last_plot() %+% filter(df_looks_to_aois, Image == "Semantic foil") last_plot() %+% filter(df_looks_to_aois, Image == "Unrelated word") last_plot() %+% filter(df_looks_to_aois, Image == "Target word")
AOI by maternal edu
ggplot(df_looks_to_aois %>% filter(Image == "Phonological foil", !is.na(`Maternal edu.`))) + aes(x = Time, y = Proportion, color = `Maternal edu.`) + geom_hline(yintercept = .25, size = 1.25, color = "#cccccc") + stat_summary() + facet_wrap("Image") + theme_ipsum_rc(axis_title_size = 11) + viridis::scale_color_viridis(discrete = TRUE, option = "inferno", end = .9) + theme(legend.position = "bottom", legend.text = element_text(size = 10)) + labs(x = plot_text$x_time, y = plot_text$y_image) + xlim(0, 2000) last_plot() %+% filter(df_looks_to_aois, Image == "Semantic foil", !is.na(`Maternal edu.`)) last_plot() %+% filter(df_looks_to_aois, Image == "Unrelated word", !is.na(`Maternal edu.`)) last_plot() %+% filter(df_looks_to_aois, Image == "Target word", !is.na(`Maternal edu.`)) ggplot(df_looks_to_aois %>% filter(!is.na(`Maternal edu.`))) + aes(x = Time, y = Proportion, color = `Maternal edu.`) + geom_hline(yintercept = .25, size = 1.25, color = "#cccccc") + stat_summary(fun.y = "mean", geom = "line", size = 1.5) + viridis::scale_color_viridis(discrete = TRUE, option = "inferno", end = .9) + facet_wrap("Image") + theme_ipsum_rc(axis_title_size = 11) + theme(legend.position = "bottom", legend.text = element_text(size = 10)) + labs(x = plot_text$x_time, y = plot_text$y_image) + xlim(0, 2000) ggplot(df_looks_to_aois) + aes(x = Time, y = Proportion, color = `Native dialect`) + geom_hline(yintercept = .25, size = 1.25, color = "#cccccc") + stat_summary(fun.y = "mean", geom = "line", size = 1.5) + viridis::scale_color_viridis(discrete = TRUE, option = "viridis", end = .9) + facet_wrap("Image") + theme_ipsum_rc(axis_title_size = 11) + theme(legend.position = "bottom", legend.text = element_text(size = 10)) + labs(x = plot_text$x_time, y = plot_text$y_image) + xlim(0, 2000) ggplot(df_looks_to_aois) + aes(x = Time, y = Proportion, color = `Child hears`) + geom_hline(yintercept = .25, size = 1.25, color = "#cccccc") + stat_summary() + facet_wrap("AOI", labeller = label_both) + theme_ipsum_rc(axis_title_size = 11) + theme(legend.position = "bottom", legend.text = element_text(size = 10)) + labs(x = plot_text$x_time, y = plot_text$y_target) + xlim(0, 2000)
By vocabulary.
ggplot(df_looks_to_aois %>% filter(Image == "Phonological foil", !is.na(`Exp. vocab.`))) + aes(x = Time, y = Proportion, color = `Exp. vocab.`) + geom_hline(yintercept = .25, size = 1.25, color = "#cccccc") + stat_summary() + facet_wrap("Image") + theme_ipsum_rc(axis_title_size = 11) + viridis::scale_color_viridis(discrete = TRUE, option = "plasma", end = .9) + theme(legend.position = "bottom", legend.text = element_text(size = 10)) + labs(x = plot_text$x_time, y = plot_text$y_image) + xlim(0, 2000) last_plot() %+% filter(df_looks_to_aois, Image == "Semantic foil", !is.na(`Exp. vocab.`)) last_plot() %+% filter(df_looks_to_aois, Image == "Unrelated word", !is.na(`Exp. vocab.`)) last_plot() %+% filter(df_looks_to_aois, Image == "Target word", !is.na(`Exp. vocab.`))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.