library("knitr") opts_chunk$set( collapse = TRUE, comment = "#>", message = FALSE, fig.width = 8, fig.height = 6, dpi = 300, out.width = "80%") wd <- rprojroot::find_rstudio_root_file() opts_knit$set(root.dir = wd)
Load in the list of participants. The table of matches is stored in a table
called CIMatching
in the database. We connect to the database, grab that table
and the test scores for the children in that table.
library(dplyr) library(L2TDatabase) # We use the main RStudio project directory as the working folder # for this session. wd <- rprojroot::find_rstudio_root_file() dir_here <- file.path(wd, "inst", "analyses", "rwl_ci_nh_matches") cnf_file <- file.path(wd, "inst", "l2t_db.cnf") # Connect to database l2t_main <- l2t_connect(cnf_file, "l2t") # Download matches and scores from various tests df_matches <- tbl(l2t_main, "CIMatching") %>% left_join(tbl(l2t_main, "EVT")) %>% left_join(tbl(l2t_main, "PPVT")) %>% left_join(tbl(l2t_main, "GFTA")) %>% select(Group = Matching_Group, Matching_PairNumber, ChildStudyID, Study, ResearchID, ChildID, Female, AAE, LateTalker, CImplant, Maternal_Education, Maternal_Education_Level, EVT_Age, EVT_Raw, EVT_GSV, EVT_Standard, PPVT_Age, PPVT_Raw, PPVT_GSV, PPVT_Standard, GFTA_Age:GFTA_Standard) %>% collect() df_matches
We can confirm adequate matching on (EVT) Age and sex.
df_matches %>% group_by(Group) %>% summarise( N = n(), CI = sum(CImplant), N_Female = sum(Female), N_Male = sum(Female == 0), N_EVT = sum(!is.na(EVT_Age)), EVT_Age = mean(EVT_Age, na.rm = TRUE), N_PPVT = sum(!is.na(PPVT_Age)), PPVT_Age = mean(PPVT_Age, na.rm = TRUE), N_GFTA = sum(!is.na(GFTA_Age)), GFTA_Age = mean(GFTA_Age, na.rm = TRUE)) %>% knitr::kable()
We also matched by general maternal education.
df_matches %>% count(Group, Maternal_Education) %>% ungroup() %>% tidyr::spread(Group, n) %>% knitr::kable()
We can also compute summary statistics.
df_matches %>% select(Group, EVT_Age, EVT_Standard, PPVT_Standard, GFTA_Standard) %>% # Convert to long format to compute summaries by group by score type tidyr::gather(Variable, Value, -Group) %>% tidyr::drop_na(Value) %>% group_by(Variable, Group) %>% summarise(N_Values = n(), Mean = mean(Value), SD = sd(Value), Min = min(Value), Max = max(Value)) %>% # Round off decimals mutate_each_(funs(round), vars = vars(Mean, SD, Min, Max)) %>% ungroup() %>% knitr::kable()
This is a tedious step. We need to download the eyetracking data. First, let's connect to the database and prepare some queries.
l2t_eyetracking <- l2t_connect(cnf_file, "eyetracking") # We could do this with the prepared queries (q_BlocksByStudy, etc.), but the # query q_LooksByStudy takes forever to run. So we will select identify the # blocks we want and get the trials and looks for just those blocks. That should # be faster than start with all the data and narrowing down to the subset we # want. # Find the numbers of the blocks tbl_blocks <- tbl(l2t_eyetracking, "Blocks") %>% filter(ChildStudyID %in% df_matches$ChildStudyID) %>% select(BlockID, ChildStudyID, Block_Basename, Block_DateTime, Block_Task, Block_Version, Block_Age) # Get the attributes for these blocks tbl_blocks_attrs <- tbl(l2t_eyetracking, "BlockAttributes") %>% inner_join(tbl_blocks) %>% select(ChildStudyID, BlockID, BlockAttribute_Name, BlockAttribute_Value) # Get trial id numbers for these blocks tbl_trials <- tbl(l2t_eyetracking, "Trials") %>% inner_join(tbl_blocks) %>% select(ChildStudyID, BlockID, TrialID, Trial_TrialNo) # Get attributes of the trials tbl_trials_attrs <- tbl(l2t_eyetracking, "TrialAttributes") %>% inner_join(tbl_trials) %>% select(ChildStudyID, BlockID, TrialID, TrialAttribute_Name, TrialAttribute_Value) # The big one. Get the looking data for these trials. tbl_looks <- tbl(l2t_eyetracking, "Looks") %>% inner_join(tbl_trials) %>% select(ChildStudyID, BlockID, TrialID, Time, GazeByImageAOI, GazeByAOI)
Downloading the data takes forever, so I'm going set a flag called refresh
.
When refresh
is true, it will redownload the eyetracking data with those
queries. Otherwise, it will load the last copy that I saved.
refresh <- TRUE if (!refresh) { df_looks <- readr::read_csv(file.path(dir_here, "looks.csv")) df_blocks <- df_looks %>% select(Study, ShortResearchID, Block_Age, Block_Basename, Block_Dialect) %>% distinct() } else { df_blocks <- collect(tbl_blocks) %>% left_join(df_matches) %>% group_by(Block_Task, ChildStudyID) %>% # We want one age per child, so use earliest. This might be dubious. mutate(Block_Age = min(Block_Age)) %>% ungroup() # Get the dialect and stimulus version df_blocks_attrs <- collect(tbl_blocks_attrs) %>% # Pivot from long to wide so have the attributes we want tidyr::spread(BlockAttribute_Name, BlockAttribute_Value) %>% select(ChildStudyID, BlockID, Block_Dialect = Dialect, StimulusSet) # Add dialect to block info df_blocks <- df_blocks %>% select(Block_Task, Study, ResearchID, BlockID, Block_Age, Block_Basename, Block_Version) %>% left_join(df_blocks_attrs) df_trials <- collect(tbl_trials) df_trials_attrs <- collect(tbl_trials_attrs, n = Inf) df_looks <- collect(tbl_looks, n = Inf) df_looks <- df_blocks %>% left_join(df_trials) %>% left_join(df_looks) %>% select(-BlockID, -ChildStudyID, -TrialID) # readr::write_csv(df_looks, file.path(dir_here, "looks.csv")) }
We exclude blocks with the TimePoint1 version of the experiment stimuli.
df_bad_version <- df_blocks %>% filter(StimulusSet == "TP1") %>% select(Block_Task:ResearchID, Block_Basename, StimulusSet) %>% print()
We identify blocks with more than 50% missing data during some analysis window (here 300--1800 ms).
df_blocks_to_drop <- df_looks %>% anti_join(df_bad_version) %>% # Offset by 20 ms because the data binned into 50ms bins: I.e., the frame at # 285ms is part of the [285, 300, 315] ms bin, so that frame needs to part of # the data screening. filter(between(Time, 280, 1820)) %>% lookr::AggregateLooks(Block_Task + Study + ResearchID + Block_Basename ~ GazeByImageAOI) %>% as_data_frame() %>% filter(PropNA > .5) %>% select(Block_Task:Block_Basename, PropNA) %>% mutate(PropNA = round(PropNA, 3)) %>% print(n = Inf)
Next, we drop individual trials with more than 50% missing data.
df_trials_to_drop <- df_looks %>% anti_join(df_blocks_to_drop) %>% filter(between(Time, 280, 1820)) %>% lookr::AggregateLooks(Block_Task + Study + ResearchID + Block_Basename + Trial_TrialNo ~ GazeByImageAOI) %>% as_data_frame() %>% filter(PropNA > .5) %>% select(Block_Task:Trial_TrialNo, PropNA) %>% mutate(PropNA = round(PropNA, 3)) %>% print() df_looks <- df_looks %>% anti_join(df_blocks_to_drop) %>% anti_join(df_trials_to_drop)
Next, we need to exclude participants who no longer have a match.
df_leftover <- df_looks %>% distinct(Block_Task, Study, ResearchID) %>% inner_join(df_matches) df_leftover %>% count(Block_Task, Group) %>% ungroup() %>% rename(NumChildren = n) df_singletons <- df_leftover %>% count(Block_Task, Matching_PairNumber) %>% ungroup() %>% rename(NumChildrenInPair = n) %>% filter(NumChildrenInPair == 1) df_singletons df_looks <- df_looks %>% inner_join(df_matches %>% select(Group, Matching_PairNumber, Study, ResearchID)) %>% anti_join(df_singletons)
Now there will be the same number of children in each group x task.
df_looks %>% distinct(Block_Task, Group, Study, ResearchID) %>% count(Block_Task, Group) %>% ungroup() %>% rename(NumChildren = n) # Make sure there are 2 children in every matching pair df_looks %>% distinct(Matching_PairNumber, Block_Task, Group, Study, ResearchID) %>% count(Block_Task, Matching_PairNumber) %>% ungroup() %>% rename(NumChildrenInPair = n) %>% filter(NumChildrenInPair != 2)
Finally, we have to separate the RWL and the MP data, so that we can attach the information about the trials to eyetracking data.
df_mp <- df_looks %>% filter(Block_Task == "MP") df_rwl <- df_looks %>% filter(Block_Task == "RWL") df_trial_info <- df_trials_attrs %>% left_join(df_trials) %>% left_join(df_blocks) %>% semi_join(df_looks) # Include the MP trial information df_mp_trial_info <- df_trial_info %>% filter(Block_Task == "MP") %>% tidyr::spread(TrialAttribute_Name, TrialAttribute_Value) %>% select(Study, ResearchID, Block_Basename, Trial_TrialNo, Condition = StimType, WordGroup, TargetWord, Bias_ImageAOI, DistractorImage, FamiliarImage, UnfamiliarImage, ImageL, ImageR, TargetImage) df_mp <- df_mp %>% left_join(df_mp_trial_info) # Do the same for the RWL data df_rwl_trial_info <- df_trial_info %>% filter(Block_Task == "RWL") %>% tidyr::spread(TrialAttribute_Name, TrialAttribute_Value) %>% select(Study, ResearchID, Block_Basename, Trial_TrialNo, TargetImage, Bias_ImageAOI, starts_with("Image"), starts_with("Stimulus"), starts_with("Word")) df_rwl <- df_rwl %>% left_join(df_rwl_trial_info)
Let's downsample into 50ms bins, and aggregate the number of looks.
df_bins <- df_rwl %>% distinct(Time) %>% # Need a number of frames divisible three. Time 0 should be center of its bin filter(between(Time, -917, 1975)) %>% arrange(Time) %>% mutate(Bin = lookr::AssignBins(Time, bin_width = 3)) %>% group_by(Bin) %>% # Round to nearest 50ms mutate(BinTime = Time %>% median() %>% round(-1)) %>% ungroup() df_binned <- df_rwl %>% inner_join(df_bins) %>% select(Study, ResearchID, Trial_TrialNo, Time, BinTime, GazeByImageAOI, GazeByAOI) %>% lookr::AggregateLooks(Study + ResearchID + BinTime ~ GazeByImageAOI) %>% as_data_frame() %>% mutate(Looks_Images = Target + Others, Prop_Target = Target / Looks_Images, Prop_PhonologicalFoil = PhonologicalFoil / Looks_Images, Prop_SemanticFoil = SemanticFoil / Looks_Images, Prop_Unrelated = Unrelated / Looks_Images) df_binned
If we want to plot a growth curve for image type (target, semantic foil, etc.), we need to reshape into a long format to have a Proportion column and an Image type column.
df_looks_to_aois <- df_binned %>% select(Study, ResearchID, Time = BinTime, starts_with("Prop_")) %>% tidyr::gather(AOI, Proportion, -Study, -ResearchID, -Time) %>% mutate(AOI = AOI %>% stringr::str_replace("Prop_", "") %>% stringr::str_replace("Foil", "")) df_looks_to_aois$AOI <- factor( df_looks_to_aois$AOI, levels = c("Target", "Phonological", "Semantic", "Unrelated"))
Do some spaghetti plots
library(ggplot2) df_looks_to_aois <- left_join(df_looks_to_aois, df_matches) %>% mutate(LineGroup = interaction(Study, ResearchID, AOI)) curr_labs <- labs( x = "Time after target onset (ms.)", y = "Proportion of looks", color = "Image") theme_aligned <- theme( axis.title.x = element_text(hjust = .995), axis.title.y = element_text(hjust = .995)) p_theme <- theme_grey(base_size = 11) + theme(legend.position = "bottom") + theme_aligned p1 <- ggplot(df_looks_to_aois) + aes(x = Time, y = Proportion, color = AOI, group = LineGroup) + geom_hline(yintercept = .25, size = 2, color = "white") + geom_line() + viridis::scale_color_viridis(discrete = TRUE, option = "inferno", end = .9) + facet_wrap("Group") + p_theme + curr_labs p1
Replace data-set in last plot with one with a narrow time window.
df_looks_to_aois_window <- df_looks_to_aois %>% filter(between(Time, 300, 1800)) p1 %+% df_looks_to_aois_window
Show average of each participant's lines.
p2 <- ggplot(df_looks_to_aois) + aes(x = Time, y = Proportion, color = AOI) + geom_hline(yintercept = .25, size = 2, color = "white") + stat_summary(fun.data = mean_se, geom = "pointrange") + facet_wrap("Group") + p_theme + curr_labs + viridis::scale_color_viridis(discrete = TRUE, option = "inferno", end = .9) p2
p2b <- ggplot(df_looks_to_aois_window) + aes(x = Time, y = Proportion, color = AOI) + geom_hline(yintercept = .25, size = 2, color = "white") + stat_summary(fun.data = mean_se, geom = "pointrange") + facet_wrap("Group") + p_theme + curr_labs + viridis::scale_color_viridis(discrete = TRUE, option = "inferno", end = .9) p2b
Actually, we don't need to facet them.
p3 <- ggplot(df_looks_to_aois) + aes(x = Time, y = Proportion, color = AOI, shape = Group) + geom_hline(yintercept = .25, size = 2, color = "white") + stat_summary(fun.data = mean_se, geom = "pointrange") + p_theme + curr_labs + viridis::scale_color_viridis(discrete = TRUE, option = "inferno", end = .9) p3
And zoom in.
p4 <- ggplot(df_looks_to_aois_window) + aes(x = Time, y = Proportion, color = AOI, shape = Group) + geom_hline(yintercept = .25, size = 2, color = "white") + stat_summary(fun.data = mean_se, geom = "pointrange") + p_theme + curr_labs + viridis::scale_color_viridis(discrete = TRUE, option = "inferno", end = .9) p4
Compare each mean of curve.
p5 <- ggplot(df_looks_to_aois_window) + aes(x = Time, y = Proportion, linetype = Group) + geom_hline(yintercept = .25, size = 2, color = "white") + stat_summary(fun.y = mean, geom = "line", size = 1.25) + facet_wrap("AOI") + p_theme + curr_labs p5
df_mp %>% distinct(Study, ResearchID, Group, Matching_PairNumber) %>% inner_join(df_matches) %>% distinct(Group, ChildID) %>% count(Group) df_mp %>% distinct(Study, ResearchID, Group, Matching_PairNumber) %>% inner_join(df_matches) %>% count(Group, ChildID) %>% ungroup() %>% count(Group, n) df_mp %>% distinct(Study, ResearchID, Group, Matching_PairNumber) %>% inner_join(df_matches) %>% group_by(Group) %>% summarise(MinAge = min(EVT_Age), MaxAge = max(EVT_Age))
Similar steps as above. Let's downsample into 50ms bins, and aggregate the number of looks.
df_bins <- df_mp %>% distinct(Time) %>% # Need a number of frames divisible three. Time 0 should be center of its bin filter(between(Time, -917, 1975)) %>% arrange(Time) %>% mutate(Bin = lookr::AssignBins(Time, bin_width = 3)) %>% group_by(Bin) %>% # Round to nearest 50ms mutate(BinTime = Time %>% median() %>% round(-1)) %>% ungroup() df_binned <- df_mp %>% inner_join(df_bins) %>% select(Study, ResearchID, Condition, Trial_TrialNo, Time, BinTime, GazeByImageAOI, GazeByAOI) %>% lookr::AggregateLooks(Condition + Study + ResearchID + BinTime ~ GazeByImageAOI) %>% as_data_frame() %>% rename(Time = BinTime) df_binned
Prepare for plotting.
df_plot <- df_binned %>% inner_join(df_matches) %>% mutate(PlotGroup = ifelse(Group == "NormalHearing", "Normal hearing", "Cochlear implant")) %>% filter(between(Time, 0, 2000)) condition_labels <- c( "real" = "real word", "MP" = "mispronunication", "nonsense" = "nonword" ) df_plot$heard <- df_plot$Condition %>% factor(levels = names(condition_labels), labels = condition_labels) curr_labs <- labs( x = "Time after target onset (ms.)", y = "Proportion of looks to familiar image", color = "Child hears") theme_aligned <- theme( axis.title.x = element_text(hjust = .995), axis.title.y = element_text(hjust = .995)) p_theme <- theme_grey(base_size = 11) + theme(legend.position = "top", legend.justification = "left") + theme_aligned
Show average of each participant's lines.
p2 <- ggplot(df_plot) + aes(x = Time, y = Proportion, color = heard) + geom_hline(yintercept = .5, size = 2, color = "white") + stat_summary(fun.data = mean_se, geom = "pointrange") + facet_wrap("PlotGroup") + p_theme + curr_labs + viridis::scale_color_viridis(discrete = TRUE, option = "viridis", end = .8) p2
Actually, we don't need to facet them.
p3 <- ggplot(df_plot) + aes(x = Time, y = Proportion, color = heard, shape = PlotGroup) + geom_hline(yintercept = .5, size = 2, color = "white") + stat_summary(fun.data = mean_se, geom = "pointrange") + p_theme + curr_labs + viridis::scale_color_viridis(discrete = TRUE, option = "viridis", end = .8) + labs(shape = "Group") p3
df_model_a <- df_plot %>% filter(between(Time, 250, 1500), Condition != "nonword") %>% mutate(Group = factor(Group, c("NormalHearing", "CochlearImplant"))) library(rstanarm) options(mc.cores = parallel::detectCores()) m <- stan_glmer( cbind(Target, Distractor) ~ poly(Time, 3) * Condition * Group + (poly(Time, 3) | ChildStudyID:Condition), family = binomial, prior = normal(0, 40), prior_covariance = decov(regularization = 2), data = df_model_a) save(m, "model.Rdata")
schemes <- tibble::tribble( ~ MPLabel, ~ WordGroup, ~ Contrast, "rice-[w]ice", "rice", "ɹ-w", "shoes-[s]oes", "shoes", "ʃ-s", "girl-[d]irl", "girl", "g-d", "soup-[ʃ]oup", "soup", "ʃ-s", "cake-[g]ake", "cake", "k-g", "duck-[g]uck", "duck", "g-d", "dog-[t]og", "dog", "t-d" ) df_items <- df_mp %>% inner_join(df_bins) %>% filter(Condition != "nonsense") %>% select(WordGroup, Study, ResearchID, Condition, Trial_TrialNo, Time, BinTime, GazeByImageAOI, GazeByAOI) %>% lookr::AggregateLooks(WordGroup + Condition + Study + ResearchID + BinTime ~ GazeByImageAOI) %>% as_data_frame() %>% rename(Time = BinTime) %>% left_join(schemes) %>% filter(Contrast != "t-d") df_items df_items %>% count(WordGroup, Contrast)
Prepare for plotting.
df_plot_items <- df_items %>% inner_join(df_matches) %>% mutate(PlotGroup = ifelse(Group == "NormalHearing", "Normal hearing", "Cochlear implant")) %>% filter(between(Time, 0, 2000)) condition_labels <- c( "real" = "real word", "MP" = "mispronunication", "nonsense" = "nonword" ) df_plot_items$heard <- df_plot_items$Condition %>% factor(levels = names(condition_labels), labels = condition_labels)
p2 <- ggplot(df_plot_items) + aes(x = Time, y = Proportion, color = heard, linetype = PlotGroup) + geom_hline(yintercept = .5, size = 2, color = "white") + stat_summary(fun.y = mean, geom = "line", size = 1) + facet_wrap("MPLabel") + p_theme + curr_labs + labs(linetype = "Group") + viridis::scale_color_viridis(discrete = TRUE, option = "viridis", end = .8) p2
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.