Tristan Mahr 2017-05-03
Load in the list of participants. The table of matches is stored in a table called CIMatching
in the database. We connect to the database, grab that table and the test scores for the children in that table.
library(dplyr)
library(L2TDatabase)
# We use the main RStudio project directory as the working folder
# for this session.
wd <- rprojroot::find_rstudio_root_file()
dir_here <- file.path(wd, "inst", "analyses", "rwl_ci_nh_matches")
cnf_file <- file.path(wd, "inst", "l2t_db.cnf")
# Connect to database
l2t_main <- l2t_connect(cnf_file, "l2t")
# Download matches and scores from various tests
df_matches <- tbl(l2t_main, "CIMatching") %>%
left_join(tbl(l2t_main, "EVT")) %>%
left_join(tbl(l2t_main, "PPVT")) %>%
left_join(tbl(l2t_main, "GFTA")) %>%
select(Group = Matching_Group, Matching_PairNumber, ChildStudyID,
Study, ResearchID,
Female, AAE, LateTalker, CImplant,
Maternal_Education, Maternal_Education_Level,
EVT_Age, EVT_Raw, EVT_GSV, EVT_Standard,
PPVT_Age, PPVT_Raw, PPVT_GSV, PPVT_Standard,
GFTA_Age:GFTA_Standard) %>%
collect()
df_matches
#> # A tibble: 82 × 24
#> Group Matching_PairNumber ChildStudyID Study ResearchID
#> <chr> <int> <int> <chr> <chr>
#> 1 NormalHearing 36 287 TimePoint2 630L
#> 2 NormalHearing 17 294 TimePoint2 640L
#> 3 CochlearImplant 28 308 TimePoint2 665L
#> 4 CochlearImplant 31 319 TimePoint2 679L
#> 5 NormalHearing 10 328 TimePoint2 002L
#> 6 NormalHearing 12 335 TimePoint2 010L
#> 7 NormalHearing 35 338 TimePoint2 014L
#> 8 NormalHearing 20 354 TimePoint2 030L
#> 9 NormalHearing 8 368 TimePoint2 044L
#> 10 NormalHearing 22 375 TimePoint2 052L
#> # ... with 72 more rows, and 19 more variables: Female <int>, AAE <int>,
#> # LateTalker <int>, CImplant <int>, Maternal_Education <chr>,
#> # Maternal_Education_Level <int>, EVT_Age <int>, EVT_Raw <int>,
#> # EVT_GSV <int>, EVT_Standard <int>, PPVT_Age <int>, PPVT_Raw <int>,
#> # PPVT_GSV <int>, PPVT_Standard <int>, GFTA_Age <int>,
#> # GFTA_RawCorrect <int>, GFTA_NumTranscribed <int>,
#> # GFTA_AdjCorrect <int>, GFTA_Standard <int>
We can confirm adequate matching on (EVT) Age and sex.
df_matches %>%
group_by(Group) %>%
summarise(
N = n(),
CI = sum(CImplant),
N_Female = sum(Female),
N_Male = sum(Female == 0),
N_EVT = sum(!is.na(EVT_Age)),
EVT_Age = mean(EVT_Age, na.rm = TRUE),
N_PPVT = sum(!is.na(PPVT_Age)),
PPVT_Age = mean(PPVT_Age, na.rm = TRUE),
N_GFTA = sum(!is.na(GFTA_Age)),
GFTA_Age = mean(GFTA_Age, na.rm = TRUE)) %>%
knitr::kable()
| Group | N| CI| N_Female| N_Male| N_EVT| EVT_Age| N_PPVT| PPVT_Age| N_GFTA| GFTA_Age| |:----------------|----:|----:|----------:|--------:|-------:|---------:|--------:|----------:|--------:|----------:| | CochlearImplant | 41| 41| 25| 16| 41| 50.09756| 41| 49.95122| 37| 50.27027| | NormalHearing | 41| 0| 25| 16| 41| 50.09756| 23| 43.08696| 27| 51.62963|
We also matched by general maternal education.
df_matches %>%
count(Group, Maternal_Education) %>%
ungroup() %>%
tidyr::spread(Group, n) %>%
knitr::kable()
| Maternal_Education | CochlearImplant| NormalHearing| |:-----------------------------|----------------:|--------------:| | College Degree | 18| 18| | Graduate Degree | 12| 12| | High School Diploma | 3| 2| | Less Than High School | NA| 1| | Some College (2+ years) | 3| 3| | Technical/Associate's Degree | 5| 4| | Trade School | NA| 1|
We can also compute summary statistics.
df_matches %>%
select(Group, EVT_Age, EVT_Standard, PPVT_Standard, GFTA_Standard) %>%
# Convert to long format to compute summaries by group by score type
tidyr::gather(Variable, Value, -Group) %>%
tidyr::drop_na(Value) %>%
group_by(Variable, Group) %>%
summarise(N_Values = n(), Mean = mean(Value), SD = sd(Value),
Min = min(Value), Max = max(Value)) %>%
# Round off decimals
mutate_each_(funs(round), vars = vars(Mean, SD, Min, Max)) %>%
ungroup() %>%
knitr::kable()
| Variable | Group | N_Values| Mean| SD| Min| Max| |:---------------|:----------------|----------:|-----:|----:|----:|----:| | EVT_Age | CochlearImplant | 41| 50| 10| 31| 66| | EVT_Age | NormalHearing | 41| 50| 10| 32| 66| | EVT_Standard | CochlearImplant | 40| 98| 19| 46| 131| | EVT_Standard | NormalHearing | 41| 118| 11| 88| 134| | GFTA_Standard | CochlearImplant | 37| 74| 19| 39| 107| | GFTA_Standard | NormalHearing | 27| 92| 12| 67| 113| | PPVT_Standard | CochlearImplant | 40| 94| 21| 40| 139| | PPVT_Standard | NormalHearing | 23| 120| 11| 94| 140|
This is a tedious step. We need to download the eyetracking data. First, let's connect to the database and prepare some queries.
l2t_eyetracking <- l2t_connect(cnf_file, "eyetracking")
# We could do this with the prepared queries (q_BlocksByStudy, etc.), but the
# query q_LooksByStudy takes forever to run. So we will select identify the
# blocks we want and get the trials and looks for just those blocks. That should
# be faster than start with all the data and narrowing down to the subset we
# want.
# Find the numbers of the blocks
tbl_blocks <- tbl(l2t_eyetracking, "Blocks") %>%
filter(ChildStudyID %in% df_matches$ChildStudyID) %>%
select(BlockID, ChildStudyID, Block_Basename,
Block_DateTime, Block_Task, Block_Version, Block_Age)
# Get the attributes for these blocks
tbl_blocks_attrs <- tbl(l2t_eyetracking, "BlockAttributes") %>%
inner_join(tbl_blocks) %>%
select(ChildStudyID, BlockID, BlockAttribute_Name, BlockAttribute_Value)
# Get trial id numbers for these blocks
tbl_trials <- tbl(l2t_eyetracking, "Trials") %>%
inner_join(tbl_blocks) %>%
select(ChildStudyID, BlockID, TrialID, Trial_TrialNo)
# Get attributes of the trials
tbl_trials_attrs <- tbl(l2t_eyetracking, "TrialAttributes") %>%
inner_join(tbl_trials) %>%
select(ChildStudyID, BlockID, TrialID,
TrialAttribute_Name, TrialAttribute_Value)
# The big one. Get the looking data for these trials.
tbl_looks <- tbl(l2t_eyetracking, "Looks") %>%
inner_join(tbl_trials) %>%
select(ChildStudyID, BlockID, TrialID, Time, GazeByImageAOI, GazeByAOI)
Downloading the data takes forever, so I'm going set a flag called refresh
. When refresh
is true, it will redownload the eyetracking data with those queries. Otherwise, it will load the last copy that I saved.
refresh <- TRUE
if (!refresh) {
df_looks <- readr::read_csv(file.path(dir_here, "looks.csv"))
df_blocks <- df_looks %>%
select(Study, ShortResearchID, Block_Age,
Block_Basename, Block_Dialect) %>%
distinct()
} else {
df_blocks <- collect(tbl_blocks) %>%
left_join(df_matches) %>%
group_by(Block_Task, ChildStudyID) %>%
# We want one age per child, so use earliest. This might be dubious.
mutate(Block_Age = min(Block_Age)) %>%
ungroup()
# Get the dialect and stimulus version
df_blocks_attrs <- collect(tbl_blocks_attrs) %>%
# Pivot from long to wide so have the attributes we want
tidyr::spread(BlockAttribute_Name, BlockAttribute_Value) %>%
select(ChildStudyID, BlockID,
Block_Dialect = Dialect, StimulusSet)
# Add dialect to block info
df_blocks <- df_blocks %>%
select(Block_Task, Study, ResearchID, BlockID, Block_Age,
Block_Basename, Block_Version) %>%
left_join(df_blocks_attrs)
df_trials <- collect(tbl_trials)
df_trials_attrs <- collect(tbl_trials_attrs, n = Inf)
df_looks <- collect(tbl_looks, n = Inf)
df_looks <- df_blocks %>%
left_join(df_trials) %>%
left_join(df_looks) %>%
select(-BlockID, -ChildStudyID, -TrialID)
# readr::write_csv(df_looks, file.path(dir_here, "looks.csv"))
}
We exclude blocks with the TimePoint1 version of the experiment stimuli.
df_bad_version <- df_blocks %>%
filter(StimulusSet == "TP1") %>%
select(Block_Task:ResearchID, Block_Basename, StimulusSet) %>%
print()
#> # A tibble: 51 × 5
#> Block_Task Study ResearchID Block_Basename StimulusSet
#> <chr> <chr> <chr> <chr> <chr>
#> 1 RWL TimePoint1 605L RWL_Block1_605L31MS1 TP1
#> 2 RWL TimePoint1 605L RWL_Block2_605L31MS1 TP1
#> 3 MP TimePoint1 605L MP_Block1_605L31MS1 TP1
#> 4 MP TimePoint1 605L MP_Block2_605L31MS1 TP1
#> 5 RWL TimePoint1 608L RWL_Block1_608L39FS2 TP1
#> 6 RWL TimePoint1 608L RWL_Block2_608L39FS2 TP1
#> 7 MP TimePoint1 608L MP_Block1_608L39FS2 TP1
#> 8 MP TimePoint1 608L MP_Block2_608L39FS2 TP1
#> 9 RWL TimePoint1 665L RWL_Block1_665L40FS2 TP1
#> 10 MP TimePoint1 665L MP_Block1_665L40FS2 TP1
#> # ... with 41 more rows
We identify blocks with more than 50% missing data during some analysis window (here 300--1800 ms).
df_blocks_to_drop <- df_looks %>%
anti_join(df_bad_version) %>%
# Offset by 20 ms because the data binned into 50ms bins: I.e., the frame at
# 285ms is part of the [285, 300, 315] ms bin, so that frame needs to part of
# the data screening.
filter(between(Time, 280, 1820)) %>%
lookr::AggregateLooks(Block_Task + Study + ResearchID +
Block_Basename ~ GazeByImageAOI) %>%
as_data_frame() %>%
filter(PropNA > .5) %>%
select(Block_Task:Block_Basename, PropNA) %>%
mutate(PropNA = round(PropNA, 3)) %>%
print(n = Inf)
#> # A tibble: 20 × 5
#> Block_Task Study ResearchID Block_Basename PropNA
#> <chr> <chr> <chr> <chr> <dbl>
#> 1 MP TimePoint2 605L MP_Block1_605L43MS3 0.881
#> 2 MP TimePoint2 665L MP_Block2_665L52FS4 0.655
#> 3 MP TimePoint2 679L MP_Block1_679L46MS4 0.524
#> 4 MP TimePoint3 605L MP_Block1_605L55MS5 0.749
#> 5 MP TimePoint3 605L MP_Block2_605L55MS5 0.864
#> 6 MP TimePoint3 665L MP_Block1_665L64FS6 0.772
#> 7 MP TimePoint3 679L MP_Block1_679L58MS6 0.532
#> 8 RWL CochlearV1 300E RWL_Block2_300E57MS2 0.535
#> 9 RWL CochlearV1 301E RWL_Block2_301E53FS2 0.738
#> 10 RWL CochlearV1 303E RWL_Block1_303E65FS2 0.608
#> 11 RWL CochlearV1 304E RWL_Block1_304E48FS2 0.533
#> 12 RWL CochlearV1 312E RWL_Block2_312E44FS1 0.614
#> 13 RWL CochlearV1 801E RWL_Block1_801E38MS1 0.569
#> 14 RWL CochlearV2 312E RWL_Block1_312E57FS2 0.590
#> 15 RWL TimePoint2 605L RWL_Block2_605L43MS3 0.716
#> 16 RWL TimePoint2 665L RWL_Block1_665L52FS4 0.592
#> 17 RWL TimePoint2 665L RWL_Block2_665L52FS4 0.571
#> 18 RWL TimePoint3 605L RWL_Block1_605L55MS5 0.780
#> 19 RWL TimePoint3 605L RWL_Block2_605L55MS5 0.789
#> 20 RWL TimePoint3 665L RWL_Block2_665L64FS6 0.748
Next, we drop individual trials with more than 50% missing data.
df_trials_to_drop <- df_looks %>%
anti_join(df_blocks_to_drop) %>%
filter(between(Time, 280, 1820)) %>%
lookr::AggregateLooks(Block_Task + Study + ResearchID + Block_Basename +
Trial_TrialNo ~ GazeByImageAOI) %>%
as_data_frame() %>%
filter(PropNA > .5) %>%
select(Block_Task:Trial_TrialNo, PropNA) %>%
mutate(PropNA = round(PropNA, 3)) %>%
print()
#> # A tibble: 1,404 × 6
#> Block_Task Study ResearchID Block_Basename Trial_TrialNo
#> <chr> <chr> <chr> <chr> <int>
#> 1 MP CochlearV1 300E MP_Block1_300E57MS2 2
#> 2 MP CochlearV1 300E MP_Block1_300E57MS2 18
#> 3 MP CochlearV1 300E MP_Block1_300E57MS2 19
#> 4 MP CochlearV1 300E MP_Block1_300E57MS2 20
#> 5 MP CochlearV1 300E MP_Block1_300E57MS2 22
#> 6 MP CochlearV1 300E MP_Block1_300E57MS2 23
#> 7 MP CochlearV1 300E MP_Block1_300E57MS2 25
#> 8 MP CochlearV1 300E MP_Block1_300E57MS2 26
#> 9 MP CochlearV1 300E MP_Block1_300E57MS2 35
#> 10 MP CochlearV1 300E MP_Block2_300E57MS2 2
#> # ... with 1,394 more rows, and 1 more variables: PropNA <dbl>
df_looks <- df_looks %>%
anti_join(df_blocks_to_drop) %>%
anti_join(df_trials_to_drop)
Next, we need to exclude participants who no longer have a match.
df_leftover <- df_looks %>%
distinct(Block_Task, Study, ResearchID) %>%
inner_join(df_matches)
df_leftover %>%
count(Block_Task, Group) %>%
ungroup() %>%
rename(NumChildren = n)
#> # A tibble: 4 × 3
#> Block_Task Group NumChildren
#> <chr> <chr> <int>
#> 1 MP CochlearImplant 39
#> 2 MP NormalHearing 41
#> 3 RWL CochlearImplant 37
#> 4 RWL NormalHearing 41
df_singletons <- df_leftover %>%
count(Block_Task, Matching_PairNumber) %>%
ungroup() %>%
rename(NumChildrenInPair = n) %>%
filter(NumChildrenInPair == 1)
df_singletons
#> # A tibble: 6 × 3
#> Block_Task Matching_PairNumber NumChildrenInPair
#> <chr> <int> <int>
#> 1 MP 22 1
#> 2 MP 23 1
#> 3 RWL 6 1
#> 4 RWL 22 1
#> 5 RWL 23 1
#> 6 RWL 28 1
df_looks <- df_looks %>%
inner_join(df_matches %>%
select(Group, Matching_PairNumber, Study, ResearchID)) %>%
anti_join(df_singletons)
Now there will be the same number of children in each group x task.
df_looks %>%
distinct(Block_Task, Group, Study, ResearchID) %>%
count(Block_Task, Group) %>%
ungroup() %>%
rename(NumChildren = n)
#> # A tibble: 4 × 3
#> Block_Task Group NumChildren
#> <chr> <chr> <int>
#> 1 MP CochlearImplant 39
#> 2 MP NormalHearing 39
#> 3 RWL CochlearImplant 37
#> 4 RWL NormalHearing 37
# Make sure there are 2 children in every matching pair
df_looks %>%
distinct(Matching_PairNumber, Block_Task, Group, Study, ResearchID) %>%
count(Block_Task, Matching_PairNumber) %>%
ungroup() %>%
rename(NumChildrenInPair = n) %>%
filter(NumChildrenInPair != 2)
#> # A tibble: 0 × 3
#> # ... with 3 variables: Block_Task <chr>, Matching_PairNumber <int>,
#> # NumChildrenInPair <int>
Finally, we have to separate the RWL and the MP data, so that we can attach the information about the trials to eyetracking data.
df_mp <- df_looks %>%
filter(Block_Task == "MP")
df_rwl <- df_looks %>%
filter(Block_Task == "RWL")
df_trial_info <- df_trials_attrs %>%
left_join(df_trials) %>%
left_join(df_blocks) %>%
semi_join(df_looks)
# Include the MP trial information
df_mp_trial_info <- df_trial_info %>%
filter(Block_Task == "MP") %>%
tidyr::spread(TrialAttribute_Name, TrialAttribute_Value) %>%
select(Study, ResearchID, Block_Basename, Trial_TrialNo,
Condition = StimType, WordGroup, TargetWord, Bias_ImageAOI,
DistractorImage, FamiliarImage, UnfamiliarImage,
ImageL, ImageR, TargetImage)
df_mp <- df_mp %>%
left_join(df_mp_trial_info)
# Do the same for the RWL data
df_rwl_trial_info <- df_trial_info %>%
filter(Block_Task == "RWL") %>%
tidyr::spread(TrialAttribute_Name, TrialAttribute_Value) %>%
select(Study, ResearchID, Block_Basename, Trial_TrialNo,
TargetImage, Bias_ImageAOI,
starts_with("Image"), starts_with("Stimulus"), starts_with("Word"))
df_rwl <- df_rwl %>%
left_join(df_rwl_trial_info)
Let's downsample into 50ms bins, and aggregate the number of looks.
df_bins <- df_rwl %>%
distinct(Time) %>%
# Need a number of frames divisible three. Time 0 should be center of its bin
filter(between(Time, -917, 1975)) %>%
arrange(Time) %>%
mutate(Bin = lookr::AssignBins(Time, bin_width = 3)) %>%
group_by(Bin) %>%
# Round to nearest 50ms
mutate(BinTime = Time %>% median() %>% round(-1)) %>%
ungroup()
df_binned <- df_rwl %>%
inner_join(df_bins) %>%
select(Study, ResearchID, Trial_TrialNo,
Time, BinTime, GazeByImageAOI, GazeByAOI) %>%
lookr::AggregateLooks(Study + ResearchID + BinTime ~ GazeByImageAOI) %>%
as_data_frame() %>%
mutate(Looks_Images = Target + Others,
Prop_Target = Target / Looks_Images,
Prop_PhonologicalFoil = PhonologicalFoil / Looks_Images,
Prop_SemanticFoil = SemanticFoil / Looks_Images,
Prop_Unrelated = Unrelated / Looks_Images)
df_binned
#> # A tibble: 4,292 × 19
#> Study ResearchID BinTime PhonologicalFoil SemanticFoil Target
#> <chr> <chr> <dbl> <int> <int> <int>
#> 1 CochlearV1 300E -900 16 9 11
#> 2 CochlearV1 300E -850 15 9 9
#> 3 CochlearV1 300E -800 15 9 9
#> 4 CochlearV1 300E -750 17 9 9
#> 5 CochlearV1 300E -700 15 9 9
#> 6 CochlearV1 300E -650 13 9 9
#> 7 CochlearV1 300E -600 12 10 9
#> 8 CochlearV1 300E -550 9 11 10
#> 9 CochlearV1 300E -500 9 12 13
#> 10 CochlearV1 300E -450 9 12 12
#> # ... with 4,282 more rows, and 13 more variables: Elsewhere <int>,
#> # Unrelated <int>, NAs <int>, Others <dbl>, Looks <dbl>,
#> # Proportion <dbl>, ProportionSE <dbl>, PropNA <dbl>,
#> # Looks_Images <dbl>, Prop_Target <dbl>, Prop_PhonologicalFoil <dbl>,
#> # Prop_SemanticFoil <dbl>, Prop_Unrelated <dbl>
If we want to plot a growth curve for image type (target, semantic foil, etc.), we need to reshape into a long format to have a Proportion column and an Image type column.
df_looks_to_aois <- df_binned %>%
select(Study, ResearchID, Time = BinTime, starts_with("Prop_")) %>%
tidyr::gather(AOI, Proportion, -Study, -ResearchID, -Time) %>%
mutate(AOI = AOI %>%
stringr::str_replace("Prop_", "") %>%
stringr::str_replace("Foil", ""))
df_looks_to_aois$AOI <- factor(
df_looks_to_aois$AOI,
levels = c("Target", "Phonological", "Semantic", "Unrelated"))
Do some spaghetti plots
library(ggplot2)
df_looks_to_aois <- left_join(df_looks_to_aois, df_matches) %>%
mutate(LineGroup = interaction(Study, ResearchID, AOI))
curr_labs <- labs(
x = "Time after target onset (ms.)",
y = "Proportion of looks",
color = "Image")
theme_aligned <- theme(
axis.title.x = element_text(hjust = .995),
axis.title.y = element_text(hjust = .995))
p_theme <- theme_grey(base_size = 11) +
theme(legend.position = "bottom") +
theme_aligned
p1 <- ggplot(df_looks_to_aois) +
aes(x = Time, y = Proportion, color = AOI, group = LineGroup) +
geom_hline(yintercept = .25, size = 2, color = "white") +
geom_line() +
viridis::scale_color_viridis(discrete = TRUE, option = "inferno", end = .9) +
facet_wrap("Group") +
p_theme + curr_labs
p1
Replace data-set in last plot with one with a narrow time window.
df_looks_to_aois_window <- df_looks_to_aois %>%
filter(between(Time, 300, 1800))
p1 %+% df_looks_to_aois_window
Show average of each participant's lines.
p2 <- ggplot(df_looks_to_aois) +
aes(x = Time, y = Proportion, color = AOI) +
geom_hline(yintercept = .25, size = 2, color = "white") +
stat_summary(fun.data = mean_se, geom = "pointrange") +
facet_wrap("Group") +
p_theme + curr_labs +
viridis::scale_color_viridis(discrete = TRUE, option = "inferno", end = .9)
p2
p2b <- ggplot(df_looks_to_aois_window) +
aes(x = Time, y = Proportion, color = AOI) +
geom_hline(yintercept = .25, size = 2, color = "white") +
stat_summary(fun.data = mean_se, geom = "pointrange") +
facet_wrap("Group") +
p_theme + curr_labs +
viridis::scale_color_viridis(discrete = TRUE, option = "inferno", end = .9)
p2b
Actually, we don't need to facet them.
p3 <- ggplot(df_looks_to_aois) +
aes(x = Time, y = Proportion, color = AOI, shape = Group) +
geom_hline(yintercept = .25, size = 2, color = "white") +
stat_summary(fun.data = mean_se, geom = "pointrange") +
p_theme + curr_labs +
viridis::scale_color_viridis(discrete = TRUE, option = "inferno", end = .9)
p3
And zoom in.
p4 <- ggplot(df_looks_to_aois_window) +
aes(x = Time, y = Proportion, color = AOI, shape = Group) +
geom_hline(yintercept = .25, size = 2, color = "white") +
stat_summary(fun.data = mean_se, geom = "pointrange") +
p_theme + curr_labs +
viridis::scale_color_viridis(discrete = TRUE, option = "inferno", end = .9)
p4
Compare each mean of curve.
p5 <- ggplot(df_looks_to_aois_window) +
aes(x = Time, y = Proportion, linetype = Group) +
geom_hline(yintercept = .25, size = 2, color = "white") +
stat_summary(fun.y = mean, geom = "line", size = 1.25) +
facet_wrap("AOI") +
p_theme + curr_labs
p5
Similar steps as above. Let's downsample into 50ms bins, and aggregate the number of looks.
df_bins <- df_mp %>%
distinct(Time) %>%
# Need a number of frames divisible three. Time 0 should be center of its bin
filter(between(Time, -917, 1975)) %>%
arrange(Time) %>%
mutate(Bin = lookr::AssignBins(Time, bin_width = 3)) %>%
group_by(Bin) %>%
# Round to nearest 50ms
mutate(BinTime = Time %>% median() %>% round(-1)) %>%
ungroup()
df_binned <- df_mp %>%
inner_join(df_bins) %>%
select(Study, ResearchID, Condition, Trial_TrialNo,
Time, BinTime, GazeByImageAOI, GazeByAOI) %>%
lookr::AggregateLooks(Condition + Study +
ResearchID + BinTime ~ GazeByImageAOI) %>%
as_data_frame() %>%
rename(Time = BinTime)
df_binned
#> # A tibble: 13,572 × 13
#> Condition Study ResearchID Time Distractor Target Elsewhere NAs
#> <chr> <chr> <chr> <dbl> <int> <int> <int> <int>
#> 1 MP CochlearV1 300E -900 14 23 2 6
#> 2 MP CochlearV1 300E -850 15 26 1 3
#> 3 MP CochlearV1 300E -800 15 27 0 3
#> 4 MP CochlearV1 300E -750 15 27 0 3
#> 5 MP CochlearV1 300E -700 15 25 0 5
#> 6 MP CochlearV1 300E -650 14 24 2 5
#> 7 MP CochlearV1 300E -600 11 23 2 9
#> 8 MP CochlearV1 300E -550 11 22 3 9
#> 9 MP CochlearV1 300E -500 12 21 1 11
#> 10 MP CochlearV1 300E -450 12 20 3 10
#> # ... with 13,562 more rows, and 5 more variables: Others <dbl>,
#> # Looks <dbl>, Proportion <dbl>, ProportionSE <dbl>, PropNA <dbl>
Prepare for plotting.
df_plot <- df_binned %>%
inner_join(df_matches) %>%
mutate(PlotGroup = ifelse(Group == "NormalHearing",
"Normal hearing", "Cochlear implant")) %>%
filter(between(Time, 0, 2000))
condition_labels <- c(
"real" = "real word",
"MP" = "mispronunication",
"nonsense" = "nonword"
)
df_plot$heard <- df_plot$Condition %>%
factor(levels = names(condition_labels), labels = condition_labels)
curr_labs <- labs(
x = "Time after target onset (ms.)",
y = "Proportion of looks to familiar image",
color = "Child hears")
theme_aligned <- theme(
axis.title.x = element_text(hjust = .995),
axis.title.y = element_text(hjust = .995))
p_theme <- theme_grey(base_size = 11) +
theme(legend.position = "top",
legend.justification = "left") +
theme_aligned
Show average of each participant's lines.
p2 <- ggplot(df_plot) +
aes(x = Time, y = Proportion, color = heard) +
geom_hline(yintercept = .5, size = 2, color = "white") +
stat_summary(fun.data = mean_se, geom = "pointrange") +
facet_wrap("PlotGroup") +
p_theme + curr_labs +
viridis::scale_color_viridis(discrete = TRUE, option = "viridis", end = .8)
p2
Actually, we don't need to facet them.
p3 <- ggplot(df_plot) +
aes(x = Time, y = Proportion, color = heard, shape = PlotGroup) +
geom_hline(yintercept = .5, size = 2, color = "white") +
stat_summary(fun.data = mean_se, geom = "pointrange") +
p_theme + curr_labs +
viridis::scale_color_viridis(discrete = TRUE, option = "viridis", end = .8)
labs(shape = "Group")
#> $shape
#> [1] "Group"
#>
#> attr(,"class")
#> [1] "labels"
p3
schemes <- tibble::tribble(
~ MPLabel, ~ WordGroup, ~ Contrast,
"rice-[w]ice", "rice", "ɹ-w",
"shoes-[s]oes", "shoes", "ʃ-s",
"girl-[d]irl", "girl", "g-d",
"soup-[ʃ]oup", "soup", "ʃ-s",
"cake-[g]ake", "cake", "k-g",
"duck-[g]uck", "duck", "g-d",
"dog-[t]og", "dog", "t-d"
)
df_items <- df_mp %>%
inner_join(df_bins) %>%
filter(Condition != "nonsense") %>%
select(WordGroup, Study, ResearchID, Condition, Trial_TrialNo,
Time, BinTime, GazeByImageAOI, GazeByAOI) %>%
lookr::AggregateLooks(WordGroup + Condition + Study +
ResearchID + BinTime ~ GazeByImageAOI) %>%
as_data_frame() %>%
rename(Time = BinTime) %>%
left_join(schemes) %>%
filter(Contrast != "t-d")
df_items
#> # A tibble: 51,098 × 16
#> WordGroup Condition Study ResearchID Time Distractor Target
#> <chr> <chr> <chr> <chr> <dbl> <int> <int>
#> 1 cake MP CochlearV1 300E -900 0 9
#> 2 cake MP CochlearV1 300E -850 0 11
#> 3 cake MP CochlearV1 300E -800 0 12
#> 4 cake MP CochlearV1 300E -750 0 12
#> 5 cake MP CochlearV1 300E -700 0 12
#> 6 cake MP CochlearV1 300E -650 0 12
#> 7 cake MP CochlearV1 300E -600 0 12
#> 8 cake MP CochlearV1 300E -550 0 11
#> 9 cake MP CochlearV1 300E -500 0 9
#> 10 cake MP CochlearV1 300E -450 0 8
#> # ... with 51,088 more rows, and 9 more variables: Elsewhere <int>,
#> # NAs <int>, Others <dbl>, Looks <dbl>, Proportion <dbl>,
#> # ProportionSE <dbl>, PropNA <dbl>, MPLabel <chr>, Contrast <chr>
df_items %>% count(WordGroup, Contrast)
#> Source: local data frame [6 x 3]
#> Groups: WordGroup [?]
#>
#> WordGroup Contrast n
#> <chr> <chr> <int>
#> 1 cake k-g 8758
#> 2 duck g-d 8758
#> 3 girl g-d 8816
#> 4 rice <U+0279>-w 7250
#> 5 shoes <U+0283>-s 8816
#> 6 soup <U+0283>-s 8700
Prepare for plotting.
df_plot_items <- df_items %>%
inner_join(df_matches) %>%
mutate(PlotGroup = ifelse(Group == "NormalHearing",
"Normal hearing", "Cochlear implant")) %>%
filter(between(Time, 0, 2000))
condition_labels <- c(
"real" = "real word",
"MP" = "mispronunication",
"nonsense" = "nonword"
)
df_plot_items$heard <- df_plot_items$Condition %>%
factor(levels = names(condition_labels), labels = condition_labels)
p2 <- ggplot(df_plot_items) +
aes(x = Time, y = Proportion, color = heard, linetype = PlotGroup) +
geom_hline(yintercept = .5, size = 2, color = "white") +
stat_summary(fun.y = mean, geom = "line", size = 1) +
facet_wrap("MPLabel") +
p_theme + curr_labs + labs(linetype = "Group") +
viridis::scale_color_viridis(discrete = TRUE, option = "viridis", end = .8)
p2
#> Warning: Removed 267 rows containing non-finite values (stat_summary).
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.