library("knitr")
opts_chunk$set(
  warning = FALSE,
  collapse = TRUE, 
  comment = "#>", 
  message = FALSE,
  fig.width = 8,
  fig.asp = 0.618,
  dpi = 300,
  out.width = "80%")

wd <- rprojroot::find_rstudio_root_file()
opts_knit$set(root.dir = wd)

JRE asks whether we should put the "less than two years of college" maternal education households with the "high school or less" households or with the "some college" and "technical/associate's degree" households. We are going to look at how the LENA measurements vary with maternal education level.

Data munging

Connect to database.

library(dplyr)
library(L2TDatabase)

# Work relative to RStudio project
wd <- rprojroot::find_rstudio_root_file()
dir_here <- file.path(wd, "inst", "analyses", "ci_matching")
cnf_file <- file.path(wd, "inst", "l2t_db.cnf")

l2t_main <- l2t_connect(cnf_file, "l2t")

Get the maternal education codes from the database.

medu_scales <- tbl(l2t_main, "Maternal_Education") %>% 
  distinct(Maternal_Education, Maternal_Education_Level) %>% 
  collect() %>% 
  arrange(Maternal_Education_Level)

knitr::kable(medu_scales)

Simplify to a low-mid-mid-high scale.

medu_groups <- tibble::tribble(
  ~ Maternal_Education_Level, ~ Maternal_Education_Group,
  1, "Low", 
  2, "Low", 
  3, "Low", 
  4, "[skip]", 
  # Kind of awkward names but alphabetical order matches original order
  5, "Mid", 
  6, "MidPlus", 
  7, "Top", 
  NA, "[missing]")

medu_scales <- medu_scales %>% 
  left_join(medu_groups, by = "Maternal_Education_Level")

knitr::kable(medu_scales)

Combine LENA data with the maternal education codes.

medu <- tbl(l2t_main, "Maternal_Education") %>% 
  select(Study, ResearchID, HouseholdID, 
         Maternal_Education, Maternal_Education_Level) %>% 
  collect()

# These LENAs may or may not have problems based on the LENA notes, so let's
# ignore them for this analysis
lenas_to_exclude <- tibble::tribble(
  ~ Study, ~ ResearchID,
  "TimePoint1", "053L",
  "TimePoint1", "102L",
  "TimePoint1", "116L"
)

lena <- tbl(l2t_main, "LENA_Averages") %>% 
  select(Study, ResearchID, 
         LENA_Age, LENA_Hours, 
         LENA_Prop_Meaningful, LENA_AWC_Hourly) %>% 
  collect() %>% 
  anti_join(lenas_to_exclude, by = c("Study", "ResearchID"))

# Combine the tables
medu_lena <- lena %>% 
  left_join(medu, by = c("Study", "ResearchID")) %>% 
  left_join(medu_scales, 
            by = c("Maternal_Education", "Maternal_Education_Level"))
medu_lena

How much LENA data we have in the different groups

# Households with LENA recordings
medu_lena %>% 
  group_by(Maternal_Education_Group) %>% 
  summarise(nRecordings = n(), 
            nChildren = n_distinct(ResearchID),
            nHouseholds = n_distinct(HouseholdID)) %>% 
  knitr::kable()

medu_lena %>% 
  group_by(Maternal_Education_Group) %>% 
  summarise(nRecordings = n(), 
            AWC_Mean = mean(LENA_AWC_Hourly) %>% round(),
            AWC_SD = sd(LENA_AWC_Hourly) %>% round(),
            Prop_Mean = mean(LENA_Prop_Meaningful),
            Prop_SD = sd(LENA_Prop_Meaningful)) %>% 
  knitr::kable(digits = 4)

Density of outcome measures

Visualize the data by group

library(ggplot2)

right_aligned <- theme(
  axis.title.x = element_text(hjust = 1), 
  axis.title.y = element_text(hjust = 1))

density_y_axis <-   theme(
  axis.text.y = element_blank(), 
  axis.ticks.y = element_blank())

df_plotting <- medu_lena %>% 
  filter(Maternal_Education_Group != "[skip]", 
         Maternal_Education_Group != "[missing]")

df_less_than_two <- medu_lena %>% 
  filter(Maternal_Education_Group == "[skip]")


ggplot(df_plotting) + 
  aes(x = LENA_Prop_Meaningful) + 
  geom_density(aes(color = Maternal_Education_Group)) + 
  geom_rug(data = df_less_than_two, size = 1) +
  xlim(0, .5) +
  labs(x = "Proportion of meaningful speech (hourly average)",
       y = "Density",
       color = NULL,
       title = "Language input by maternal education",
       caption = "Rug marks: Households with <2 years of college") +
  right_aligned + 
  density_y_axis


ggplot(df_plotting) + 
  aes(x = LENA_AWC_Hourly) + 
  geom_density(aes(color = Maternal_Education_Group)) + 
  geom_rug(data = df_less_than_two, size = 1) +
  labs(x = "Adult word count (hourly average)",
       y = "Density",
       color = NULL,
       title = "Language input by maternal education",
       caption = "Rug marks: Households with <2 years of college") +
  right_aligned + 
  density_y_axis

Means by group

ggplot(medu_lena) + 
  aes(x = Maternal_Education_Group, y = LENA_Prop_Meaningful) + 
  geom_point(color = "grey60", position = position_jitter(width = .1)) + 
  stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), size = 1) +
  labs(x = "Maternal education group",
       y = "Proportion of meaningful speech (hourly average)",
       title = "Language input by maternal education",
       caption = "Point-range: Group Mean ± SD") +
  right_aligned

ggplot(medu_lena) + 
  aes(x = Maternal_Education_Group, y = LENA_AWC_Hourly) + 
  geom_point(color = "grey60", position = position_jitter(width = .1)) + 
  stat_summary(fun.data = mean_sdl, fun.args = list(mult = 1), size = 1) +
  labs(x = "Maternal education group",
       y = "Adult word count (hourly average)",
       title = "Language input by maternal education",
       caption = "Point-range: Group Mean ± SD") +
  right_aligned

Mixed effects models

Is the low group different from the other groups, accounting for nesting of recordings in households?

library(lme4)

df_plotting$Maternal_Education_Group <- factor(
  df_plotting$Maternal_Education_Group, 
  levels = c("MidPlus", "Low", "Mid", "Top"))

m <- lmer(
  LENA_Prop_Meaningful ~ Maternal_Education_Group + 
    (1 | HouseholdID), df_plotting)
summary(m)

m2 <- lmer(
  LENA_AWC_Hourly ~ Maternal_Education_Group + 
    (1 | HouseholdID), df_plotting)
summary(m2)

Multiple imputation

Multiple imputation suggest that we assign these households to larger groups on a case-by-case basis.

library(mice)

# Create a data-frame where the [skip] group has NA for group. Code the group as
# an ordered categorical variable
df_impute <- medu_lena %>% 
  filter(Maternal_Education_Group != "[missing]") %>% 
  mutate(Maternal_Education_Group = 
           ifelse(Maternal_Education_Group == "[skip]", 
                  NA, Maternal_Education_Group), 
         Maternal_Education_Group = as.ordered(Maternal_Education_Group)) %>% 
  select(Study, ResearchID, LENA_Age, HouseholdID, 
         LENA_Prop_Meaningful, LENA_AWC_Hourly, Maternal_Education_Group)


imputation_vars <- c("LENA_Prop_Meaningful", "LENA_AWC_Hourly", 
                     "Maternal_Education_Group")

# Perform ten imputations
mice_results <- mice(df_impute[imputation_vars], printFlag = FALSE, m = 10)

mice_results$imp$Maternal_Education_Group 

## Add IDs to imputation results

imputed_rows <- mice_results$imp$Maternal_Education_Group %>% 
  row.names() %>% 
  as.numeric()


ids <- df_impute[imputed_rows, c("Study", "ResearchID", 
                                 "LENA_Prop_Meaningful", "LENA_AWC_Hourly")]

imputations <- bind_cols(ids, mice_results$imp$Maternal_Education_Group) %>% 
  # Convert from wide to long format
  tidyr::gather(Imputation, Maternal_Education_Group, -one_of(names(ids)))

imputations %>% 
  count(Study, ResearchID,
        LENA_Prop_Meaningful, LENA_AWC_Hourly, 
        Maternal_Education_Group) %>% 
  ungroup() %>% 
  rename(GroupGuess = Maternal_Education_Group, 
         nGuesses = n) %>% 
  knitr::kable()


LearningToTalk/L2TDatabase documentation built on June 24, 2020, 3:45 a.m.