# inst/doc/pisa_example.R

## ---- include = FALSE---------------------------------------------------------
# Global knitr chunk options for this document: figure size and alignment,
# the prefix put in front of printed output, and collapsed source/output.
knitr::opts_chunk$set(
  fig.width = 6,
  fig.height = 5,
  fig.align = "center",
  comment = "#>",
  collapse = TRUE
)

## ---- message = FALSE---------------------------------------------------------
library(perccalc)
library(tidyr)
library(ggplot2)
library(dplyr)

## ----setup--------------------------------------------------------------------
# Categories of father's education, in the order used as the levels of the
# ordered factor built below.
# NOTE(review): this places "ISCED 3A, ISCED 4" below "ISCED 3B, C" and
# "ISCED 5A, 6" below "ISCED 5B" -- confirm this is the intended ranking.
order_edu <- c(
  "None",
  "ISCED 1",
  "ISCED 2",
  "ISCED 3A, ISCED 4",
  "ISCED 3B, C",
  "ISCED 5A, 6",
  "ISCED 5B"
)

# Recode `father_edu` in each data set as an ordered factor whose levels
# follow `order_edu`
pisa_2006 <- mutate(
  pisa_2006,
  father_edu = factor(father_edu, levels = order_edu, ordered = TRUE)
)

pisa_2012 <- mutate(
  pisa_2012,
  father_edu = factor(father_edu, levels = order_edu, ordered = TRUE)
)

# Stack the two data sets into a single one
pisa <- rbind(pisa_2006, pisa_2012)

## -----------------------------------------------------------------------------
# Difference in `avg_math` between the 90th and 10th percentiles of
# `father_edu`, computed on the pooled data
pisa %>%
  perc_diff(
    categorical_var = father_edu,
    continuous_var = avg_math,
    percentiles = c(90, 10)
  )

## -----------------------------------------------------------------------------

# Estimate the 90/10 difference separately for each `year`/`CNT` combination:
# nest the remaining columns, run `perc_diff_df()` on every nested data set,
# and expand the results back into regular columns.
cnt_diff <-
  pisa %>%
  nest(data = -c(year, CNT)) %>%
  mutate(
    edu_diff = lapply(
      data,
      function(cell) {
        perc_diff_df(cell, father_edu, avg_math, percentiles = c(90, 10))
      }
    )
  ) %>%
  select(-data) %>%
  unnest(edu_diff)

cnt_diff

## -----------------------------------------------------------------------------
# Plot the 90/10 difference against year, one coloured line per `CNT`
ggplot(cnt_diff, aes(x = year, y = difference, group = CNT, color = CNT)) +
  geom_point() +
  geom_line() +
  scale_x_continuous(name = "Year") +
  scale_y_continuous(
    name = "Achievement gap in Math between the 90th and \n 10th percentile of father's education"
  ) +
  theme_minimal()

## -----------------------------------------------------------------------------

# Difference between the 90th and 50th percentiles of father's education,
# estimated for each `year`/`CNT` combination
cnt_half <-
  pisa %>%
  nest(data = -c(year, CNT)) %>%
  mutate(
    edu_diff = lapply(
      data,
      function(cell) {
        perc_diff_df(cell, father_edu, avg_math, percentiles = c(90, 50))
      }
    )
  ) %>%
  select(-data) %>%
  unnest(edu_diff)

# Same estimate for the 50th versus the 10th percentile
cnt_bottom <-
  pisa %>%
  nest(data = -c(year, CNT)) %>%
  mutate(
    edu_diff = lapply(
      data,
      function(cell) {
        perc_diff_df(cell, father_edu, avg_math, percentiles = c(50, 10))
      }
    )
  ) %>%
  select(-data) %>%
  unnest(edu_diff)

# Tag every table with the percentile pair it holds before stacking them
cnt_diff <- mutate(cnt_diff, type = "90/10")
cnt_half <- mutate(cnt_half, type = "90/50")
cnt_bottom <- mutate(cnt_bottom, type = "50/10")

# Stack the three tables; the explicit levels fix the order of `type`
final_cnt <-
  rbind(cnt_diff, cnt_half, cnt_bottom) %>%
  mutate(type = factor(type, levels = c("90/10", "90/50", "50/10")))
final_cnt


## -----------------------------------------------------------------------------
# Plot the three differences over time: one panel per `type` and one coloured
# line per `CNT`. The y-axis title is kept generic because the facet strips
# already name the percentile pair shown in each panel; a title that mentions
# only the 90th and 10th percentiles would mislabel the 90/50 and 50/10 panels.
final_cnt %>%
  ggplot(aes(year, difference, group = CNT, color = CNT)) +
  geom_point() +
  geom_line() +
  theme_minimal() +
  scale_y_continuous(
    name = "Achievement gap in Math between percentiles \n of father's education"
  ) +
  scale_x_continuous(name = "Year") +
  facet_wrap(~ type)

## -----------------------------------------------------------------------------
# Distribution of `avg_math` over `father_edu`, computed on the pooled data
pisa %>%
  perc_dist(father_edu, avg_math)

## -----------------------------------------------------------------------------
# Run `perc_dist()` separately for each `year`/`CNT` combination and expand
# the per-group results back into regular columns
cnt_dist <-
  pisa %>%
  nest(data = -c(year, CNT)) %>%
  mutate(
    edu_dist = lapply(
      data,
      function(cell) perc_dist(cell, father_edu, avg_math)
    )
  ) %>%
  select(-data) %>%
  unnest(edu_dist)

cnt_dist

## -----------------------------------------------------------------------------

# Compare the estimates between years within every `CNT` panel: keep every
# tenth percentile, colour the points by year, and join the points that share
# a percentile with a black line.
ggplot(
  cnt_dist %>%
    filter(percentile %in% seq(0, 100, by = 10)) %>%
    mutate(year = as.character(year)),
  aes(x = percentile, y = estimate, color = year, group = percentile)
) +
  geom_point() +
  geom_line(color = "black") +
  facet_wrap(~ CNT) +
  theme_minimal() +
  scale_color_discrete(name = "Year") +
  scale_x_continuous(name = "Percentiles from father's education") +
  scale_y_continuous(name = "Math test score")

## ----echo = FALSE-------------------------------------------------------------
# Show the citation entry for the perccalc package
citation(package = "perccalc")

# Try the perccalc package in your browser

# Any scripts or data that you put into this service are public.

# perccalc documentation built on Dec. 18, 2019, 1:38 a.m.