README.md

The California Department of Education in R

The California Department of Education publishes a large amount of public data. The rCAEDDATA package makes those datasets available in R.

Installation

# Install the package from GitHub (requires the devtools package),
# then attach it for the current session.
devtools::install_github("daranzolin/rCAEDDATA")
library(rCAEDDATA)

Available Datasets

The examples below use the `graduates`, `dropouts`, `enrollments`, and `suspensions` datasets.

Examples

Graduates

library(rCAEDDATA)
library(tidyverse)
#> Loading tidyverse: ggplot2
#> Loading tidyverse: tibble
#> Loading tidyverse: tidyr
#> Loading tidyverse: readr
#> Loading tidyverse: purrr
#> Loading tidyverse: dplyr
#> Conflicts with tidy packages ----------------------------------------------
#> filter(): dplyr, stats
#> lag():    dplyr, stats
# Load the graduates dataset into the session.
data("graduates")

# Stacked bar chart of graduates per year, split into the UC_GRADS count
# ("Yes") and the remainder of GRADS ("No").
graduates %>%
  group_by(YEAR) %>%
  summarize(
    Yes = sum(UC_GRADS),
    # Everything in GRADS that is not counted in UC_GRADS.
    No = sum(GRADS) - Yes
  ) %>%
  # Long format: one row per YEAR x Eligibility ("Yes"/"No").
  gather(key = "Eligibility", value = "Graduates", -YEAR) %>%
  ggplot(aes(x = YEAR, y = Graduates, fill = Eligibility)) +
  geom_col(color = "black") +
  scale_fill_manual(values = c("yellow", "lightblue")) +
  scale_y_continuous(labels = scales::comma) +
  theme_minimal() +
  labs(
    title = "California High School Graduates, 1992-2016",
    x = "Year",
    y = "Graduates",
    fill = "UC Eligible?"
  )

Dropouts

# Load the dropouts dataset into the session.
data("dropouts")

# Proportional (100%) stacked bars: share of dropouts by gender for each grade.
dropouts %>%
  # Keep GENDER plus the per-grade dropout count columns (names like D7 ... D12).
  select(GENDER, matches("D[0-9]{1,2}")) %>%
  gather(key = "GRADE", value = "DROPOUTS", -GENDER) %>%
  # Strip the "D" prefix so the grade can be plotted on a numeric axis.
  mutate(
    GRADE = stringr::str_replace(GRADE, "D", ""),
    GRADE = as.numeric(GRADE)
  ) %>%
  group_by(GENDER, GRADE) %>%
  summarize(DROPOUTS = sum(DROPOUTS)) %>%
  ggplot(aes(x = GRADE, y = DROPOUTS, fill = GENDER)) +
  # position = "fill" rescales each bar to 1, so heights read as proportions.
  geom_col(position = "fill") +
  scale_x_continuous(breaks = 7:12) +
  theme_minimal() +
  labs(
    title = "Proportion of Student Dropouts by Gender, Grades 7-12",
    x = "Grade",
    y = "",
    fill = "Gender"
  )

Enrollments

# Proportional stacked bars of enrollment by ethnicity over time, one facet
# per selected district.
enrollments %>%
  # Restrict to the districts of interest first; the recode below does not
  # depend on DISTRICT, so the order of these two steps does not matter.
  filter(
    DISTRICT %in% c(
      "Santa Clara Unified",
      "Milpitas Unified",
      "San Jose Unified",
      "Fremont Union High",
      "Mountain View-Los Altos Union High",
      "Cupertino Union",
      "Campbell Union",
      "Cambrian",
      "Palo Alto Unified"
    )
  ) %>%
  # Translate the numeric ETHNIC codes into readable labels; codes 8 and 9
  # share one label, and any unlisted code becomes NA.
  mutate(
    ETHNIC = case_when(
      ETHNIC == 0 ~ "Not Reported",
      ETHNIC == 1 ~ "American Indian",
      ETHNIC == 2 ~ "Asian",
      ETHNIC == 3 ~ "Pacific Islander",
      ETHNIC == 4 ~ "Filipino",
      ETHNIC == 5 ~ "Hispanic",
      ETHNIC == 6 ~ "African American",
      ETHNIC == 7 ~ "White",
      ETHNIC %in% c(8, 9) ~ "Two or More"
    )
  ) %>%
  select(DISTRICT, YEAR, ETHNIC, starts_with("GR_")) %>%
  # Stack the per-grade GR_* columns so they can be summed per group.
  gather(key = "GRADE", value = "STUDENTS", -DISTRICT, -YEAR, -ETHNIC) %>%
  group_by(DISTRICT, YEAR, ETHNIC) %>%
  summarize(TOTAL_STUDENTS = sum(STUDENTS)) %>%
  ggplot(aes(x = YEAR, y = TOTAL_STUDENTS, fill = ETHNIC)) +
  geom_col(position = "fill") +
  facet_wrap(~ DISTRICT, nrow = 3) +
  theme_minimal() +
  # Rotate the year labels so they do not overlap within the narrow facets.
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Ethnic Diversity in Silicon Valley, 2007-2017",
    subtitle = "Santa Clara Districts",
    x = "Year",
    y = "",
    fill = "Ethnicity"
  )

Suspensions

library(maps)
#> 
#> Attaching package: 'maps'
#> The following object is masked from 'package:purrr':
#> 
#>     map
library(ggmap)
library(mapdata)
# Polygon outlines for the state of California and for its counties.
states <- map_data("state")
ca_df <- subset(states, region == "california")
counties <- map_data("county")
ca_county <- subset(counties, region == "california")

# Per-NAME totals for 2014-15: drug-related suspensions, all suspensions, and
# the drug-related share rounded to two decimals.
# NOTE(review): `AGGEGATELEVEL` looks like a misspelling of "AGGREGATELEVEL";
# it is kept as-is because it must match the column name in `suspensions` --
# confirm with names(suspensions) before changing it.
drug_data <- suspensions %>%
  filter(
    YEAR == "2014-15",
    AGGEGATELEVEL == "O"
  ) %>%
  group_by(NAME) %>%
  summarize(
    TOTAL_DRUGS = sum(DRUGS, na.rm = TRUE),
    TOTAL = sum(TOTAL, na.rm = TRUE),
    # Uses the two totals computed just above, not the raw columns.
    DRUG_PROP = round(TOTAL_DRUGS / TOTAL, 2)
  )

# Attach the suspension figures to the county polygons. The join key is the
# lower-cased NAME, matched against the lower-case `subregion` column of the
# map data. The result is deliberately not called `map_data`, so that it does
# not shadow ggplot2::map_data(), which is used above.
county_drug_map <- left_join(
  ca_county,
  drug_data %>%
    mutate(subregion = stringr::str_to_lower(NAME)),
  by = "subregion"
)

# Layers, bottom to top: gray state fill, counties shaded by DRUG_PROP, and a
# black state outline drawn last so it sits above the white county borders.
ggplot(data = ca_df, mapping = aes(x = long, y = lat, group = group)) +
  coord_fixed(1.3) +
  geom_polygon(color = "black", fill = "gray") +
  geom_polygon(data = county_drug_map, aes(fill = DRUG_PROP), color = "white") +
  geom_polygon(color = "black", fill = NA) +
  labs(
    title = "Proportion of Drugs-Related Suspensions by County, 2014-2015",
    fill = "Proportion"
  ) +
  theme_void() +
  viridis::scale_fill_viridis()



daranzolin/rCAEDDATA documentation built on May 7, 2019, 2:54 a.m.