# Global knitr chunk options for this README: collapse each chunk's source and
# output into a single block, prefix printed output with "#>", and use
# "README-" as the path prefix for generated figure files.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.path = "README-"
)
The California Department of Education provides ample data. Now, that data is available in R.
# Install the development version of rCAEDDATA from GitHub, then attach it.
# The two calls must be separate statements; fused on one line without a
# separator they do not parse.
devtools::install_github("daranzolin/rCAEDDATA")
library(rCAEDDATA)
Cohort Outcome Data ("cohorts") -- California Longitudinal Pupil Achievement Data System (CALPADS) cohort outcome data reported by race/ethnicity, program participation, and gender.
Dropouts by Race and Gender ("dropouts") -- Dropout and enrollment data for grades seven through twelve, by racial/ethnic designation and gender, at the school level.
English Learners by Grade and Language ("english_learners") -- Data for English learners (ELs) by grade, language, and school.
Enrollment by School ("enrollments") -- Data for school-level enrollment by racial/ethnic designation, gender, and grade.
Student Poverty FRPM ("frpm") -- Data for students eligible for Free or Reduced Price Meals (FRPM).
Graduates by Race and Gender ("graduates") -- Data for graduates, and for graduates meeting University of California (UC)/California State University (CSU) entrance requirements, by racial/ethnic designation and gender, at the school level.
Primary and Short-Term Enrollment ("primary_and_short_term") -- Data for primary and short-term school-level enrollment by racial/ethnic designation, gender, and grade.
Expulsion and Suspension Data -- Student discipline data by ethnicity, covering expulsions, in-school suspensions, and out-of-school suspensions.
Truancy -- Aggregate truancy data at the state, county, district, and school levels, including Census Day enrollment, cumulative enrollment, and truancy rates.
library(rCAEDDATA)
library(tidyverse)

data("graduates")

# Stacked bar chart of graduates per year, split by UC eligibility.
# Per YEAR: "Yes" is the sum of UC_GRADS and "No" is the sum of GRADS minus
# "Yes"; the helper total is dropped before reshaping to long form.
graduates %>%
  group_by(YEAR) %>%
  summarize(
    total_grads = sum(GRADS),
    Yes = sum(UC_GRADS),
    No = total_grads - Yes
  ) %>%
  select(-total_grads) %>%
  # One row per (YEAR, Eligibility) so Eligibility can drive the fill.
  pivot_longer(
    cols = -YEAR,
    names_to = "Eligibility",
    values_to = "Graduates"
  ) %>%
  ggplot(aes(YEAR, Graduates, fill = Eligibility)) +
  geom_col(color = "black") +
  labs(
    x = "Year",
    y = "Graduates",
    title = "California High School Graduates, 1992-2016",
    fill = "UC Eligible?"
  ) +
  scale_y_continuous(labels = scales::comma) +
  # Character fill levels sort alphabetically: "No" is yellow, "Yes" is
  # light blue.
  scale_fill_manual(values = c("yellow", "lightblue")) +
  theme_minimal()
data("dropouts")

# Proportion of dropouts by gender for each grade, drawn as bars filled to
# 100%.
dropouts %>%
  # Keep GENDER plus the per-grade dropout columns. The pattern is anchored so
  # only names of the form D<number> are selected; these are the only names the
  # "D"-stripping step below can turn into a numeric grade.
  select(GENDER, matches("^D[0-9]{1,2}$")) %>%
  pivot_longer(
    cols = -GENDER,
    names_to = "GRADE",
    values_to = "DROPOUTS"
  ) %>%
  # "D7" -> 7, "D12" -> 12, so grade can sit on a continuous x axis.
  mutate(GRADE = as.numeric(stringr::str_replace(GRADE, "D", ""))) %>%
  group_by(GENDER, GRADE) %>%
  summarize(DROPOUTS = sum(DROPOUTS)) %>%
  ggplot(aes(GRADE, DROPOUTS, fill = GENDER)) +
  geom_col(position = "fill") +
  scale_x_continuous(breaks = 7:12) +
  labs(
    x = "Grade",
    y = "",
    title = "Proportion of Student Dropouts by Gender, Grades 7-12",
    fill = "Gender"
  ) +
  theme_minimal()
# Share of enrollment by ethnicity per year, one facet per selected district.
enrollments %>%
  # Restrict to the districts of interest first so the recode and reshape
  # below only touch rows that end up in the plot.
  filter(
    DISTRICT %in% c(
      "Santa Clara Unified",
      "Milpitas Unified",
      "San Jose Unified",
      "Fremont Union High",
      "Mountain View-Los Altos Union High",
      "Cupertino Union",
      "Campbell Union",
      "Cambrian",
      "Palo Alto Unified"
    )
  ) %>%
  # Translate the ETHNIC codes into display labels; codes 8 and 9 share the
  # "Two or More" label.
  mutate(
    ETHNIC = case_when(
      ETHNIC == 0 ~ "Not Reported",
      ETHNIC == 1 ~ "American Indian",
      ETHNIC == 2 ~ "Asian",
      ETHNIC == 3 ~ "Pacific Islander",
      ETHNIC == 4 ~ "Filipino",
      ETHNIC == 5 ~ "Hispanic",
      ETHNIC == 6 ~ "African American",
      ETHNIC == 7 ~ "White",
      ETHNIC %in% c(8, 9) ~ "Two or More"
    )
  ) %>%
  select(DISTRICT, YEAR, ETHNIC, starts_with("GR_")) %>%
  # Stack the GR_* columns so they can be summed into one total per
  # district, year, and ethnicity.
  pivot_longer(
    cols = c(-DISTRICT, -YEAR, -ETHNIC),
    names_to = "GRADE",
    values_to = "STUDENTS"
  ) %>%
  group_by(DISTRICT, YEAR, ETHNIC) %>%
  summarize(TOTAL_STUDENTS = sum(STUDENTS)) %>%
  ggplot(aes(YEAR, TOTAL_STUDENTS, fill = ETHNIC)) +
  geom_col(position = "fill") +
  facet_wrap(~DISTRICT, nrow = 3) +
  labs(
    x = "Year",
    y = "",
    title = "Ethnic Diversity in Silicon Valley, 2007-2017",
    subtitle = "Santa Clara Districts",
    fill = "Ethnicity"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
library(maps)
library(ggmap)
library(mapdata)

# Polygon data for the California state outline and for its counties.
states <- map_data("state")
ca_df <- subset(states, region == "california")
counties <- map_data("county")
ca_county <- subset(counties, region == "california")

# Per-NAME suspension totals for 2014-15 and the drug-related share.
# NOTE(review): AGGEGATELEVEL is kept exactly as spelled in the original;
# confirm it matches the column name in `suspensions` (it reads like a
# misspelling of AGGREGATELEVEL). "O" presumably selects county-level rows;
# verify against the data dictionary.
drug_data <- suspensions %>%
  filter(YEAR == "2014-15", AGGEGATELEVEL == "O") %>%
  group_by(NAME) %>%
  summarize(
    TOTAL_DRUGS = sum(DRUGS, na.rm = TRUE),
    TOTAL = sum(TOTAL, na.rm = TRUE),
    # Uses the TOTAL computed on the line above, not the raw column.
    DRUG_PROP = round(TOTAL_DRUGS / TOTAL, 2)
  )

# Attach the proportions to the county polygons; the polygon data uses
# lower-case county names in `subregion`, so NAME is lower-cased to match.
# NOTE(review): this data frame shares its name with ggplot2::map_data().
# Function calls still resolve to the function, but a distinct name would be
# clearer.
map_data <- left_join(
  ca_county,
  drug_data %>% mutate(subregion = stringr::str_to_lower(NAME)),
  by = "subregion"
)

# Layers, bottom to top: gray state fill, counties filled by DRUG_PROP, then
# the state outline redrawn on top.
ggplot(data = ca_df, mapping = aes(x = long, y = lat, group = group)) +
  coord_fixed(1.3) +
  geom_polygon(color = "black", fill = "gray") +
  geom_polygon(data = map_data, aes(fill = DRUG_PROP), color = "white") +
  geom_polygon(color = "black", fill = NA) +
  labs(
    title = "Proportion of Drugs-Related Suspensions by County, 2014-2015",
    fill = "Proportion"
  ) +
  theme_void() +
  viridis::scale_fill_viridis()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.