# Global knitr chunk options applied to every chunk in this document.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.path = "README-files/",
  cache = TRUE,
  message = FALSE,
  warning = FALSE,
  fig.height = 10
)
scorecard
The scorecard
package includes processed datasets from the College Scorecard, 1996-2017.
The Scorecard datasets are unbalanced panels at the college-by-school-year level. The data was last updated in 2019. See the changelog for more details.
The following datasets are available:
scorecard::mf1996_97
scorecard::mf1997_98
scorecard::mf1998_99
scorecard::mf1999_00
scorecard::mf2000_01
scorecard::mf2001_02
scorecard::mf2002_03
scorecard::mf2003_04
scorecard::mf2004_05
scorecard::mf2005_06
scorecard::mf2006_07
scorecard::mf2007_08
scorecard::mf2008_09
scorecard::mf2009_10
scorecard::mf2010_11
scorecard::mf2011_12
scorecard::mf2012_13
scorecard::mf2013_14
scorecard::mf2014_15
scorecard::mf2015_16
scorecard::mf2016_17
scorecard::mf2017_18
scorecard::codebook
scorecard::cohort_map
The following functions are implemented:
scorecard::attach_var_label()
: Assign variable labels.
scorecard::attach_val_label()
: Assign value labels for certain variables.

Benjamin Skinner has created a wonderful R client rscorecard
for the College Scorecard GET API. If you're interested in
getting some specific variables quickly, I suggest using the rscorecard
package.
You can also download the datasets as an R package. It might take a while to install and load into memory. To download the most recent stable release, use
# devtools provides install_github(); install it first if it is missing:
# install.packages("devtools")

# Install the v0.4.0 release of the package from GitHub.
devtools::install_github("jjchern/scorecard@v0.4.0")

# To uninstall the package, use:
# remove.packages("scorecard")
All datasets are tibbles:
# Attach the tidyverse; the examples below rely on its verbs, the pipe,
# and ggplot2.
library(tidyverse)

# Print one of the yearly datasets.
scorecard::mf2014_15
All the datasets have variable labels attached, which can be viewed in RStudio's Data Viewer:
# Open the `mf2014_15` dataset in the data viewer.
View(scorecard::mf2014_15)
You can also use the labelled
package:
# Variable labels of the first eight columns of `mf2014_15`.
labelled::var_label(select(scorecard::mf2014_15, 1:8))
Or work with the codebook directly:
## Show variable labels
scorecard::codebook %>%
  select(var_name, var_label)

## Build a small function to show value labels.
## The input is a variable name (a string); the result is the distinct
## "<val_label> = <value>" strings recorded for it in the codebook.
show_val_label <- . %>% {
  filter(scorecard::codebook, var_name == .) %>%
    mutate(val_label = glue::glue("{val_label} = {value}")) %>%
    distinct(val_label)
}

## Show value labels:
show_val_label("curroper")
show_val_label("distanceonly")
## Variables needed for the in-state tuition plot
vars <- c("mf_year", "iclevel", "control", "tuitionfee_in")

## Codebook entries for those variables
scorecard::codebook %>%
  select(var_name, var_label, value, val_label) %>%
  filter(var_name %in% vars) %>%
  knitr::kable()

## Functional sequence applied to each yearly dataset:
## keep the variables above, turn labelled columns into factors, keep
## 2-year and 4-year institutions, derive a `year` factor from `mf_year`,
## and winsorise in-state tuition at the 2nd/98th percentiles within each
## level-by-control group.
dplyr_seq <- . %>%
  select(one_of(vars)) %>%
  haven::as_factor() %>%
  filter(iclevel %in% c("4-year", "2-year")) %>%
  mutate(year = mf_year %>% parse_number() %>% as.factor()) %>%
  group_by(iclevel, control) %>%
  mutate_at(
    c("tuitionfee_in"),
    ~ statar::winsorise(., probs = c(0.02, 0.98), verbose = FALSE)
  ) %>%
  ungroup()

## Test the functional sequence
scorecard::mf2014_15 %>% dplyr_seq()

## Apply the sequence to every dataset from 2017-18 back to 2000-01 and
## stack the results (same order as listed).
df <- list(
  scorecard::mf2017_18,
  scorecard::mf2016_17,
  scorecard::mf2015_16,
  scorecard::mf2014_15,
  scorecard::mf2013_14,
  scorecard::mf2012_13,
  scorecard::mf2011_12,
  scorecard::mf2010_11,
  scorecard::mf2009_10,
  scorecard::mf2008_09,
  scorecard::mf2007_08,
  scorecard::mf2006_07,
  scorecard::mf2005_06,
  scorecard::mf2004_05,
  scorecard::mf2003_04,
  scorecard::mf2002_03,
  scorecard::mf2001_02,
  scorecard::mf2000_01
) %>%
  lapply(dplyr_seq) %>%
  bind_rows()

## Ridgeline plot of in-state tuition by year, faceted by level and control
df %>%
  ggplot(aes(x = tuitionfee_in, y = year, fill = iclevel)) +
  ggjoy::geom_joy(scale = 2, alpha = .8, colour = "white") +
  ggjoy::theme_joy() +
  facet_grid(iclevel ~ control, scales = "free") +
  labs(
    x = NULL,
    y = NULL,
    title = "In-State Tuition and Fees, 2000-2017"
  ) +
  scale_x_continuous(labels = scales::dollar) +
  # Label every third year on the y axis
  scale_y_discrete(breaks = seq(2017, 2000, -3), expand = c(0.01, 0)) +
  theme(axis.text = element_text(size = 8), legend.position = "none")
## Variables needed for the in-state vs. out-of-state comparison
vars <- c("mf_year", "iclevel", "control", "tuitionfee_in", "tuitionfee_out")

## Functional sequence applied to each yearly dataset:
## keep the variables above, turn labelled columns into factors, keep
## public 2-year and 4-year institutions, derive `type` and `year`,
## winsorise both tuition variables at the 2nd/98th percentiles within
## each type, then reshape to one row per institution and tuition kind.
dplyr_seq <- . %>%
  select(one_of(vars)) %>%
  haven::as_factor() %>%
  filter(iclevel %in% c("4-year", "2-year")) %>%
  filter(control == "Public") %>%
  mutate(type = paste(control, iclevel)) %>%
  mutate(year = mf_year %>% parse_number() %>% as.factor()) %>%
  group_by(type) %>%
  mutate_at(
    c("tuitionfee_in", "tuitionfee_out"),
    ~ statar::winsorise(., probs = c(0.02, 0.98), verbose = FALSE)
  ) %>%
  ungroup() %>%
  gather(in_or_out, tuitionfee, tuitionfee_in:tuitionfee_out) %>%
  mutate(
    in_or_out = if_else(
      in_or_out == "tuitionfee_in",
      "In-state tuition and fees",
      "Out-of-state tuition and fees"
    )
  )

## Test the functional sequence
## scorecard::mf2014_15 %>% dplyr_seq()

## Apply the sequence to every dataset from 2017-18 back to 2000-01 and
## stack the results (same order as listed).
df <- list(
  scorecard::mf2017_18,
  scorecard::mf2016_17,
  scorecard::mf2015_16,
  scorecard::mf2014_15,
  scorecard::mf2013_14,
  scorecard::mf2012_13,
  scorecard::mf2011_12,
  scorecard::mf2010_11,
  scorecard::mf2009_10,
  scorecard::mf2008_09,
  scorecard::mf2007_08,
  scorecard::mf2006_07,
  scorecard::mf2005_06,
  scorecard::mf2004_05,
  scorecard::mf2003_04,
  scorecard::mf2002_03,
  scorecard::mf2001_02,
  scorecard::mf2000_01
) %>%
  lapply(dplyr_seq) %>%
  bind_rows()

## Ridgeline plot of tuition by year, one panel per institution type
df %>%
  ggplot(aes(x = tuitionfee, y = year, fill = in_or_out)) +
  ggjoy::geom_joy(scale = 2, alpha = .8, colour = "white") +
  ggjoy::theme_joy() +
  facet_wrap(~type, scales = "free") +
  labs(
    x = NULL,
    y = NULL,
    title = "In-State Vs. Out-of-State Tuition and Fees for Public Colleges",
    caption = "Source: College Scorecard, 2000-2017"
  ) +
  scale_x_continuous(labels = scales::dollar) +
  # Label every third year on the y axis
  scale_y_discrete(breaks = seq(2017, 2000, -3), expand = c(0.01, 0)) +
  theme(
    axis.text = element_text(size = 9),
    legend.position = "top",
    legend.title = element_blank(),
    legend.justification = "center"
  )
## Histogram of average age of entry for 2-year colleges, 2016-17
scorecard::mf2016_17 %>%
  select(instnm, control, iclevel, age_entry) %>%
  haven::as_factor() %>%
  filter(iclevel == "2-year") %>%
  mutate(age_entry = as.numeric(age_entry)) %>%
  ggplot(aes(x = age_entry)) +
  geom_histogram(fill = "white", colour = "black") +
  hrbrthemes::theme_ipsum_rc() +
  labs(
    x = "Average Age of Entry",
    y = "Count",
    title = "Distribution of Average Age of Entry",
    subtitle = "for 2-Year Colleges",
    caption = "Source: College Scorecard, 2016-17."
  )

## Same histogram for all institutions, faceted by control and level
scorecard::mf2016_17 %>%
  select(instnm, control, iclevel, age_entry) %>%
  haven::as_factor() %>%
  mutate(age_entry = as.numeric(age_entry)) %>%
  ggplot(aes(x = age_entry)) +
  # geom_freqpoly() +
  geom_histogram(fill = "white", colour = "black") +
  facet_grid(control ~ iclevel) +
  hrbrthemes::theme_ipsum() +
  labs(
    x = "Average Age of Entry",
    y = "Count",
    title = "Distribution of Average Age of Entry",
    subtitle = "by Control and Level of Institution",
    caption = "Source: College Scorecard, 2016-17."
  )
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.