knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 7,
  fig.height = 5
)
library(tidyverse)
library(colleges)

Integrity checks

ncaa %>%
  group_by(Year) %>%
  summarize(N = n(), num_schools = n_distinct(school_name),
            num_bball = sum(!is.na(bball_wins)),
            num_fball = sum(!is.na(fball_wins)),
            num_military = sum(is_military),
            num_public = sum(is_private),
            num_fees = sum(!is.na(comp_fee_out_state)),
            num_act = sum(!is.na(act_composite_75)),
            num_sat = sum(!is.na(sat_75_avg)))
ncaa_summary <- ncaa %>%
  group_by(school_name) %>%
  summarize(num_years = n(), 
            avg_ugrads = mean(ugrads, na.rm = TRUE),
            avg_sat_75 = mean(sat_75_avg, na.rm = TRUE),
            avg_admit_rate = mean(admit_rate, na.rm = TRUE),
            bball_wpct = sum(bball_wins) / sum(bball_wins + bball_losses),
            fball_wpct = sum(fball_wins) / sum(fball_wins + fball_losses),
            bball_revenue = sum(bball_revenue, na.rm = TRUE) / 1e6,
            fball_revenue = sum(fball_revenue, na.rm = TRUE) / 1e6,
            donations = sum(grand_total, na.rm = TRUE) / 1e6,
            athletics_donations = sum(athletics_total, na.rm = TRUE) / 1e6,
            is_private = any(is_private)) %>%
  mutate(bball_rev_per_ugrad = bball_revenue / (num_years * avg_ugrads),
         fball_rev_per_ugrad = fball_revenue / (num_years * avg_ugrads))

Biggest schools

ncaa_summary %>%
  arrange(desc(avg_ugrads))

Basketball schools

ncaa_summary %>%
  arrange(desc(bball_wpct))
ncaa_summary %>%
  select(school_name, avg_ugrads, bball_wpct, bball_revenue, bball_rev_per_ugrad) %>%
  arrange(desc(bball_revenue))

Football schools

ncaa_summary %>%
  arrange(desc(fball_wpct))
ncaa_summary %>%
  arrange(desc(fball_revenue))

Best revenue generators

ncaa_summary %>%
  select(-fball_revenue) %>%
  arrange(desc(bball_rev_per_ugrad))
ncaa_summary %>%
  select(school_name, bball_revenue, bball_rev_per_ugrad, fball_revenue, fball_rev_per_ugrad) %>%
  arrange(desc(fball_rev_per_ugrad))

Best fundraisers

ncaa_summary %>%
  select(school_name, donations, athletics_donations) %>%
  arrange(desc(donations))
ncaa_summary %>%
  select(school_name, donations, athletics_donations) %>%
  arrange(desc(athletics_donations))
ncaa_summary %>%
  select(school_name, donations, athletics_donations) %>%
  mutate(athletic_donation_pct = athletics_donations / donations) %>%
  arrange(desc(athletic_donation_pct))

FBS schools

Private schools

FBS Correlations

fbs_summary <- ncaa_summary %>%
  filter(!is.na(fball_wpct)) 
fbs_summary %>%
  summarize(cor_cross_wpct = cor(bball_wpct, fball_wpct, use = "complete.obs"),
            cor_cross_rev = cor(bball_revenue, fball_revenue),
            cor_bball = cor(bball_wpct, bball_revenue),
            cor_fball = cor(fball_wpct, fball_revenue, use = "complete.obs"),
            cor_size = cor(avg_ugrads, fball_revenue, use = "complete.obs"))
fbs_summary %>%
  filter(is_private)

State funding per capita

ggplot(data = fbs, aes(x = dollars_per_capita)) + 
  geom_density()
ggplot(data = fbs, aes(y = yield, x = log(dollars_per_capita + 1))) + 
  geom_point() +
  facet_wrap(~Year)

Colorado receives $0 in state funding?

Long-term trends

ggplot(data = fbs, aes(x = acad_end_year, y = yield, group = school_name, color = is_private)) + 
  geom_line()

ggplot(data = fbs, aes(x = acad_end_year, y = admit_rate, group = school_name, color = is_private)) + 
  geom_line()
fbs %>%
  group_by(Year) %>%
  summarize(total_applications = sum(applied, na.rm = TRUE), 
            total_admits = sum(admitted, na.rm = TRUE), 
            total_enrolled = sum(enrolled, na.rm = TRUE), 
            cor_sport = cor(bball_wpct, fball_wpct, use = "complete.obs"), 
            sat_25 = mean(sat_25_avg, na.rm = TRUE),
            sat_75 = mean(sat_75_avg, na.rm = TRUE),
#            act_25 = mean(P25ACT, na.rm = TRUE),
            act_75 = mean(act_composite_75, na.rm = TRUE)) %>%
  mutate(admit_rate = total_admits / total_applications, 
         yield = total_enrolled / total_admits)

Top schools

fbs_summary %>%
  arrange(desc(avg_sat_75))
fbs_summary %>%
  arrange(avg_admit_rate)
fbs %>%
  mutate(sat_range = cut(admit_rate, breaks = 5)) %>%
  ggplot(aes(x = acad_end_year, y = sat_75_avg, color = is_private)) + 
    geom_line(aes(group = school_name)) + 
    facet_wrap(~sat_range, scales = "free_y")

Power Five Conference schools

power5 %>%
  group_by(school_name) %>%
  summarize(num_years = n(), num_confs = n_distinct(bb_conf), conf = last(bb_conf)) %>%
  arrange(conf, school_name) %>%
  print(n = Inf)

Modeling

mod <- lm(admit_rate ~ Year + school_name, data = fbs)
summary(mod)


beanumber/colleges documentation built on May 17, 2019, 5:47 p.m.