knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width=12, fig.height=6 )
library(nfl4th) library(tidyverse) library(ggplot2) library(ggtext) library(DescTools) library(ggthemes) library(gt) library(magick) library(cowplot) library(nflplotR) library(patchwork) # basic helper to use 538 theme and make axes look better theme_ben <- theme_fivethirtyeight() + theme( legend.position = "none", plot.title = element_markdown(size = 18, hjust = 0.5), plot.subtitle = element_markdown(size = 10, hjust = 0.5), axis.title.x = element_text(size=12, face="bold"), axis.title.y = element_text(size=12, face="bold") ) # helper function to not make every table have so many lines of code add_gt_options <- function(gt_data) { gt_data %>% tab_options( row_group.border.top.width = px(3), row_group.border.top.color = "black", row_group.border.bottom.color = "black", table_body.hlines.color = "white", table.border.top.color = "black", table.border.top.width = px(1), table.border.bottom.color = "white", table.border.bottom.width = px(1), column_labels.border.bottom.color = "black", column_labels.border.bottom.width = px(2), row.striping.background_color = '#FFFFFF', row.striping.include_table_body = TRUE, table.background.color = '#F2F2F2', data_row.padding = gt::px(2), table.font.size = gt::px(16L) ) %>% return() }
nfl4th
We begin by demonstrating the use of nfl4th
to load all seasons for which 4th down calculations are available in one line of code.
pbp <- nfl4th::load_4th_pbp(2014:2022, fast = T) %>% filter(down == 4)
The key columns generated by the main nfl4th
function, load_4th_pbp()
, are go_boost
, which gives the predicted gain (or loss, when negative) in win probability associated with going for it, relative to the next-best alternative (whether kicking a field goal or punting), and go
, which is an indicator for whether the team went for it on a given play. Note that go_boost
and go
are measured in percentage points (i.e., 0 to 100) in order to make creating figures like the following easier. This means that the values for go
are either 0
or 100
in every row.
Let's make a plot that shows that teams' likelihood of going for it in 2020 versus how large the predicted benefit would be.
pbp %>% dplyr::filter(season == 2020) %>% dplyr::mutate(go_boost = DescTools::RoundTo(go_boost, 0.5)) %>% dplyr::group_by(go_boost) %>% dplyr::summarize(go = mean(go)) %>% dplyr::ungroup() %>% dplyr::filter(between(go_boost, -10, 10)) %>% dplyr::mutate( should_go = dplyr::case_when( go_boost > .5 ~ 1, go_boost < -.5 ~ 0, TRUE ~ 2) ) %>% ggplot(aes(go_boost, go, color = as.factor(should_go))) + geom_point(size = 5, color = "black", alpha = .5) + geom_vline(xintercept = 0) + geom_smooth(method = "lm", show.legend = F, se = F, size = 3)+ theme_ben + labs(x = "Gain in win probability by going for it (nfl4th)", y = "Go-for-it percentage", title = glue::glue("NFL Go-for-it Rate on <span style='color:red'>4th down</span>") ) + scale_y_continuous(breaks = scales::pretty_breaks(n = 10), expand = c(0,2)) + scale_x_continuous(breaks = scales::pretty_breaks(n = 20), limits = c(-10, 10), expand = c(0,0)) + annotate("text",x=-4, y= 90, label = "Should\nkick", color="red", size = 5) + annotate("text",x=3, y= 90, label = "Should\ngo for it", color="red", size = 5) + annotate("label",x=-6, y= 15, label = "Teams almost always kick\nwhen they should...", size = 5) + annotate("label",x=6, y= 25, label = "...but frequently\n kick when they\nshould go for it", size = 5)
Here's how to create one of the tables shown in the piece on The Athletic:
pbp %>% dplyr::filter(season == 2020, !is.na(go_boost), !is.na(go)) %>% dplyr::mutate(type = dplyr::case_when( go_boost >= 4 ~ "Definitely go for it", go_boost > 1 & go_boost < 4 ~ "Probably go for it", go_boost >= -1 & go_boost <= 1 ~ "Toss-up", go_boost < -1 & go_boost > -4 ~ "Probably kick", go_boost <= -4 ~ "Definitely kick" )) %>% dplyr::group_by(type) %>% dplyr::summarize(go = mean(go), n = dplyr::n()) %>% dplyr::ungroup() %>% dplyr::arrange(-go) %>% gt::gt() %>% gt::cols_label( type = "Recommendation", go = "Went for it %", n = "Plays" ) %>% gt::tab_style( style = gt::cell_text(color = "black", weight = "bold"), locations = list( gt::cells_column_labels(dplyr::everything()) ) ) %>% add_gt_options() %>% gt::fmt_number(columns = dplyr::vars(go), decimals = 0) %>% gt::cols_align( columns = 2:3, align = "center" ) %>% gt::tab_header(title = "NFL team decision-making by go recommendation, 2020") %>% gt::tab_source_note(gt::md('**Notes**: "Definitely" recommendations are greater than 4 percentage point advantage,<br> "probably" 1-4 percentage points'))
Thus, we can see that the model is strongly aligned to what coaches do.
Here are the worst instances of not going for it (and instead punting or kicking a field goal) in terms of total expected win probability lost.
2020's champion is Kliff Kingsbury.
pbp %>% filter( season == 2020, go == 0, # they tried to go for it so throw this play out !(posteam == "ARI" & week == 3 & play_id == 2364) ) %>% arrange(-go_boost) %>% mutate(rank = 1 : n()) %>% head(10) %>% select(rank, posteam, defteam, week, qtr, ydstogo, score_differential, go_boost, desc) %>% gt() %>% cols_label( rank = "", posteam = "Team", defteam = "Opp", week = "Week", qtr = "Qtr", ydstogo = "YTG", score_differential = "Diff", desc = "Play", go_boost = "WP loss" ) %>% tab_style( style = cell_text(color = "black", weight = "bold"), locations = list(cells_column_labels(everything())) ) %>% text_transform( locations = cells_body(vars(posteam, defteam)), fn = function(x) web_image(url = paste0('https://a.espncdn.com/i/teamlogos/nfl/500/',x,'.png')) ) %>% cols_width(everything() ~ px(400)) %>% cols_width( vars(rank) ~ px(30), vars(go_boost) ~ px(80), vars(posteam, defteam, week, score_differential, qtr, ydstogo) ~ px(50) ) %>% add_gt_options() %>% fmt_number(columns = vars(go_boost), decimals = 1) %>% cols_align(columns = 1:8, align = "center") %>% tab_header(title = "Worst kick decisions of 2020")
nfl4th
recommendations# labels on the plot text_df <- tibble( label = c("NFL coaches<br>in <span style='color:#00BFC4'>**2020**</span>", "NFL coaches<br>in <span style='color:#F8766D'>**2014**</span>"), x = c(6, 8.2), y = c(80, 37), angle = c(10, 10), color = c("black", "black") ) pbp %>% filter(vegas_wp > .2, between(go_boost, -10, 10), season %in% c(2014, 2020)) %>% ggplot(aes(go_boost, go, color = as.factor(season))) + geom_richtext(data = text_df, aes(x, y, label = label, angle = angle), color = "black", fill = NA, label.color = NA, size = 5) + geom_vline(xintercept = 0) + stat_smooth(method = "gam", method.args = list(gamma = 1), formula = y ~ s(x, bs = "cs", k = 10), show.legend = F, se = F, size = 4) + # this is just to get the plot to draw the full 0 to 100 range geom_hline(yintercept = 100, alpha = 0) + geom_hline(yintercept = 0, alpha = 0) + theme_fivethirtyeight()+ labs(x = "Gain in win probability by going for it", y = "Go-for-it percentage", subtitle = "4th down decisions in 2020 versus 2014, win prob. > 20%", title = glue::glue("How <span style='color:red'>math</span> is changing football")) + theme( legend.position = "none", plot.title = element_markdown(size = 22, hjust = 0.5), plot.subtitle = element_markdown(size = 14, hjust = 0.5), axis.title.x = element_text(size = 14, face="bold"), axis.title.y = element_text(size = 14, face="bold") ) + scale_y_continuous(breaks = scales::pretty_breaks(n = 4), expand = c(0,0)) + scale_x_continuous(breaks = scales::pretty_breaks(n = 10), limits = c(-10, 10), expand = c(0,0)) + annotate("text", x= -1.2, y= 70, label = "Should\nkick", color="black", size = 5) + annotate("text", x= 1.2, y= 70, label = "Should\ngo for it", color="black", size = 5) + geom_segment( aes(x = -.1, y = 80, xend = -2, yend = 80), arrow = arrow(length = unit(0.05, "npc")), color = "black", size = 2 ) + geom_segment( aes(x = .1, y = 80, xend = 2, yend = 80), arrow = arrow(length = unit(0.05, "npc")), color = "black", size = 2 )
For the remainder of this page, the code that generates the tables and figures won't be shown in order to make it more readable, but all of the code can be viewed here.
nfl4th
and New York Times model recommendationsThe recommendations from nfl4th
are somewhat more aggressive on than New York Times. I think this is mostly because NYT assumed that a successful 4th down conversion would gain exactly the necessary yards to go and nothing more, resulting in an under-estimate of the benefit of going for 4th down. Let's compare the recommendations using the numbers in this NYT article.
plot <- pbp %>% filter(!is.na(go_wp)) %>% mutate( punt_prob = if_else(is.na(punt_wp), 0, punt_wp), ydstogo = ifelse(ydstogo > 10, 10, ydstogo), decision = case_when( punt_prob > fg_wp & punt_prob > go_wp ~ "Punt", fg_wp > punt_prob & fg_wp > go_wp ~ "Field goal", go_wp > punt_prob & go_wp > fg_wp ~ "Go for it", TRUE ~ NA_character_ ), # round to nearest 5 binned_yardline = 5 * round(yardline_100 / 5) ) %>% select(binned_yardline, yardline_100, ydstogo, go_boost, decision, vegas_wp, score_differential, qtr, posteam, home_team, spread_line) plot_prepare <- function(df) { df %>% # for getting percent of decisions for alpha in some plots group_by(binned_yardline, ydstogo) %>% mutate(tot_n = n()) %>% ungroup() %>% group_by(binned_yardline, ydstogo, decision) %>% summarize(n = n(), tot_n = dplyr::first(tot_n), pct = n / tot_n) %>% group_by(binned_yardline, ydstogo) %>% arrange(binned_yardline, ydstogo, -n) %>% dplyr::slice(1) %>% # for the charts: if you've been told to punt at 4th & X # you should also be told to punt from that yardline at 4th & X and longer # a better alternative would be some sort of smoother or picking a given game # but ain't nobody got time for that # and same with FGs group_by(binned_yardline) %>% mutate( has_punted = cumsum(decision == "Punt"), has_kicked = cumsum(decision == "Field goal"), decision = case_when( has_punted > 0 ~ "Punt", has_kicked > 0 ~ "Field goal", TRUE ~ decision ) ) %>% # if you've been told to punt on 4th & X from a given yardline # you should also be told to punt on 4th & X at a longer yardline group_by(ydstogo) %>% mutate( has_punted = cumsum(decision == "Punt"), decision = ifelse(has_punted > 0, "Punt", decision) ) %>% ungroup() %>% return() }
fig1 <- plot %>% plot_prepare() %>% ggplot(aes(binned_yardline, ydstogo, fill = decision)) + geom_tile(aes(binned_yardline, ydstogo, width = 4.5, height = .95), alpha = 0.75) + scale_y_reverse(breaks = scales::pretty_breaks(n = 10), expand = c(0,0)) + scale_x_reverse(breaks = scales::pretty_breaks(n = 10), expand = c(0,0)) + theme_fivethirtyeight() + theme( plot.margin = margin(1, 1, 1, 1, "cm"), legend.position = "none", plot.title = element_markdown(size = 16, hjust = 0.5), plot.subtitle = element_markdown(size = 10, hjust = 0.5), axis.title.x = element_text(size=12, face="bold"), axis.title.y = element_text(size=12, face="bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank() ) + labs(x = "Distance to opponent end zone", y = "Yards to go", title = "nfl4th") + scale_fill_brewer(palette="Dark2") + annotate("text",x=50, y= 2, label = "Go for it", size = 6) + annotate("text",x=80, y= 7, label = "Punt", size = 6) + annotate("text",x=20, y= 7, label = "Field goal", size = 6)
fig2 <- plot %>% mutate( nyt_go = if_else( ydstogo==1 | (ydstogo==2 & yardline_100<=72) | (ydstogo==3 & yardline_100<=60 & yardline_100 >= 20) | (ydstogo==3 & yardline_100<=4) | (ydstogo==4 & yardline_100<=55 & yardline_100>=29) | (ydstogo==5 & yardline_100<=50 & yardline_100>=33) | (ydstogo==6 & yardline_100<=47 & yardline_100>=35) | (ydstogo==7 & yardline_100<=44 & yardline_100>=36) | (ydstogo==8 & yardline_100<=41 & yardline_100>=37) | (ydstogo==9 & yardline_100<=38 & yardline_100>=38), 1, 0 ), decision = case_when( nyt_go == 1 ~ "Go for it", ydstogo == 2 & nyt_go == 0 ~ "Punt", ydstogo == 3 & nyt_go == 0 & yardline_100 > 62 ~ "Punt", ydstogo == 4 & nyt_go == 0 & yardline_100 > 62 ~ "Punt", ydstogo == 5 & nyt_go == 0 & yardline_100 > 62 ~ "Punt", ydstogo == 6 & nyt_go == 0 & yardline_100 > 62 ~ "Punt", ydstogo == 7 & nyt_go == 0 & yardline_100 > 62 ~ "Punt", ydstogo == 8 & nyt_go == 0 & yardline_100 > 62 ~ "Punt", ydstogo == 9 & nyt_go == 0 & yardline_100 > 62 ~ "Punt", ydstogo == 10 & nyt_go == 0 & yardline_100 > 62 ~ "Punt", nyt_go == 0 & yardline_100 > 38 ~ "Punt", TRUE ~ "Field goal" ), binned_yardline = 5 * round(yardline_100 / 5) ) %>% select(binned_yardline, ydstogo, decision) %>% group_by(binned_yardline, ydstogo, decision) %>% summarize(n = n()) %>% group_by(binned_yardline, ydstogo) %>% arrange(binned_yardline, ydstogo, -n) %>% dplyr::slice(1) %>% ungroup() %>% ggplot(aes(binned_yardline, ydstogo, fill = decision)) + geom_tile(aes(binned_yardline, ydstogo, width = 4.5, height = .95), alpha = .75) + scale_y_reverse(breaks = scales::pretty_breaks(n = 10), expand = c(0,0)) + scale_x_reverse(breaks = scales::pretty_breaks(n = 10), expand = c(0,0)) + theme_fivethirtyeight() + theme( plot.margin = margin(1, 1, 1, 1, "cm"), legend.position = "none", plot.title = element_markdown(size = 16, hjust = 0.5), plot.subtitle = element_markdown(size = 10, hjust = 0.5), axis.title.x = element_text(size=12, face="bold"), axis.title.y = element_text(size=12, face="bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank() ) + labs(x = "Distance to opponent end zone", y = "Yards to go", title = "NYT 4th down bot") + scale_fill_brewer(palette="Dark2") + annotate("text",x=50, y= 2, label = "Go for it", size = 6) + annotate("text",x=80, y= 7, label = "Punt", size = 6) + annotate("text",x=20, y= 7, label = "Field goal", size = 6)
p <- fig1 + fig2 + plot_annotation(title = '4th down model comparison', theme = theme(plot.title = element_text(size = 16, hjust = 0.5, face="bold")) ) & theme_fivethirtyeight() & theme( plot.margin = margin(0, 0, 0, 0, "cm"), legend.position = "none", plot.title = element_markdown(size = 16, hjust = 0.5), axis.title.x = element_text(size=12, face="bold"), axis.title.y = element_text(size=12, face="bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank() ) p # for getting a nice figure # ggsave("tmp.png", height = 5, width = 10)
While nfl4th
and NYT agree that teams should always go for 4th & 1, nfl4th
also thinks this is the case for 4th-and-2 and even most 4th-and-3s. However, note that this is an oversimplification: the nfl4th
plot on the left shows the most frequent recommendation at a given location, but the recommendations can change given game state. For example, it will recommend that trailing teams play more aggressively, as shown below.
nfl4th
recommendations while leading and trailingThe model makes very different recommendations based on game situations. Teams that are underdogs or are trailing are told to act more aggressively to get back in the game:
fig1 <- plot %>% filter( score_differential < -7 ) %>% plot_prepare() %>% ggplot(aes(binned_yardline, ydstogo, fill = decision)) + geom_tile(aes(binned_yardline, ydstogo, width = 4.5, height = .95, alpha = pct)) + scale_y_reverse(breaks = scales::pretty_breaks(n = 10), expand = c(0,0)) + scale_x_reverse(breaks = scales::pretty_breaks(n = 10), expand = c(0,0)) + theme_fivethirtyeight() + theme( plot.margin = margin(0, 0, 0, 0, "cm"), legend.position = "none", plot.title = element_markdown(size = 12, hjust = 0.5), plot.subtitle = element_markdown(size = 10, hjust = 0.5), axis.title.x = element_text(size=12, face="bold"), axis.title.y = element_text(size=12, face="bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank() ) + labs(x = "Distance to opponent end zone", y = "Yards to go", title = "Trailing by 8+") + scale_fill_brewer(palette="Dark2") + annotate("text",x=50, y= 2, label = "Go for it", size = 6) + annotate("text",x=80, y= 7, label = "Punt", size = 6) + annotate("text",x=20, y= 7, label = "Field goal", size = 6)
fig2 <- plot %>% filter( score_differential > 7 ) %>% plot_prepare() %>% ggplot(aes(binned_yardline, ydstogo, fill = decision)) + geom_tile(aes(binned_yardline, ydstogo, width = 4.5, height = .95, alpha = pct)) + scale_y_reverse(breaks = scales::pretty_breaks(n = 10), expand = c(0,0)) + scale_x_reverse(breaks = scales::pretty_breaks(n = 10), expand = c(0,0)) + theme_fivethirtyeight() + theme( plot.margin = margin(0, 0, 0, 0, "cm"), legend.position = "none", plot.title = element_markdown(size = 12, hjust = 0.5), plot.subtitle = element_markdown(size = 10, hjust = 0.5), axis.title.x = element_text(size=12, face="bold"), axis.title.y = element_text(size=12, face="bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank() ) + labs(x = "Distance to opponent end zone", y = "Yards to go", title = "Leading by 8+") + scale_fill_brewer(palette="Dark2") + annotate("text",x=50, y= 2, label = "Go for it", size = 6) + annotate("text",x=80, y= 7, label = "Punt", size = 6) + annotate("text",x=20, y= 7, label = "Field goal", size = 6)
p <- fig1 + fig2 + plot_annotation(title = '4th down recommendations by situation', caption = "Darker = more frequent recommendation", theme = theme(plot.title = element_text(size = 16, hjust = 0.5, face="bold")) ) & theme_fivethirtyeight() & theme( plot.margin = margin(0, 0, 0, 0, "cm"), legend.position = "none", plot.title = element_markdown(size = 16, hjust = 0.5), axis.title.x = element_text(size=12, face="bold"), axis.title.y = element_text(size=12, face="bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank() ) p # for getting a nice figure # ggsave("tmp.png", height = 5, width = 10)
nfl4th
recommendations given Vegas lineHere is the use of a function that takes in a point spread and generates a recommendation chart based on previous situations involving teams favored or not favored by a similar number of points. Here is what the recommendations look like for teams that are 4-10 point underdogs. As expected, these teams are encouraged to be more aggressive, although the difference isn't as large as I might have thought.
spread_plot <- function(spread, range = 2) { fig1 <- plot %>% mutate(posteam_spread = ifelse(posteam == home_team, spread_line, -spread_line)) %>% filter( between(posteam_spread, spread - range, spread + range) ) %>% plot_prepare() %>% ggplot(aes(binned_yardline, ydstogo, fill = decision)) + geom_tile(aes(binned_yardline, ydstogo, width = 4.5, height = .95, alpha = pct)) + scale_y_reverse(breaks = scales::pretty_breaks(n = 10), expand = c(0,0)) + scale_x_reverse(breaks = scales::pretty_breaks(n = 10), expand = c(0,0)) + theme_fivethirtyeight() + theme( legend.position = "none", plot.title = element_markdown(size = 18, hjust = 0.5), plot.subtitle = element_markdown(size = 10, hjust = 0.5), axis.title.x = element_text(size=12, face="bold"), axis.title.y = element_text(size=12, face="bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank() ) + labs( x = "Distance to opponent end zone", y = "Yards to go", caption = "Darker = more frequent recommendation", title = glue::glue("Team favored by {spread - range} to {spread + range} points") ) + scale_fill_brewer(palette="Dark2") + annotate("text",x=50, y= 2, label = "Go for it", size = 6) + annotate("text",x=80, y= 7, label = "Punt", size = 6) + annotate("text",x=20, y= 7, label = "Field goal", size = 6) return(fig1) } spread_plot(-7, 3)
Holding the expected "go" gain constant, coaches are more likely to go for it when ahead by a lot or trailing by a lot. This makes sense as their decision is less likely to make the difference between a win and a loss (and thus open the coach up to being blamed for a loss).
min <- 2 max <- 4 my_title <- glue::glue("NFL Go-for-it Rate on <span style='color:red'>4th down</span>") pbp %>% filter(go_boost > min & go_boost < max, season == 2020) %>% mutate(vegas_wp = 100 * vegas_wp) %>% ggplot(aes(vegas_wp, go)) + # geom_point(size = 5, color = "black", alpha = .5) + geom_smooth(show.legend = F, se = F, size = 3, color = "black")+ theme_ben + labs(x = "Win probability prior to play", y = "Go-for-it percentage", subtitle = glue::glue("Gain in win prob by going for it {min}-{max} percentage points, 2020 season"), title = my_title) + theme( legend.position = "none", plot.title = element_markdown(size = 22, hjust = 0.5), plot.subtitle = element_markdown(size = 12, hjust = 0.5) ) + scale_y_continuous(breaks = scales::pretty_breaks(n = 10), limits = c(0, 100)) + scale_x_continuous(breaks = scales::pretty_breaks(n = 20))
When do teams start going for it?
pbp %>% mutate( wp = 100 * wp, quarter = case_when( qtr == 1 ~ "1", qtr == 4 ~ "4", TRUE ~ "2-3" ) ) %>% filter(qtr <= 4, go_boost > 2, wp <= 50) %>% ggplot(aes(wp, go, color = factor(quarter), group = factor(quarter))) + geom_smooth(se = F, size = 3, formula = y ~ splines::bs(x, 3))+ theme_ben + labs(x = "Win probability prior to play", y = "Go-for-it percentage", subtitle = glue::glue("Expected WP gain at least 2 percentage points | 2014 - 2022"), title = my_title) + theme( plot.title = element_markdown(size = 22, hjust = 0.5), plot.subtitle = element_markdown(size = 12, hjust = 0.5), strip.text = element_text(size = 16, face = "bold"), panel.background = element_rect(color = "black", linetype = "solid") ) + scale_y_continuous(breaks = scales::pretty_breaks(n = 5), limits = c(0, 100)) + scale_x_continuous(breaks = scales::pretty_breaks(n = 5), expand = c(0, 0)) + annotate("text", x = 10, y = 20, label = "Q1", size = 5) + annotate("text", x = 5, y = 45, label = "Q2/Q3", size = 5) + annotate("text", x = 5, y = 90, label = "Q4", size = 5)
``` {r divs, echo = FALSE, message = FALSE} divs <- teamcolors::teamcolors %>% filter(league == "nfl") %>% select(name, division) %>% mutate(name = case_when( name == "Washington Redskins" ~ "Washington Commanders", name == "Oakland Raiders" ~ "Las Vegas Raiders", TRUE ~ name ))
nfl_logos_df <- nflfastR::teams_colors_logos %>% filter(!(team_abbr %in% c("LAR", "SD", "OAK"))) %>% left_join(divs, by = c("team_name" = "name"))
follow_bot <- function(conf) {
logos_conf <- image_read(nfl_logos_df %>% filter(substr(division, 1, 3) == conf) %>% pull(team_logo_espn)) my_title <- glue::glue("{conf}: when did teams follow the bot? 2020")
pbp %>% filter(season == 2020) %>% left_join(nfl_logos_df, by = c("posteam" = "team_abbr")) %>% filter(substr(division, 1, 3) == conf) %>% filter(between(go_boost, -5, 15)) %>% mutate(go = as_factor(go)) %>% select(game_id, posteam, team_logo_espn, go, go_boost, yardline_100, score_differential, ydstogo, desc) %>% ggplot(aes(x = posteam, y=go_boost, color=go)) + geom_hline(yintercept = 0) + geom_jitter(aes(y = go_boost, fill = go), size = 3, width = 0.2, show.legend=FALSE, alpha=.5) + scale_y_continuous(name = "4th down bot gain in going for it", breaks = scales::pretty_breaks(n = 5), expand = c(0,1)) + theme_ben + labs(title = my_title, x = "Went for it", subtitle = "Kicked versus went for it" ) + theme( panel.grid.major = element_blank(), plot.subtitle = element_markdown(size = 16, hjust = 0.5), axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank() ) + geom_hline(yintercept = -6, color = "gray", alpha = .1) + # doing this in a smart way is annoying so just brute forcing it draw_image(logos_conf[1], x = .5, y = -5.75, scale = 2) + draw_image(logos_conf[2], x = 1.5, y = -5.75, scale = 2) + draw_image(logos_conf[3], x = 2.5, y = -5.75, scale = 2) + draw_image(logos_conf[4], x = 3.5, y = -5.75, scale = 2) + draw_image(logos_conf[5], x = 4.5, y = -5.75, scale = 2) + draw_image(logos_conf[6], x = 5.5, y = -5.75, scale = 2) + draw_image(logos_conf[7], x = 6.5, y = -5.75, scale = 2) + draw_image(logos_conf[8], x = 7.5, y = -5.75, scale = 2) + draw_image(logos_conf[9], x = 8.5, y = -5.75, scale = 2) + draw_image(logos_conf[10], x = 9.5, y = -5.75, scale = 2) + draw_image(logos_conf[11], x = 10.5, y = -5.75, scale = 2) + draw_image(logos_conf[12], x = 11.5, y = -5.75, scale = 2) + draw_image(logos_conf[13], x = 12.5, y = -5.75, scale = 2) + draw_image(logos_conf[14], x = 13.5, y = -5.75, scale = 2) + draw_image(logos_conf[15], x = 14.5, y = -5.75, scale = 2) + draw_image(logos_conf[16], x = 15.5, y = -5.75, scale = 2)
}
Here is each decision made by every team in 2020. ```r follow_bot("AFC")
follow_bot("NFC")
Seeing teams like the Ravens, Colts, and Browns high up here should not be surprising.
current <- pbp %>% filter(season == 2020) %>% filter(go_boost > 1.5, !is.na(go_boost), !is.na(go)) %>% filter(vegas_wp > .2) %>% group_by(posteam) %>% summarize(go = mean(go), n = n()) %>% ungroup() %>% left_join(nflfastR::teams_colors_logos, by=c('posteam' = 'team_abbr')) %>% arrange(-go) %>% mutate(rank = 1:n()) %>% arrange(posteam) my_title <- glue::glue("Which teams <span style='color:red'>go for it</span> when they <span style='color:red'>should?</span> 2020") ggplot(data = current, aes(x = reorder(posteam, -go), y = go)) + geom_col(data = current, aes(fill = ifelse(posteam=="SEA", team_color2, team_color)), width = 0.5, alpha = .6, show.legend = FALSE ) + nflplotR::geom_nfl_logos(aes(team_abbr = posteam), width = 0.035) + scale_fill_identity(aesthetics = c("fill", "colour")) + theme_ben + theme( panel.grid.major.x = element_blank(), axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank() ) + scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) + labs( x = "", y = "Go rate", title= my_title, subtitle = "Gain in win prob. at least 1.5 percentage points", caption = glue::glue("Sample size in parentheses\nExcl. final 30 seconds of game. Win prob >20%") ) + geom_text(data = current, aes(x = rank, y = -.015, label = glue::glue("({n})")), size = 3, show.legend = FALSE, nudge_x = 0, color="black")
The teams that lost the most win probability in 2020 by conservative decision-making.
current <- pbp %>% filter(season == 2020) %>% group_by(posteam) %>% mutate( games = n_distinct(game_id), ) %>% ungroup() %>% filter(go_boost > 0, go == 0) %>% group_by(posteam) %>% summarize( go = sum(go_boost), n = n(), games = dplyr::first(games), go = go/games ) %>% ungroup() %>% left_join(nflfastR::teams_colors_logos, by=c('posteam' = 'team_abbr')) %>% arrange(-go) %>% mutate(rank = 1:n()) %>% arrange(posteam) my_title <- glue::glue("WP per game <span style='color:red'>lost by kicking in go situations</span>, 2020") ggplot(data = current, aes(x = reorder(posteam, -go), y = go)) + geom_col(data = current, aes(fill = ifelse(posteam=="SEA", team_color2, team_color)), width = 0.5, alpha = .6, show.legend = FALSE ) + nflplotR::geom_nfl_logos(aes(team_abbr = posteam), width = 0.035) + scale_fill_identity(aesthetics = c("fill", "colour")) + theme_ben + theme( panel.grid.major.x = element_blank(), axis.title.x=element_blank(), axis.text.x=element_blank(), axis.ticks.x=element_blank() ) + scale_y_continuous(n.breaks = 10) + labs( x = "", y = "Win probability lost per game", title= my_title, caption = glue::glue("Excl. final 30 seconds of game") )
This includes a function for making the timeline for a given team, so if you're interested in seeing another team, see the source code here.
# function to make team timeline make_timeline <- function(team) { current <- pbp %>% filter(go_boost > 1.5, !is.na(go), !is.na(go_boost)) %>% filter(vegas_wp > .2) %>% group_by(posteam, season) %>% summarize(go = mean(go), n = n()) %>% ungroup() %>% left_join(nflfastR::teams_colors_logos, by=c('posteam' = 'team_abbr')) %>% arrange(-go) %>% mutate(rank = 1:n()) %>% arrange(posteam, season) means <- current %>% group_by(season) %>% summarize(league_go = mean(go)) %>% ungroup() prim <- nflfastR::teams_colors_logos %>% filter(team_abbr == team) %>% pull(team_color) sec <- nflfastR::teams_colors_logos %>% filter(team_abbr == team) %>% pull(team_color2) name <- nflfastR::teams_colors_logos %>% filter(team_abbr == team) %>% pull(team_nick) chart <- current %>% filter(posteam==team) teams <- current %>% filter(posteam != team) ### pass downs over time my_title <- glue::glue("How often do the <span style='color:red'>{name}</span> go for it when they <span style='color:red'>should?</span>") fig <- ggplot(data=chart, aes(x=season,y=go)) + geom_line(data=chart, aes(x=season,y=go),color=prim,size=3) + geom_point(data=chart, aes(x=season,y=go),color=sec,size=8) + geom_line(data=means, aes(x=season,y=league_go),color="black",size=1, linetype="dashed", alpha=.6) + geom_jitter(data=teams, aes(x=season,y=go), color=teams$team_color, size=4, alpha=.6, width = .045) + labs( x = "", y = "Go rate", title= my_title, subtitle = "Gain in win prob. at least 1.5 percentage points", caption = glue::glue("Excl. final 30 seconds of game. Win prob >20%") ) + scale_x_continuous(breaks=c(min(chart$season):max(chart$season))) + scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) + theme_ben+ theme(axis.title.x = element_blank(), axis.title.y = element_text(size = 18), panel.grid.minor.x = element_blank(), axis.text.x = element_text(size=16), axis.text.y = element_text(size=16) ) return(fig) }
Our Ravens: very good at this.
make_timeline("BAL")
The Seahawks: less so!
make_timeline("SEA")
The worst playoff games since 2014 in terms of total win probability lost by conservative decisions on 4th downs. Five of these happened in 2020!
current <- pbp %>% filter(go_boost > 0, go == 0, season_type != "REG") %>% group_by(game_id, posteam, defteam) %>% summarize( go = sum(go_boost), n = n(), season = dplyr::first(season), week = dplyr::first(week) ) %>% ungroup() %>% left_join(nflfastR::teams_colors_logos, by=c('posteam' = 'team_abbr')) %>% arrange(-go) %>% mutate(rank = 1:n()) %>% arrange(rank) %>% head(20) %>% select(rank, posteam, defteam, season, week, go) %>% mutate(week = case_when( week == 18 ~ "WC", week == 19 ~ "DIV", week == 20 ~ "CONF", week == 21 ~ "SB" )) # to split decisions up into two columns so the figure isn't so long d <- bind_cols( current %>% dplyr::slice(1:10), current %>% dplyr::slice(11:20) ) names(d) <- c("rank", "posteam", "defteam", "season", "week", "go", "rank1", "posteam1", "defteam1", "season1", "week1", "go1") d %>% gt() %>% cols_label( rank = " ", posteam = "Team", defteam = "Opp", week = "Week", season = "Season", go = "WP Lost", rank1 = " ", posteam1 = "Team", defteam1= "Opp", week1 = "Week", season1 = "Season", go1 = "WP Lost" ) %>% tab_style( style = cell_text(color = "black", weight = "bold"), locations = list( cells_row_groups(), cells_column_labels(everything()) ) ) %>% text_transform( locations = cells_body(vars(posteam, posteam1, defteam, defteam1)), fn = function(x) web_image(url = paste0('https://a.espncdn.com/i/teamlogos/nfl/500/',x,'.png')) ) %>% cols_width( everything() ~ px(400), ) %>% cols_width( vars(posteam, posteam1) ~ px(50), vars(defteam, defteam1) ~ px(50), vars(week, week1) ~ px(50), vars(rank, rank1) ~ px(50), vars(season, season1) ~ px(70), vars(go, go1) ~ px(80) ) %>% add_gt_options() %>% fmt_number( columns = vars(go, go1), decimals = 1 ) %>% cols_align( columns = 1:12, align = "center" ) %>% tab_header( title = md(glue::glue("Win probability lost by kicking in playoff games, 2014-2022")) ) %>% tab_style( style = cell_borders( sides = c("left"), color = "#BBBBBB", weight = px(1.5), style = "solid" ), locations = cells_body( columns = vars(rank1), rows = everything() ) )
More coming, maybe. . .
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.