inst/doc/cricsheet.R

## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  echo = TRUE
)

## ----setup--------------------------------------------------------------------
library(cricketdata)
library(readr)
library(dplyr)
library(stringr)
library(showtext)
library(ggplot2)
library(gghighlight)
library(ggtext)
library(patchwork)

## ----getdata, eval=FALSE, echo=FALSE------------------------------------------
#  # Avoid downloading the data when the package is checked by CRAN.
#  # This only needs to be run once to store the data locally
#  wbbl_bbb <- fetch_cricsheet(competition = "wbbl", gender = "female")
#  wbbl_match_info <- fetch_cricsheet(competition = "wbbl", type = "match", gender = "female")
#  saveRDS(wbbl_bbb, "inst/extdata/wbbl_bbb.rds")
#  saveRDS(wbbl_match_info, "inst/extdata/wbbl_match_info.rds")

## ----loaddata, include=FALSE--------------------------------------------------
wbbl_bbb <- readRDS("../inst/extdata/wbbl_bbb.rds")
wbbl_match_info <- readRDS("../inst/extdata/wbbl_match_info.rds")

## -----------------------------------------------------------------------------
# Data from 2015/16 (WBBL01) excluded due to only having 10 matches worth of data
# in the Cricsheet spreadsheet.
# Data from 2021/22 and later (WBBL07) excluded as incomplete at time of article
wbbl_bbb_tidy <- wbbl_bbb %>%
  filter(!season %in% c("2015/16", "2021/22")) %>%
  filter(start_date < "2021-11-07")

## -----------------------------------------------------------------------------
# Alyssa Healy compared to all players who have batted in 3+ innings in a season.
batting_per_season <- wbbl_bbb_tidy %>%
  group_by(season, striker) %>%
  summarise(
    innings_total = length(unique(match_id)),
    runs_off_bat_total = sum(runs_off_bat),
    balls_faced_total = length(ball),
    .groups = "keep"
  ) %>%
  mutate(
    runs_per_innings_avg = round(runs_off_bat_total / innings_total, 1),
    strike_rate = round(runs_off_bat_total / balls_faced_total * 100, 1)
  ) %>%
  filter(innings_total > 2) %>%
  mutate(is_healy = (striker == "AJ Healy")) %>%
  ungroup()

## ----warning=FALSE, out.width="100%"------------------------------------------
# Import fonts from Google Fonts
font_add_google("Roboto Condensed", "roboto_con")
font_add_google("Staatliches", "staat")
showtext_auto()

# Build plot
batting_per_season %>%
  ggplot(aes(
    x = season, y = runs_per_innings_avg,
    group = striker, colour = is_healy
  )) +
  geom_line(linewidth = 2, colour = "#F80F61FF") +
  gghighlight(is_healy,
    label_key = striker,
    label_params = aes(
      size = 6, force_pull = 0.1, nudge_y = 10, label.size = 1,
      family = "roboto_con", label.padding = 0.5,
      fill = "#19232FFF",
      colour = "#F80F61FF"
    ),
    unhighlighted_params = list(size = 1, color = "#187999FF")
  ) +
  labs(
    title = "WBBL: Average runs scored per innings (3+ innings)",
    x = NULL, y = NULL,
    caption = "**Source:** Cricsheet.org // **Plot:** @jacquietran"
  ) +
  theme_minimal() +
  theme(
    text = element_text(size = 18, family = "roboto_con", colour = "#FFFFFF"),
    plot.title = element_text(family = "staat", margin = margin(0, 0, 15, 0)),
    plot.caption = element_markdown(size = NULL, margin = margin(15, 0, 0, 0)),
    axis.text = element_text(colour = "#FFFFFF"),
    legend.position = "none",
    panel.grid.major = element_line(linetype = "dashed"),
    panel.grid.minor = element_blank(),
    plot.background = element_rect(
      fill = "#171F2AFF", colour = NA
    ),
    panel.spacing = unit(2, "lines"),
    plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm")
  )

## -----------------------------------------------------------------------------
# Create new variable for ball number in each over
ball_number_faced <- wbbl_bbb_tidy %>%
  mutate(ball_num_in_over = sub(".*\\.", "", ball))

# Summarise number of balls faced of each ball number, per batter
ball_number_faced_summary <- ball_number_faced %>%
  group_by(ball_num_in_over, striker) %>%
  summarise(balls_faced = n(), .groups = "drop")

# Dismissals by ball number
dismissals_by_ball_number <- ball_number_faced %>%
  select(ball_num_in_over, striker, wicket_type) %>%
  filter(wicket_type != "") %>%
  group_by(ball_num_in_over, striker) %>%
  summarise(dismissals_n = n(), .groups = "drop")

## -----------------------------------------------------------------------------
# Merge data and summarise to league-wide dismissals rate by ball number
dismissals_by_ball_number_summary <- left_join(
  ball_number_faced_summary, dismissals_by_ball_number,
  by = c("ball_num_in_over", "striker")
) %>%
  tidyr::replace_na(list(dismissals_n = 0)) %>%
  group_by(striker) %>%
  mutate(total_balls_faced = sum(balls_faced)) %>%
  ungroup() %>%
  mutate(dismissals_pct = round(dismissals_n / balls_faced * 100, 2)) %>%
  # Include those who have faced more than 200 balls total
  filter(total_balls_faced >= 200) %>%
  # Exclude balls beyond 6 - infrequent occurrences
  filter(ball_num_in_over < 7)

# Extract data for specific players
# Healy
dismissals_by_ball_number_summary_healy <- dismissals_by_ball_number_summary %>%
  filter(striker == "AJ Healy")
# Mooney
dismissals_by_ball_number_summary_mooney <- dismissals_by_ball_number_summary %>%
  filter(striker == "BL Mooney")
# Lanning
dismissals_by_ball_number_summary_lanning <- dismissals_by_ball_number_summary %>%
  filter(striker == "MM Lanning")
# Perry
dismissals_by_ball_number_summary_perry <- dismissals_by_ball_number_summary %>%
  filter(striker == "EA Perry")
# Devine
dismissals_by_ball_number_summary_devine <- dismissals_by_ball_number_summary %>%
  filter(striker == "SFM Devine")
# Knight
dismissals_by_ball_number_summary_knight <- dismissals_by_ball_number_summary %>%
  filter(striker == "HC Knight")

## ---- out.width="100%"--------------------------------------------------------
# Define consistent plot features ----------------------------------------------
plot_features <- list(
  coord_cartesian(ylim = c(0, 10)),
  theme_minimal(),
  theme(
    text = element_text(family = "roboto_con", colour = "#FFFFFF"),
    plot.title = element_text(
      size = 11, family = "staat", margin = margin(0, 0, 15, 0)
    ),
    plot.subtitle = element_text(
      size = 12, family = "staat", margin = margin(0, 0, 15, 0)
    ),
    plot.caption = element_markdown(
      size = 10, margin = margin(15, 0, 0, 0)
    ),
    axis.text = element_text(size = 9, colour = "#FFFFFF"),
    legend.position = "none",
    panel.grid.major.y = element_line(linetype = "dashed"),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    plot.background = element_rect(
      fill = "#171F2AFF", colour = NA
    ),
    panel.spacing = unit(2, "lines"),
    plot.margin = unit(c(0.25, 0.25, 0.25, 0.25), "cm")
  ),
  labs(x = NULL, y = NULL)
)

# Build plots ------------------------------------------------------------------
showtext_auto()

# Healy
p1 <- dismissals_by_ball_number_summary %>%
  ggplot(aes(x = ball_num_in_over, y = dismissals_pct)) +
  geom_boxplot(
    fill = "#FFFFFF", colour = "#FFFFFF", size = .5,
    alpha = 0.25, notch = TRUE, outlier.shape = NA, coef = 0
  ) +
  geom_point(
    data = dismissals_by_ball_number_summary_healy,
    colour = "#F80F61FF", size = 3
  ) +
  labs(
    title = "WBBL: % dismissals by ball number",
    subtitle = "AJ Healy"
  ) +
  plot_features

# Mooney
p2 <- dismissals_by_ball_number_summary %>%
  ggplot(aes(x = ball_num_in_over, y = dismissals_pct)) +
  geom_boxplot(
    fill = "#FFFFFF", colour = "#FFFFFF", size = .5,
    alpha = 0.25, notch = TRUE, outlier.shape = NA, coef = 0
  ) +
  geom_point(
    data = dismissals_by_ball_number_summary_mooney,
    colour = "#FA6900FF", size = 3
  ) +
  labs(subtitle = "BL Mooney") +
  plot_features

# Lanning
p3 <- dismissals_by_ball_number_summary %>%
  ggplot(aes(x = ball_num_in_over, y = dismissals_pct)) +
  geom_boxplot(
    fill = "#FFFFFF", colour = "#FFFFFF", size = .5,
    alpha = 0.25, notch = TRUE, outlier.shape = NA, coef = 0
  ) +
  geom_point(
    data = dismissals_by_ball_number_summary_lanning,
    colour = "#018821FF", size = 3
  ) +
  labs(subtitle = "MM Lanning") +
  plot_features

# Perry
p4 <- dismissals_by_ball_number_summary %>%
  ggplot(aes(x = ball_num_in_over, y = dismissals_pct)) +
  geom_boxplot(
    fill = "#FFFFFF", colour = "#FFFFFF", size = .5,
    alpha = 0.25, notch = TRUE, outlier.shape = NA, coef = 0
  ) +
  geom_point(
    data = dismissals_by_ball_number_summary_perry,
    colour = "#F80F61FF", size = 3
  ) +
  labs(subtitle = "EA Perry") +
  plot_features

# Devine
p5 <- dismissals_by_ball_number_summary %>%
  ggplot(aes(x = ball_num_in_over, y = dismissals_pct)) +
  geom_boxplot(
    fill = "#FFFFFF", colour = "#FFFFFF", size = .5,
    alpha = 0.25, notch = TRUE, outlier.shape = NA, coef = 0
  ) +
  geom_point(
    data = dismissals_by_ball_number_summary_devine,
    colour = "#FA6900FF", size = 3
  ) +
  labs(subtitle = "SFM Devine") +
  plot_features

# Knight
p6 <- dismissals_by_ball_number_summary %>%
  ggplot(aes(x = ball_num_in_over, y = dismissals_pct)) +
  geom_boxplot(
    fill = "#FFFFFF", colour = "#FFFFFF", size = .5,
    alpha = 0.25, notch = TRUE, outlier.shape = NA, coef = 0
  ) +
  geom_point(
    data = dismissals_by_ball_number_summary_knight,
    colour = "#95C65CFF", size = 3
  ) +
  labs(
    subtitle = "HC Knight",
    caption = "**Source:** Cricsheet.org // **Plot:** @jacquietran"
  ) +
  plot_features

# Quilt the plots --------------------------------------------------------------
(p1 + p2 + p3) / (p4 + p5 + p6)

## -----------------------------------------------------------------------------
# Subset match metadata for WBB07 games
wbbl07_match_info_tidy <- wbbl_match_info %>%
  filter(season == "2021/22", date <= "2021/11/07") %>%
  select(
    match_id, winner, winner_runs, winner_wickets, method, outcome,
    eliminator
  ) %>%
  mutate(match_id = factor(match_id))

# Subset ball-by-ball data for WBBL07 games
wbbl07_bbb_tidy <- wbbl_bbb %>%
  filter(match_id %in% wbbl07_match_info_tidy$match_id) %>%
  mutate(
    match_id = factor(match_id),
    runs_scored = runs_off_bat + extras,
    wicket_type = case_when(
      wicket_type == "" ~ NA_character_,
      TRUE ~ wicket_type
    )
  ) %>%
  group_by(match_id, innings) %>%
  mutate(
    temp_var = 1,
    balls_cumulative = cumsum(temp_var),
    runs_cumulative = cumsum(runs_scored),
    runs_total = max(runs_cumulative)
  ) %>%
  ungroup() %>%
  select(-temp_var) %>%
  # Merge match metadata and ball-by-ball data
  left_join(., wbbl07_match_info_tidy, by = "match_id") %>%
  mutate(
    outcome_batting_team = case_when(
      outcome %in% c("no result", "tie") ~ as.character(outcome),
      winner == batting_team ~ "won",
      TRUE ~ "lost"
    ),
    outcome_bowling_team = case_when(
      outcome %in% c("no result", "tie") ~ as.character(outcome),
      winner == bowling_team ~ "won",
      TRUE ~ "lost"
    )
  )

## -----------------------------------------------------------------------------
team_strike_rate <- wbbl07_bbb_tidy %>%
  # Exclude matches that ended with a Super Over ("tie")
  # and matches that were called off ("no result")
  filter(!outcome_batting_team %in% c("tie", "no result")) %>%
  group_by(match_id, innings) %>%
  mutate(
    rolling_strike_rate = round(
      runs_cumulative / balls_cumulative * 100, 1
    ),
    wicket_ball_num = case_when(
      !is.na(wicket_type) ~ balls_cumulative,
      TRUE ~ NA_real_
    ),
    wicket_strike_rate = case_when(
      !is.na(wicket_type) ~ rolling_strike_rate,
      TRUE ~ NA_real_
    ),
    innings_description = case_when(
      innings == 1 ~ "Batting 1st",
      innings == 2 ~ "Batting 2nd"
    ),
    bowling_team_short = word(bowling_team, -1),
    start_date_day = lubridate::day(start_date),
    start_date_month = lubridate::month(start_date),
    match_details = glue::glue(
      "{innings_description} vs. {bowling_team_short} ({start_date_day}/{start_date_month})"
    )
  ) %>%
  arrange(match_id, innings, balls_cumulative) %>%
  mutate(
    match_details = factor(
      match_details,
      levels = unique(match_details)
    ),
    outcome_batting_team = factor(
      outcome_batting_team,
      levels = c("won", "lost")
    )
  )

## ----echo=FALSE, out.width="75%", fig.align="center"--------------------------
knitr::include_graphics("figs/tweet.png")

## ---- fig.width=9, fig.height=12, out.width="100%", warning=FALSE, message=FALSE----
# Filter to Renegades' innings only --------------------------------------------
team_strike_rate_renegades <- team_strike_rate %>%
  filter(str_detect(batting_team, "Renegades"))

# Build plot -------------------------------------------------------------------
showtext_auto()
team_strike_rate_renegades %>%
  ggplot(aes(x = balls_cumulative, y = rolling_strike_rate)) +
  facet_wrap(~match_details, ncol = 3) +
  geom_hline(yintercept = 100, linetype = "dashed", colour = "#CCCCCC") +
  geom_line(aes(colour = outcome_batting_team), linewidth = 1.5) +
  geom_point(
    aes(
      x = team_strike_rate_renegades$wicket_ball_num,
      y = team_strike_rate_renegades$wicket_strike_rate
    ),
    colour = "red", size = 3, alpha = 0.75
  ) +
  labs(
    title = "WBBL07: Melbourne Renegades - Team strike rate (games up to 7 Nov 2021)",
    x = "Ball number in an innings", y = NULL,
    caption = "**Source:** Cricsheet.org // **Plot:** @jacquietran"
  ) +
  scale_x_continuous(breaks = seq(0, 120, by = 30)) +
  scale_color_manual(
    values = c("won" = "#4a8bad", "lost" = "#AD4A8B"),
    labels = c("Renegades won", "Renegades lost")
  ) +
  coord_cartesian(ylim = c(0, 200)) +
  theme_minimal() +
  theme(
    text = element_text(size = 18, family = "roboto_con", colour = "#FFFFFF"),
    legend.position = "top",
    legend.title = element_blank(),
    legend.key.size = unit(1.5, "cm"),
    legend.margin = margin(0, 0, 0, 0),
    legend.spacing.x = unit(0, "cm"),
    legend.spacing.y = unit(0, "cm"),
    plot.title = element_text(family = "staat", margin = margin(0, 0, 15, 0)),
    plot.caption = element_markdown(size = NULL, margin = margin(15, 0, 0, 0)),
    strip.text = element_text(colour = "#FFFFFF", size = 12),
    axis.text = element_text(colour = "#FFFFFF"),
    axis.title.x = element_text(margin = margin(15, 0, 0, 0)),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.grid.major.x = element_line(colour = "#203b60", linetype = "dotted"),
    panel.grid.minor.x = element_blank(),
    plot.background = element_rect(
      fill = "#171F2AFF",
      colour = NA
    ),
    panel.spacing = unit(2, "lines"),
    plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm")
  )

Try the cricketdata package in your browser

Any scripts or data that you put into this service are public.

cricketdata documentation built on Aug. 29, 2023, 5:10 p.m.