inst/doc/cricketdata_R_pkg.R

## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  echo = TRUE,
  cache = TRUE,
  warning = FALSE
)

## -----------------------------------------------------------------------------
library(cricketdata)
library(dplyr)
library(ggplot2)

## ----getdata, eval=FALSE, echo=FALSE------------------------------------------
#  # Avoid downloading the data when the package is checked by CRAN.
#  # This only needs to be run once to store the data locally
#  ipl_bbb <- fetch_cricsheet("bbb", "male", "ipl")
#  wt20 <- fetch_cricinfo("T20", "Women", "Bowling")
#  menODI <- fetch_cricinfo("ODI", "Men", "Batting",
#    type = "innings",
#    country = "United States of America"
#  )
#  meg_lanning_id <- find_player_id("Meg Lanning")$ID
#  MegLanning <- fetch_player_data(meg_lanning_id, "ODI") %>%
#    mutate(NotOut = (Dismissal == "not out"))
#  aus_women <- fetch_player_meta(c(329336, 275487))
#  
#  saveRDS(wt20, here::here("inst/extdata/wt20.rds"))
#  saveRDS(menODI, here::here("inst/extdata/usmenODI.rds"))
#  saveRDS(MegLanning, here::here("inst/extdata/MegLanning.rds"))
#  saveRDS(meg_lanning_id, here::here("inst/extdata/meg_lanning_id.rds"))
#  saveRDS(ipl_bbb, here::here("inst/extdata/ipl_bbb.rds"))
#  saveRDS(aus_women, here::here("inst/extdata/aus_women.rds"))

## ----loaddata, include=FALSE--------------------------------------------------
ipl_bbb <- readRDS(here::here("inst/extdata/ipl_bbb.rds"))
wt20 <- readRDS(here::here("inst/extdata/wt20.rds"))
menODI <- readRDS(here::here("inst/extdata/usmenODI.rds"))
MegLanning <- readRDS(here::here("inst/extdata/MegLanning.rds"))
meg_lanning_id <- readRDS(here::here("inst/extdata/meg_lanning_id.rds"))
aus_women <- readRDS(here::here("inst/extdata/aus_women.rds"))

## ---- eval=FALSE--------------------------------------------------------------
#  install.packages("cricketdata", dependencies = TRUE)

## ---- eval=FALSE--------------------------------------------------------------
#  install.packages("devtools")
#  devtools::install_github("robjhyndman/cricketdata")

## ---- eval=FALSE, echo=TRUE---------------------------------------------------
#  # Fetch all Women's Bowling data for T20 format
#  wt20 <- fetch_cricinfo("T20", "Women", "Bowling")

## ----tbl-wt20-----------------------------------------------------------------
# Looking at data
wt20 %>%
  glimpse()

# Table showing certain features of the data
wt20 %>%
  select(Player, Country, Matches, Runs, Wickets, Economy, StrikeRate) %>%
  head() %>%
  knitr::kable(
    digits = 2, align = "c",
    caption = "Women Player career profile for international T20"
  )

## ----fig-wt20SRvER, fig.cap="Strike Rate (balls bowled per wicket) Vs Average (runs conceded per wicket) for Women international T20 bowlers. Each observation represents one player, who has taken at least 50 international wickets."----
# Plotting Data
wt20 %>%
  filter(Wickets >= 50) %>%
  ggplot(aes(y = StrikeRate, x = Average)) +
  geom_point(alpha = 0.3, col = "blue") +
  ggtitle("Women International T20 Bowlers") +
  ylab("Balls bowled per wicket") +
  xlab("Runs conceded per wicket")

## ---- echo=TRUE, eval=FALSE---------------------------------------------------
#  # Fetch all USA Men's ODI data by innings
#  menODI <- fetch_cricinfo("ODI", "Men", "Batting",
#    type = "innings",
#    country = "United States of America"
#  )

## ----tbl-USA100s--------------------------------------------------------------
# Table of USA player who have scored a century
menODI %>%
  filter(Runs >= 100) %>%
  select(Player, Runs, BallsFaced, Fours, Sixes, Opposition) %>%
  knitr::kable(digits = 2)

## ---- echo=FALSE--------------------------------------------------------------
# menODI %>%
#   filter(Runs >= 50) %>%
#   ggplot(aes(y = Runs, x = BallsFaced) ) +
#   geom_point(size = 2) +
#   geom_text(aes(label= Player), vjust=-0.5, color="#013369",
#             position = position_dodge(0.9), size=2) +
#   ylab("Runs Scored") + xlab("Balls Faced")

## -----------------------------------------------------------------------------
meg_lanning_id

## ----echo=TRUE, eval=FALSE----------------------------------------------------
#  # Fetching the player Meg Lanning's playing data
#  MegLanning <- fetch_player_data(meg_lanning_id, "ODI") %>%
#    mutate(NotOut = (Dismissal == "not out"))

## ----fig-meglanning, fig.cap="Meg Lanning, Australian captain, has shown amazing consistency over her career, with centuries scored in every year of her career except for 2021, when her highest score from 6 matches was 53."----
dim(MegLanning)
names(MegLanning)

# Compute batting average
MLave <- MegLanning %>%
  filter(!is.na(Runs)) %>%
  summarise(Average = sum(Runs) / (n() - sum(NotOut))) %>%
  pull(Average)
names(MLave) <- paste("Average =", round(MLave, 2))

# Plot ODI scores
ggplot(MegLanning) +
  geom_hline(aes(yintercept = MLave), col = "gray") +
  geom_point(aes(x = Date, y = Runs, col = NotOut)) +
  ggtitle("Meg Lanning ODI Scores") +
  scale_y_continuous(sec.axis = sec_axis(~., breaks = MLave))

## ----echo=TRUE, eval=FALSE----------------------------------------------------
#  # Fetch all IPL ball-by-ball data
#  ipl_bbb <- fetch_cricsheet("bbb", "male", "ipl")

## -----------------------------------------------------------------------------
ipl_bbb %>%
  glimpse()

## ----fig-iplbatter, fig.cap="Top 20 prolific batters in IPL 2022. We show what percentage of balls they hit for a boundary (4 or 6) against percentage of how many balls they do not score off of (dot percent). Ideally we want to be in top left quadrant, high boundary % and low dot %."----
# Top 20 batters wrt Boundary and Dot % in IPL 2022 season
ipl_bbb %>%
  filter(season == "2022") %>%
  group_by(striker) %>%
  summarize(
    Runs = sum(runs_off_bat), BallsFaced = n() - sum(!is.na(wides)),
    StrikeRate = Runs / BallsFaced, DotPercent = sum(runs_off_bat == 0) * 100 / BallsFaced,
    BoundaryPercent = sum(runs_off_bat %in% c(4, 6)) * 100 / BallsFaced
  ) %>%
  arrange(desc(Runs)) %>%
  rename(Batter = striker) %>%
  slice(1:20) %>%
  ggplot(aes(y = BoundaryPercent, x = DotPercent, size = BallsFaced)) +
  geom_point(color = "red", alpha = 0.3) +
  geom_text(aes(label = Batter),
    vjust = -0.5, hjust = 0.5, color = "#013369",
    position = position_dodge(0.9), size = 3
  ) +
  ylab("Boundary Percent") +
  xlab("Dot Percent") +
  ggtitle("IPL 2022: Top 20 Batters")

## ----tbl-IPL2022Batters-------------------------------------------------------
# Top 10 prolific batters in IPL 2022 season.
ipl_bbb %>%
  filter(season == "2022") %>%
  group_by(striker) %>%
  summarize(
    Runs = sum(runs_off_bat), BallsFaced = n() - sum(!is.na(wides)),
    StrikeRate = Runs / BallsFaced,
    DotPercent = sum(runs_off_bat == 0) * 100 / BallsFaced,
    BoundaryPercent = sum(runs_off_bat %in% c(4, 6)) * 100 / BallsFaced
  ) %>%
  arrange(desc(Runs)) %>%
  rename(Batter = striker) %>%
  slice(1:10) %>%
  knitr::kable(digits = 1, align = "c")

## ----tbl-playermetadata-------------------------------------------------------
player_meta %>%
  filter(!is.na(playing_role)) %>%
  select(-cricinfo_id, -unique_name) %>%
  head() %>%
  knitr::kable(
    digits = 1, align = "c", format = "pipe",
    col.names = c(
      "ID", "FullName", "Country", "DOB", "BirthPlace",
      "BattingStyle", "BowlingStyle", "PlayingRole"
    )
  )

## ----echo=TRUE, eval=FALSE----------------------------------------------------
#  # Download meta data on Meg Lanning and Ellyse Perry
#  aus_women <- fetch_player_meta(c(329336, 275487))

## ----tbl-ausplayermetadata----------------------------------------------------
aus_women %>%
  knitr::kable(
    digits = 1, align = "c", format = "pipe",
    col.names = c(
      "ID", "FullName", "Country", "DOB", "BirthPlace", "BattingStyle",
      "BowlingStyle", "PlayingRole"
    )
  )

Try the cricketdata package in your browser

Any scripts or data that you put into this service are public.

cricketdata documentation built on Aug. 29, 2023, 5:10 p.m.