# Chunk defaults for the rendered document: echo the code of every chunk.
knitr::opts_chunk$set(echo = TRUE)

# Load the GitHub-hosted helper packages through pacman, then attach the
# packages used directly below.
pacman::p_load_current_gh("r-lib/pkgapi")
pacman::p_load_current_gh("hadley/emo")
library(pkgapi)
library(dplyr)

# Map the package one directory up and keep only the definitions flagged as
# exported.
pkg <- pkgapi::map_package(path = "../")
exported <- dplyr::filter(pkg$defs, exported == TRUE)

# Number of exported definitions whose file matches "cfbd", minus those whose
# file matches "cfbd_pbp_data".
cfbd_funcs <- sum(stringr::str_detect(exported$file, "cfbd")) -
  sum(stringr::str_detect(exported$file, "cfbd_pbp_data"))

# Read the DESCRIPTION file from the main branch on GitHub and strip the
# "Version:" label from the matching line(s).
pkg_name <- "sportsdataverse/cfbfastR"
url <- paste0(
  "https://raw.githubusercontent.com/",
  pkg_name,
  "/main/DESCRIPTION"
)
x <- readLines(url)
remote_version <- gsub("Version:\\s*", "", grep("Version:", x, value = TRUE))

Play-by-play data comparisons

cfbfastR::cfbd_pbp_data() (1 season, \~6-7 minutes `r emo::ji("confused_face")`)

library(dplyr)
library(tidyr)
library(purrr)
# Play-by-Play Data Pull --------------------------------------------------
# Build one (year, week) row per request, then group those rows by season so
# each season can be scraped and reported on separately.
week_vector <- 1:15
year_vector <- 2019
weekly_year_df <- expand.grid(year = year_vector, week = week_vector)
### scrape yearly
year_split <- split(weekly_year_df, weekly_year_df$year)

### starting time
tictoc::tic()

# Iterate with seq_along() so an empty list performs zero iterations, and do
# not reassign the loop index inside the body: every season in year_split is
# processed, not just the first one repeatedly.
for (i in seq_along(year_split)) {
  print(paste0("Working on ", year_split[[i]][1, 1]))

  progressr::with_progress({
    # Add a list-column `pbp` holding one result per (year, week) row.
    # The scraper is namespace-qualified so the chunk does not depend on
    # cfbfastR having been attached elsewhere.
    year_split[[i]] <- year_split[[i]] %>%
      dplyr::mutate(
        pbp = purrr::map2(
          .x = year,
          .y = week,
          cfbfastR::cfbd_pbp_data,
          season_type = "both",
          epa_wpa = TRUE
        )
      )
  })
}

tictoc::toc()
## (~6-7 minutes)
# Expand the per-week list-column into rows, season by season.
year_split <- lapply(year_split, function(x) {
  x %>% tidyr::unnest(pbp, names_repair = "minimal")
})

# Stack all seasons into a single data frame.
all_years <- dplyr::bind_rows(year_split)

cfbscrapR::cfb_pbp_data() (1 season, \~8-10 minutes `r emo::ji("old_man")`)

# Legacy cfbscrapR pull, kept for timing comparison with the cfbfastR chunk.
# Build one (year, week) row per request, then group those rows by season.
week_vector <- 1:15
year_vector <- 2019

weekly_year_df <- expand.grid(year = year_vector, week = week_vector)
### scrape yearly
year_split <- split(weekly_year_df, weekly_year_df$year)
### starting time
tictoc::tic()
# seq_along() performs zero iterations on an empty list, whereas
# 1:length(x) would iterate over c(1, 0).
for (i in seq_along(year_split)) {
  print(paste0("Working on ", year_split[[i]][1, 1]))
  # Add a list-column `pbp` holding one result per (year, week) row.
  year_split[[i]] <- year_split[[i]] %>%
    dplyr::mutate(
      pbp = purrr::map2(
        .x = year,
        .y = week,
        cfbscrapR::cfb_pbp_data,
        season_type = "both",
        epa_wpa = TRUE
      )
    )
}
tictoc::toc()
## (~8-10 minutes)
# Expand the per-week list-column into rows, season by season.
year_split19 <- lapply(year_split, function(x) {
  x %>% tidyr::unnest(pbp)
})

# Namespace-qualified so this chunk does not rely on dplyr having been
# attached by a different chunk.
pbp_2019 <- dplyr::bind_rows(year_split19)

The fastR way: load_cfb_pbp() (7 seasons, \~1-1.5 minutes `r emo::ji("flame")`)

We are going to load in data for seasons 2014-2020. It will take between 45 and 90 seconds to run.

# Seasons to load, and an empty placeholder that the load below overwrites.
seasons <- 2014:2020
pbp <- data.frame()

# Time the bulk load; progress handlers are active for the duration of the call.
tictoc::tic()
progressr::with_progress({
  pbp <- cfbfastR::load_cfb_pbp(seasons)
})
tictoc::toc()


saiemgilani/cfbfastR documentation built on Sept. 14, 2024, 2:39 p.m.