# TODO: move utility functions into ./R/utils.R? Move blocks of code into functions?
current_season <- function(){
  # Return the two-digit start year of the season currently in progress,
  # e.g. 23 for the 2023/24 season. Seasons are taken to roll over in July.
  # require(lubridate)
  todayy <- today()
  (todayy %>% year()) - ifelse(todayy %>% month() > 6, 0, 1) - 2000
}
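# Worked example of the rollover above: on 2023-08-15 current_season() returns
# 23 (the 2023/24 season); on 2023-03-01 it returns 22, because the 2022/23
# season is still in progress until July.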
make_season <- function(yr) paste0(yr, yr+1)
make_url <-
  function(yr, url_base = 'https://www.football-data.co.uk/mmz4281/')
    paste0(url_base, make_season(yr), '/data.zip')
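# Example: make_season(23) gives "2324" and make_url(23) gives
# "https://www.football-data.co.uk/mmz4281/2324/data.zip", the zip of all
# leagues for one season on football-data.co.uk.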
possibly_read_csv <- # read one csv as all-character columns; empty tibble on failure ----
  purrr::possibly(
    function(url_csv)
      readr::read_csv(url_csv,
                      col_types = cols(.default = "c"),            # keep cols as character
                      locale = locale(encoding = "windows-1252")), # essential
    otherwise = tibble())
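# Usage sketch (the file names below are hypothetical; in get_fdata() the csvs
# come from the pinned zip for a season):
# possibly_read_csv("E0.csv")            # one league's results, all columns character
# possibly_read_csv("no-such-file.csv")  # fails quietly and returns tibble()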
# read_csv <- # memoise limit_rate safely ----
# memoise::memoise(
# ratelimitr::limit_rate(
# possibly( function(...) {
# pb$tick(tokens = list(what = "urll: ")) ;
# readr::read_csv(...) # function (file, col_names = TRUE
# # TODO: fread to speed up read?
# # data.table::fread # function (input = "", file = NULL,
# }, otherwise = tibble() ) ,
# ratelimitr::rate(2, 1)))
# # memoise::memoise / ratelimitr::limit_rate / purrr::safely
# # read_csv <- readr::read_csv %>% safely() %>% limit_rate(rate(1, 1)) %>% memoise()
extract_name <- function(urll)
  # extract the season name (e.g. "1415") from a football-data url to a zip file
  (urll %>% str_match('/([0-9]{4})/'))[1, 2]
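# Example: extract_name("https://www.football-data.co.uk/mmz4281/1415/data.zip")
# returns "1415", which is used below as the pin name -- one pin per season.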
get_fdata <- function(season_starts, board_name) # ----
  # Download the raw data for _all_ leagues, one zip per season, for every
  # season start year in season_starts. Returns a single nested tibble with
  # one row per season and that season's csvs row-bound in the csv list-column.
  tibble(season_start = season_starts) %>%
    mutate(
      season = season_start %>% make_season,   # season name, e.g. "2324"
      urll   = season_start %>% make_url,      # url to the raw data zip
      csv    = urll %>%                        # for each season ...
        map( ~ map_dfr( # ... read each league's csv, then row-bind into one tibble per season
          # Pin each zip file. WARNING: the name argument is essential,
          # otherwise the data is overwritten each season.
          # Store the pins in a sub-folder so git keeps the raw data;
          # otherwise you have to download it again each time.
          #
          # NB: we cache and then read, i.e. read_csv(pin(resource)), over the
          # list of csvs extracted from one season's zip file.
          pin(., name = extract_name(.), board = board_name,
              description = glue("Source: football-data.co.uk. ",
                                 "All leagues for one season ({extract_name(.)})") ),
          ~ possibly_read_csv(.)) )
    ) # mutate
# list(urll) %>% # for each season
# map(.x = urll, .f = ~ map_dfr( # read each league from a csv then row bind them into one tibble per season.
# # pin each zip file - WARNING: name argument is essential else data overwritten each season
# # store in _sub_folder for git to store in _raw_ data else u have to download it each time.
# .x = .f = function(.){
# pin(urll, name = extract_name(urll), board = board_name,
# description = glue("Source: football-data.co.uk. ",
# "Football-Data is a free football betting portal ",
# "providing historical results & odds for many years of data")) %>%
# pin_info(name = extract_name(urll), board = board_name)
# pin_get(name = extract_name(urll), board = board_name, extract=TRUE) %>%
# str()
# }) )
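# Usage sketch (hedged): get_fdata() assumes the legacy pins API, where boards
# are registered by name; the board name and cache path here are illustrative.
# board_register_local(name = "fdata_raw", cache = "data-raw/pins")
# fdata_raw <- get_fdata(season_starts = 19:current_season(), board_name = "fdata_raw")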
transform_fdata <- # transform the raw data ----
  . %>%
  unnest(csv) %>%                          # one row per match
  type.convert(as.is = TRUE) %>%           # guess column types; keep characters as character
  mutate(Date = Date %>% as.character()) %>%
  mutate(datee = Date %>%                  # dates arrive as dd/mm/yy (8 chars) or dd/mm/yyyy
           as_date(format = paste0("%d/%m/",
                                   ifelse(nchar(Date) == 8, "%y", "%Y"))),
         .after = Date) %>%
  select(-Date) %>%
  arrange(desc(datee), desc(Time), Div) %>%
  relocate(datee, Time, Div, .before = season_start) %>%
  filter(!is.na(Div)) # drop rows with no Div; TODO: check which season/Div generate blanks
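# Usage sketch: transform_fdata is a magrittr functional sequence, so it can be
# called like a function on the nested tibble returned by get_fdata():
# fdata <- fdata_raw %>% transform_fdata()   # one row per match, newest first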