fdata repo
2021-01-26 20:11:14
# # A tibble: 4 x 3
# datee Time Div
# <date> <fct> <fct>
# 1 2021-01-25 20:15 P1
# 2 2021-01-25 20:00 I2
# 3 2021-01-25 20:00 SP1
# 4 2021-01-25 20:00 SP2
# Most recent match date: 2021-01-25
# Most recent match dates (desc) for the top 5 leagues:
# # A tibble: 5 x 2
# Div most_recent_match
# <fct> <date>
# 1 SP1 2021-01-25
# 2 D1 2021-01-24
# 3 F1 2021-01-24
# 4 I1 2021-01-24
# 5 E0 2021-01-23
# Most recent match dates (asc) for all leagues:
# # A tibble: 22 x 2
# Div datee
# <fct> <date>
# 1 SC2 2021-01-02
# 2 SC3 2021-01-02
# 3 E0 2021-01-23
# 4 E2 2021-01-23
# 5 E3 2021-01-23
# 6 EC 2021-01-23
# 7 SC0 2021-01-23
# 8 SC1 2021-01-23
# 9 B1 2021-01-24
# 10 D1 2021-01-24
# # … with 12 more rows
The goal of this repo is to work with data from football-data.co.uk.
The raw data files from football-data, stored in a local cache (as pins), are organized as follows:
# ./data/pins
# └── local
# ├── 1415
# ├── 1516
# ├── 1617
# ├── 1718
# ├── 1819
# ├── 1920
# ├── 2021
# ├── data.txt
# └── data.txt.lock
| File | Purpose |
| --- | --- |
| `run.sh` | Shell script to run `run.R` in a persistent background process. Works on Unix-like systems. |
| `run.R` | R script to run `tar_make()` or `tar_make_clustermq()` (uncomment the function of your choice). |
| `_targets.R` | The special R script that declares the `targets` pipeline. See `tar_script()` for details. |
| `R/functions.R` | An R script with user-defined functions. |
| `README.Rmd` | An R Markdown report summarizing the results of the analysis. For more information on how to include R Markdown reports as reproducible components of the pipeline, see the `tar_render()` function from the `tarchetypes` package and the literate programming chapter of the manual. |
Run the `targets` pipeline by either running `run.R` or `run.sh`. (The latter is for Unix-like systems only.)
View the results in the `README.md` file.
`targets` skips steps that are already up to date.
The `targets` R package manages the workflow.
Uses the `pins` package to speed up data updates.
# pins/0.4.5/092094df6204c08a37248b1d5202a306/pins/views/data/index.html
# Registering
library(pins)
library(reticulate)
# get a _local_ set of csvs for one season (2014-15) and all leagues
pin_get("1415", board = "local") %>% str(max.level = 1)
# chr [1:22] "/home/runner/.cache/pins/local/1415/B1.csv" ...
# get a _remote_ set of csvs for one season (2014-15) and all leagues
board_register("github",
repo = "JohnGavin/fdata",
branch = 'master',
# TODO: revert from GITHUB_TOKEN to GITHUB_PAT
token = Sys.getenv(c('GITHUB_PAT', 'GITHUB_TOKEN')[2])
)
# get the 2014-15 season for all leagues.
pin_get("data/pins/local/1415", board = "github") %>%
map_dfr(read_csv, col_types = cols()) %>%
type.convert() %>%
head(c(5, 5))
# # A tibble: 5 x 5
# Div Date HomeTeam AwayTeam FTHG
# <fct> <fct> <fct> <fct> <int>
# 1 B1 25/07/14 Standard Charleroi 3
# 2 B1 26/07/14 Cercle Brugge Gent 0
# 3 B1 26/07/14 Lierse Oostende 2
# 4 B1 26/07/14 Waasland-Beveren Club Brugge 0
# 5 B1 26/07/14 Westerlo Lokeren 1
# pin_info("data/pins/local/1415", board = "github")
# https://raw.githubusercontent.com/JohnGavin/fdata/master/data/pins/1415/data.txt
# pin_find("1415", board = "github")
# Sharing
# Once your collaborators gain access to the repo, they can follow the same steps to register the same GitHub board to allow them to upload and download pins with ease.
# Pinning
# pin(iris, description = "The iris data set", board = "github")
# pin(mtcars, description = "The motor trend cars data set", board = "github")
# Discovering
# pin_get("iris", board = "github")
pin_find("football", board = "github")
# # A tibble: 0 x 4
# # … with 4 variables: name <chr>, description <chr>, type <chr>, board <chr>
pin_find("odds", board = "github", extended = TRUE)
# # A tibble: 0 x 4
# # … with 4 variables: name <chr>, description <chr>, type <chr>, board <chr>
# pin_info("mtcars", board = "github")
# GitHub repo only supports files under 25MB in size
# (100MB in theory but
# there is additional overhead when using the GitHub API).
# to support large files, pins makes use of GitHub release files.
# pins will create a new GitHub release file for that particular pin
# The only noticeable change is new releases being created in your repo
# board_register("rsconnect", server = "{{server_name}}")
# Retrieve Pin
# {{retrieve_pin}}
# [1] "Tue Jan 26 20:11:26 2021"
Session info
# R version 4.0.3 (2020-10-10)
# Platform: x86_64-pc-linux-gnu (64-bit)
# Running under: Ubuntu 18.04.5 LTS
#
# Matrix products: default
# BLAS: /usr/lib/x86_64-linux-gnu/openblas/libblas.so.3
# LAPACK: /usr/lib/x86_64-linux-gnu/libopenblasp-r0.2.20.so
#
# locale:
# [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
# [4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
# [7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
# [10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
#
# attached base packages:
# [1] stats graphics grDevices datasets utils methods base
#
# other attached packages:
# [1] magrittr_2.0.1 glue_1.4.2 pins_0.4.5 gt_0.2.2
# [5] lubridate_1.7.9.2 reticulate_1.18 details_0.2.1 visdat_0.5.3
# [9] fs_1.5.0 ggplot2_3.3.3 tidyr_1.1.2 purrr_0.3.4
# [13] stringr_1.4.0 rmarkdown_2.6 tibble_3.0.5 readr_1.4.0
# [17] dplyr_1.0.3 renv_0.12.5 tarchetypes_0.0.1 targets_0.0.2.9000
#
# loaded via a namespace (and not attached):
# [1] Rcpp_1.0.6 lattice_0.20-41 png_0.1-7 ps_1.5.0
# [5] assertthat_0.2.1 rprojroot_2.0.2 digest_0.6.27 utf8_1.1.4
# [9] R6_2.5.0 backports_1.2.1 evaluate_0.14 highr_0.8
# [13] httr_1.4.2 pillar_1.4.7 rlang_0.4.10 curl_4.3
# [17] data.table_1.13.6 callr_3.5.1 Matrix_1.3-2 desc_1.2.0
# [21] labeling_0.4.2 igraph_1.2.6 munsell_0.5.0 compiler_4.0.3
# [25] xfun_0.20 pkgconfig_2.0.3 clipr_0.7.1 htmltools_0.5.1
# [29] tidyselect_1.1.0 codetools_0.2-18 fansi_0.4.2 crayon_1.3.4
# [33] withr_2.4.0 rappdirs_0.3.1 grid_4.0.3 jsonlite_1.7.2
# [37] gtable_0.3.0 lifecycle_0.2.0 scales_1.1.1 zip_2.1.1
# [41] cli_2.2.0 stringi_1.5.3 farver_2.0.3 xml2_1.3.2
# [45] ellipsis_0.3.1 filelock_1.0.2 generics_0.1.0 vctrs_0.3.6
# [49] tools_4.0.3 hms_1.0.0 processx_3.4.5 yaml_2.2.1
# [53] colorspace_2.0-0 knitr_1.30
# tar_glimpse needs visnetwork package
# tar_glimpse() # (allow = starts_with('h'))
# alt cmd g how targets co-depend - relationships via static code analysis
# details(summary = 'tar_glimpse plots', imgur = FALSE)
# tar_visnetwork()
# %>% print() %>% details(summary = 'tar_glimpse plots', imgur = FALSE)
# [1] "board_name" "chk_data_recent" "chk_datee_na"
# [4] "create_board" "divs_dates_recent" "fdata"
# [7] "fdata_change" "fp_cache" "gg_dat_miss"
# [10] "pin_fdata" "pins_path" "raw_csv_list"
# [13] "README" "season_starts" "top_divs_dates_recent"
`codetools::findGlobals()`.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.