# Global knitr chunk defaults for this report: collapse source and output,
# prefix output lines with "#", and silence code echo, printed results,
# warnings and messages.
# NOTE(review): results = "hide" suppresses printed chunk output unless a
# chunk overrides it — confirm this is intended for the tables below.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#",
  echo = FALSE,
  results = "hide",
  warning = FALSE,
  message = FALSE
  # R.options = list(width = 60)
)
# TODO: install shiny to see if displays missing tables below.

# Packages attached for this report.
# NOTE(review): the original line breaks were lost, so which of the trailing
# entries were commented out is reconstructed — confirm that 'pins' is meant
# to be attached. The duplicated 'reticulate' entry has been removed.
pkgs <- c(
  "glue", "dplyr", "targets", "fs", "magrittr",
  "gt", "details",
  "purrr", "stringr", "lubridate", "reticulate",
  "tidyr", "readr", "rmarkdown", # 'visNetwork',
  "pins" # 'dygraphs', 'codetools'
) # logger

# Attach every package in `pkgs`. `quietly` is spelled out in full (the
# original `quiet = TRUE` only worked through partial argument matching).
# With `logical.return = TRUE`, library() returns FALSE with a warning
# instead of stopping when a package is not installed.
purrr::walk(
  pkgs,
  library,
  character.only = TRUE,
  quietly = TRUE,
  logical.return = TRUE,
  warn.conflicts = TRUE
)

# TODO: add logger
# Until a real logger is added, log_info() is just print().
log_info <- print
# Load the `fdata` target into the current environment.
tar_load(fdata)
# fdata %>% str(max.level = 1)
# Preview the top-left corner: first 4 rows of the first 3 columns.
head(fdata, c(4, 3))
`r prettyNum(nrow(fdata), big.mark=',')`
matches and
`r prettyNum(ncol(fdata), big.mark=',')`
variables.

`r prettyNum(length(fdata$Div %>% unique), big.mark=',')`
leagues.

`r fdata$datee %>% max`
`r fdata$datee %>% min`
`r Sys.time()`
# Show the first 10 rows and first 10 columns of `fdata` as a paged table.
fdata %>%
  head(c(10, 10)) %>%
  paged_table()
#library(dygraphs) #dygraph(ldeaths)
# Remove the local copy of `fdata` from the environment.
rm(list = "fdata")
# plot of (sampled) missing football-data data (fdata)
tar_read(gg_dat_miss)
# Reload `fdata` and report the latest match date overall.
tar_load(fdata)
log_info(
  glue("Most recent match date: {fdata$datee %>% max()}")
)

# Latest match dates for the top 5 leagues (read straight from the store).
log_info(
  glue("\nMost recent match dates (desc) for the top 5 leagues:")
)
tar_read(top_divs_dates_recent)

# Latest match dates for every league; the target is loaded into the
# environment and then printed.
log_info(
  glue("\nMost recent match dates (asc) for all leagues:")
)
tar_load(divs_dates_recent)
divs_dates_recent
The goal of this repo is
football-data.co.uk
The raw data files from football-data, stored in a local cache (as pins), are organized as follows:
# Load the `fp_cache` target and print the directory tree below it,
# recursing one level.
tar_load(fp_cache)
fs::dir_tree(fp_cache, recurse = 1)
File | Purpose
---|---
`run.sh` | Shell script to run `run.R` in a persistent background process. Works on Unix-like systems.
`run.R` | R script to run `tar_make()` or `tar_make_clustermq()` (uncomment the function of your choice.)
`_targets.R` | The special R script that declares the `targets` pipeline. See `tar_script()` for details.
`R/functions.R` | An R script with user-defined functions.
`README.Rmd` | An R Markdown report summarizing the results of the analysis. For more information on how to include R Markdown reports as reproducible components of the pipeline, see the `tar_render()` function from the `tarchetypes` package and the literate programming chapter of the manual.
`targets` pipeline by either running `run.R` or `run.sh`. (The latter is for Unix-like systems only).

`README.md` file.

`targets` skip steps that are already up to date.

`targets` R package manages the workflow.

`pins` package to speed up data updates.

```{python access1, eval=FALSE}
# Example of reading pins from Python.
import pins

# Fetch the pin named "1415" from the local board.
pins.pin_get("1415", board="local")

# Register the project's GitHub repository as a board.
pins.board_register("github", repo="JohnGavin/fdata", branch="master")

# Template calls: fill in the {{...}} placeholders before running.
pins.pin_get("{{account_name}}/{{pin_name}}", board="github")
pins.pin_get("{{account_name}}/{{pin_name}}", board="rsconnect")
```

### R

+ TODO: add examples

```r
# pins/0.4.5/092094df6204c08a37248b1d5202a306/pins/views/data/index.html

# Registering
# NOTE(review): the original line breaks were lost; confirm that the
# library() and pin_find() calls below were live code, not comments.
library(pins)
library(reticulate)

# get a _local_ set of csvs for one season (2014-15) and all leagues
pin_get("1415", board = "local") %>%
  str(max.level = 1)

# get a _remote_ set of csvs for one season (2014-15) and all leagues
board_register(
  "github",
  repo = "JohnGavin/fdata",
  branch = "master",
  # TODO: revert from GITHUB_TOKEN to GITHUB_PAT
  token = Sys.getenv("GITHUB_TOKEN")
)

# get the 2014-15 season for all leagues.
# `as.is = TRUE` is stated explicitly: calling type.convert() without it
# raises a warning on R >= 4.0 and falls back to this same value.
pin_get("data/pins/local/1415", board = "github") %>%
  map_dfr(read_csv, col_types = cols()) %>%
  type.convert(as.is = TRUE) %>%
  head(c(5, 5))
# pin_info("data/pins/local/1415", board = "github")
# https://raw.githubusercontent.com/JohnGavin/fdata/master/data/pins/1415/data.txt
# pin_find("1415", board = "github")

# Sharing
# Once your collaborators gain access to the repo, they can follow the same
# steps to register the same GitHub board to allow them to upload and
# download pins with ease.

# Pinning
# pin(iris, description = "The iris data set", board = "github")
# pin(mtcars, description = "The motor trend cars data set", board = "github")

# Discovering
# pin_get("iris", board = "github")
pin_find("football", board = "github")
pin_find("odds", board = "github", extended = TRUE)
# pin_info("mtcars", board = "github")

# GitHub repo only supports files under 25MB in size
# (100MB in theory but
# there is additional overhead when using the GitHub API).
# to support large files, pins makes use of GitHub release files.
# pins will create a new GitHub release file for that particular pin
# The only noticeable change is new releases being created in your repo

# board_register("rsconnect", server = "{{server_name}}")

# Retrieve Pin
# {{retrieve_pin}}
```
# log_success("End of README.Rmd")
# Current system date and time as a character string.
date()
Session info
# Record the R version, platform and loaded packages for this render.
utils::sessionInfo()

# Earlier attempts at logging the name of the running script, kept for
# reference:
# ( fpp <- rstudioapi::getSourceEditorContext()$path )
# script_fn <- sys.frame(1)$ofile %>% basename()
# log_success("End of {script_fn}")
# https://stackoverflow.com/questions/1815606/determine-path-of-the-executing-script
# install.packages("arrow")
# devtools::install_github('jerryzhujian9/ezR', force = TRUE)
# tmp <- ezR::ez.csf()
# log_success("End of file {basename(tmp)}\n\r\tFolder: {dirname(tmp)}")
# fig.align = 'left',

#' Convert a named list into a tibble of names and character values
#'
#' @param lst A named list, or a list-like object (its class attribute is
#'   dropped with `unclass()` before conversion).
#' @param rownames Name of the column that receives the element names.
#' @return A tibble with one row per element of `lst`: the element names in
#'   the column called `rownames`, and the values converted to character.
list_to_tibble <- function(lst, rownames = 'param' ) {
  lst %>%
    unclass() %>%
    # toString() gives the same text as as.character() for length-one
    # elements, and also copes with NULL or multi-valued elements, which
    # made map_chr(as.character) stop with an error.
    purrr::map_chr(toString) %>%
    as_tibble(rownames = rownames)
}

# params
# log_info("Script parameters (x{length(params)}) printed.")

# Render the R Markdown `params` as a gt table: one row per parameter, with
# the parameter name (underscores shown as spaces) in the stub.
params %>%
  list_to_tibble() %>%
  mutate(param = str_replace_all(param, '_', ' ')) %>%
  # https://malco.io/2020/05/16/replicating-an-nyt-table-of-swedish-covid-deaths-with-gt/
  gt(rowname_col = 'param') %>%
  # align is one of "auto", "left", "center", "right"
  cols_align(align = "right", columns = 'param') %>%
  tab_stubhead(label = 'Rmarkdown parameters') %>%
  # One style for both the stub and the body cells (the original repeated
  # the same cell_text() in two tab_style() calls).
  # size:    "xx-small", "x-small", "small", "medium", "large", "x-large",
  #          "xx-large"
  # stretch: "ultra-condensed", "extra-condensed", "condensed",
  #          "semi-condensed", "normal", "semi-expanded", "expanded",
  #          "extra-expanded", "ultra-expanded"
  tab_style(
    style = cell_text(size = "x-small", stretch = "condensed"),
    locations = list(cells_stub(), cells_body()) # cells_stubhead()
  ) %>%
  cols_width(
    vars(param) ~ px(250),
    # ends_with("r") ~ px(100),
    # starts_with("date") ~ px(200),
    everything() ~ px(450)
  ) %>%
  # cols_hide(vars(ret_typ_mtrc)) %>%
  tab_header(
    title = glue::glue("Rmarkdown yaml script parameters"),
    subtitle = glue::glue("Passed in via command line.")
  ) %>%
  # fmt_date(
  #   columns = vars(date),
  #   date_style = 3
  # ) %>%
  # fmt_currency(
  #   columns = vars(open, high, low, close),
  #   currency = "USD"
  # ) %>%
  # fmt_number(
  #   columns = vars(sharpe_pa),
  #   # TODO: what is suffixing?
  #   suffixing = TRUE
  # ) %>%
  tab_options(
    # "x-smaller" is not a valid CSS font size; "x-small" matches the cell
    # text size chosen above.
    column_labels.font.size = "x-small",
    table.font.size = "x-small",
    data_row.padding = px(3)
  ) %>%
  tab_source_note(
    source_note = "TODO: Remove params moved to drake plan."
  )
# %>%
# tab_spanner(
#   label = "Sharpe",
#   columns = starts_with('sharpe_pa')
# ) %>%
# cols_move_to_start( # cols_move_to_end
#   columns = starts_with('sharpe_pa')
# )
# %>%
# cols_label(
#   .list = c(
#     cols_label_nms_list(top_n_abs_shrp_strt, 'sharpe_pa'),
#     cols_label_nms_list(top_n_abs_shrp_strt, 'max_dd_sd_pa')),
#   # no harm to include invalide col names as examples.
#   Ozone = html("Ozone,<br>ppbV"),
# )
# Wrap the list of outdated targets in a details block with a summary line.
details::details(tar_outdated(), summary = 'tar_outdated ')
# Do the same for the result of validating the pipeline.
details::details(tar_validate(), summary = 'tar_validate ')
# tar_glimpse needs visnetwork package # tar_glimpse() # (allow = starts_with('h')) # alt cmd g how targets co-depend - relationships via static code analysis # details(summary = 'tar_glimpse plots', imgur = FALSE)
# tar_visnetwork() # %>% print() %>% details(summary = 'tar_glimpse plots', imgur = FALSE)
# List saved targets
tar_objects()
# Wrap the pipeline manifest in a details block with a summary line.
details::details(tar_manifest(), summary = 'tar_manifest ')
# tar_meta(names = starts_with("churn"), fields = path)

# Selected metadata fields for every target, sorted by error, then
# warnings, then by descending run time in seconds.
tar_meta(
  fields = c('name', 'seconds', 'warnings', 'error', 'type', 'format', 'path')
) %>%
  arrange(error, warnings, desc(seconds))

# Only the recorded warnings: drop rows with missing values, then pull the
# last column.
tar_meta(fields = 'warnings') %>%
  drop_na() %>%
  pull()
# dependency relationships using `codetools::findGlobals()`
# NOTE(review): a bare `codetools::findGlobals()` fragment followed by "."
# preceded this chunk; with no argument the call cannot run, and it repeats
# the line above, so it was dropped — confirm it was leftover text.
# TODO: add codetools to pkgs?
library(codetools)
# Example: list the globals that findGlobals() itself uses. With
# merge = FALSE the functions and variables are returned separately rather
# than merged into one vector.
codetools::findGlobals(codetools::findGlobals, merge = FALSE)
# find dependency relationships for yourself
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.