# Global knitr chunk defaults for this report: source code is not echoed,
# printed results are hidden, and warnings and messages are suppressed.
knitr::opts_chunk$set(
  # what gets shown
  echo = FALSE,
  results = "hide",
  warning = FALSE,
  message = FALSE,
  # how shown output is formatted
  collapse = TRUE,
  comment = "#"
  # R.options = list(width = 60)
)
# TODO: install shiny to see if displays missing tables below.
# Packages attached for the rest of the report. Commented-out entries are
# not currently attached. Each package is listed once only.
pkgs <- c(
  "glue", "dplyr", "targets", "fs", "magrittr",
  "gt", "details", "purrr", "stringr", "lubridate", "reticulate",
  "tidyr", "readr", "rmarkdown", # "visNetwork",
  "pins" # "dygraphs", "codetools"
) # logger
# Attach each package by name. `quietly` is spelled out in full instead of
# relying on partial argument matching of `quiet`.
# NOTE(review): with `logical.return = TRUE`, library() returns FALSE (with a
# warning) rather than stopping when a package is missing, and walk() discards
# that value -- confirm that continuing without the package is intended.
purrr::walk(
  pkgs, library,
  character.only = TRUE, quietly = TRUE,
  logical.return = TRUE, warn.conflicts = TRUE
)
# TODO: add logger
# Stand-in until a logging package is added: log_info() is an alias of print().
log_info <- print

Most recent dates - summary

# Bring the `fdata` target from the targets store into the session.
tar_load(fdata)
# fdata %>% str(max.level = 1)
# Preview the leading corner of `fdata`; the vector `n` limits each dimension
# in turn (4 along the first, 3 along the second).
head(fdata, c(4, 3))

Paged table

# Show the leading 10 x 10 corner of `fdata` as a paged table.
fdata %>%
  head(c(10, 10)) %>%
  paged_table()
#library(dygraphs)
#dygraph(ldeaths)

Missing data

# Remove `fdata` from the session before the plot target is read.
# NOTE(review): presumably done to release memory -- confirm.
rm(fdata)
# plot of (sampled) missing football-data data (fdata)
# tar_read() returns the stored `gg_dat_miss` target without assigning it.
tar_read(gg_dat_miss)

Most recent dates - details

# Load `fdata` from the targets store into the session again.
tar_load(fdata)
# NOTE(review): the column is spelled `datee` here -- confirm that this matches
# the actual column name in `fdata` and is not a typo for a date column.
log_info(glue("Most recent match date: {fdata$datee %>% max()}"))

log_info(glue("\nMost recent match dates (desc) for the top 5 leagues:"))
# Read the target and let it print, without assigning it in the session.
tar_read(top_divs_dates_recent)

log_info(glue("\nMost recent match dates (asc) for all leagues:"))
# Load the target into the session, then print it.
tar_load(divs_dates_recent)
divs_dates_recent

Objectives

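The goal of this repo is to cache the raw football-data files as pins and to check and summarise them with a `targets` pipeline. (TODO: confirm and expand this statement.)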

Details {.tabset}

File structure

The raw data files from football-data, stored in a local cache (as pins), are organized as follows:

# Load the `fp_cache` target, then print the directory tree rooted at that
# path, limited to one level of recursion.
tar_load(fp_cache)
fs::dir_tree(fp_cache, recurse = 1)

Files purpose

File | Purpose
---|---
run.sh | Shell script to run run.R in a persistent background process. Works on Unix-like systems.
run.R | R script to run tar_make() or tar_make_clustermq() (uncomment the function of your choice).
_targets.R | The special R script that declares the targets pipeline. See tar_script() for details.
R/functions.R | An R script with user-defined functions.
README.Rmd | An R Markdown report summarizing the results of the analysis. For more information on how to include R Markdown reports as reproducible components of the pipeline, see the tar_render() function from the tarchetypes package and the literate programming chapter of the manual.

How to run

  1. Run the targets pipeline by either running run.R or run.sh. (The latter is for Unix-like systems only).
  2. View the validation results in the output README.md file.
  3. Make changes to the R code, rerun the pipeline, and watch targets skip steps that are already up to date.

Pipeline

How to access

Python

```{python access1, eval=FALSE}
# reticulate::py_install('pins')
# (bash) conda env list ; source activate ... ; pip install pins
# library(reticulate)

import pins

pins.pin_get("1415", board = "local")

pins.board_register("github", repo = "JohnGavin/fdata", branch = 'master')

# Retrieve Pin
pins.pin_get("{{account_name}}/{{pin_name}}", board = "github")
pins.pin_get("{{account_name}}/{{pin_name}}", board = "rsconnect")
```

### R 
+ TODO: add examples
```r
# pins/0.4.5/092094df6204c08a37248b1d5202a306/pins/views/data/index.html
# Registering
library(pins)
library(reticulate)
# get a _local_ set of csvs for one season (2014-15) and all leagues
pin_get("1415", board = "local") %>% str(max.level = 1)

# get a _remote_ set of csvs for one season (2014-15) and all leagues
board_register("github", 
  repo = "JohnGavin/fdata", 
  branch = 'master', 
  # TODO: revert from GITHUB_TOKEN to GITHUB_PAT
  # The `[2]` index currently selects the GITHUB_TOKEN environment variable.
  token = Sys.getenv(c('GITHUB_PAT', 'GITHUB_TOKEN')[2])
)
# get the 2014-15 season for all leagues.
# Each element returned by pin_get() is read with read_csv() and the results
# are row-bound into one data frame.
pin_get("data/pins/local/1415", board = "github") %>% 
  map_dfr(read_csv, col_types = cols()) %>% 
  # `as.is = TRUE` is stated explicitly: on R >= 4.0 type.convert() warns for
  # every column when `as.is` is missing and then uses TRUE anyway.
  type.convert(as.is = TRUE) %>% 
  head(c(5, 5))
# pin_info("data/pins/local/1415", board = "github")
# https://raw.githubusercontent.com/JohnGavin/fdata/master/data/pins/1415/data.txt
# pin_find("1415", board = "github")

# Sharing
# Once your collaborators gain access to the repo, they can follow the same steps to register the same GitHub board to allow them to upload and download pins with ease.

# Pinning
# pin(iris, description = "The iris data set", board = "github")
# pin(mtcars, description = "The motor trend cars data set", board = "github")

# Discovering
# pin_get("iris", board = "github")
# Search the registered GitHub board for pins matching a text query.
pin_find("football", board = "github")
pin_find("odds", board = "github", extended = TRUE)
# pin_info("mtcars", board = "github")

# GitHub repo only supports files under 25MB in size 
#   (100MB in theory but 
#   there is additional overhead when using the GitHub API). 
# to support large files, pins makes use of GitHub release files. 
#   pins will create a new GitHub release file for that particular pin
#   The only noticeable change is new releases being created in your repo

# board_register("rsconnect", server = "{{server_name}}")
# Retrieve Pin
# {{retrieve_pin}}

```

Notes {.appendix .tabset .tabset-pills}

References


R information

# log_success("End of README.Rmd")
# Current system date and time, as a character string.
date()

Session info

# Record the R version, platform and attached/loaded packages for this run.
sessionInfo()
# The remaining lines are disabled experiments for finding the path of the
# running script so that an "end of file" message can be logged.
# ( fpp <- rstudioapi::getSourceEditorContext()$path ) 
# script_fn <- sys.frame(1)$ofile %>% basename()
# log_success("End of {script_fn}")

# https://stackoverflow.com/questions/1815606/determine-path-of-the-executing-script
# install.packages("arrow")
# devtools::install_github('jerryzhujian9/ezR', force = TRUE)
# tmp <- ezR::ez.csf()
# log_success("End of file {basename(tmp)}\n\r\tFolder: {dirname(tmp)}")


TODOs {.active}


README parameters

#  fig.align = 'left', 
# Turn a list-like object into a tibble of its elements.
#
# lst:      list-like object; its class is stripped and every element is
#           converted with as.character(), so each element must yield exactly
#           one string.
# rownames: name of the tibble column that receives the element names.
#
# Returns the tibble built by as_tibble() from the resulting character vector.
list_to_tibble <- function(lst, rownames = 'param') {
  chr_values <- purrr::map_chr(unclass(lst), as.character)
  as_tibble(chr_values, rownames = rownames)
}

# params
# log_info("Script parameters (x{length(params)}) printed.")
# Render the report parameters as a gt table: parameter names (underscores
# replaced by spaces) go in the stub, their values in the body.
params %>%
  list_to_tibble() %>%
  mutate(param = param %>% str_replace_all('_', ' ')) %>%
  #
  # https://malco.io/2020/05/16/replicating-an-nyt-table-of-swedish-covid-deaths-with-gt/
  gt(rowname_col = 'param') %>%
  # align options: "auto", "left", "center", "right"
  cols_align(align = "right", columns = 'param') %>%
  tab_stubhead(label = 'Rmarkdown parameters') %>%
  # One text style applied to both the stub and the body cells.
  # size options: "xx-small", "x-small", "small", "medium", "large",
  #   "x-large", "xx-large"
  # stretch options: "ultra-condensed", "extra-condensed", "condensed",
  #   "semi-condensed", "normal", "semi-expanded", "expanded",
  #   "extra-expanded", "ultra-expanded"
  tab_style(
    style = cell_text(size = "x-small", stretch = "condensed"),
    locations = list(cells_stub(), cells_body()) # cells_stubhead()
  ) %>%
  cols_width(
    vars(param) ~ px(250),
    # ends_with("r") ~ px(100),
    # starts_with("date") ~ px(200),
    everything() ~ px(450)
  ) %>%
  # cols_hide(vars(ret_typ_mtrc)) %>%
  tab_header(
    title = glue::glue("Rmarkdown yaml script parameters"),
    subtitle = glue::glue("Passed in via command line.")
  ) %>%
  # fmt_date(
  #   columns = vars(date),
  #   date_style = 3
  # ) %>%
  # fmt_currency(
  #   columns = vars(open, high, low, close),
  #   currency = "USD"
  # ) %>%
  # fmt_number(
  #   columns = vars(sharpe_pa),
  #   # TODO: what is suffixing?
  #   suffixing = TRUE
  # ) %>%
  tab_options(
    # "x-smaller" is not a CSS font-size keyword; "x-small" matches the size
    # already used for the stub and body cells.
    column_labels.font.size = "x-small",
    table.font.size = "x-small",
    data_row.padding = px(3)
  ) %>%
  tab_source_note(
    source_note = "TODO: Remove params moved to drake plan."
  ) # %>% 
      # tab_spanner(
      #   label = "Sharpe",
      #   columns = starts_with('sharpe_pa')
      # ) %>%
      # cols_move_to_start( # cols_move_to_end
      #   columns = starts_with('sharpe_pa')
      # ) # %>%
      # cols_label(
      #   .list = c(
      #     cols_label_nms_list(top_n_abs_shrp_strt, 'sharpe_pa'),
      #     cols_label_nms_list(top_n_abs_shrp_strt, 'max_dd_sd_pa')),
        # no harm to include invalide col names as examples.
      #   Ozone = html("Ozone,<br>ppbV"),
      # )

Code metrics {.tabset .tabset-pills}

Outdated

# Pass the result of tar_outdated() to details::details(), with the summary
# label 'tar_outdated '.
tar_outdated() %>% details::details(summary = 'tar_outdated ')

Validate

# Pass the result of tar_validate() to details::details(), with the summary
# label 'tar_validate '.
tar_validate() %>% details::details(summary = 'tar_validate ')

Glimpse

# tar_glimpse needs visnetwork package
# tar_glimpse() # (allow = starts_with('h'))
# alt cmd g how targets co-depend - relationships via static code analysis
# details(summary = 'tar_glimpse plots', imgur = FALSE)

Network {.active}

# tar_visnetwork() 
# %>% print() %>% details(summary = 'tar_glimpse plots', imgur = FALSE)

Minor metrics {.tabset .tabset-pills}

Targets list {.active}

# Names of the targets currently saved in the targets data store.
tar_objects() # List saved targets

Manifest

# Pass the result of tar_manifest() to details::details(), with the summary
# label 'tar_manifest '.
tar_manifest() %>% details::details(summary = 'tar_manifest ')

Meta

# tar_meta(names = starts_with("churn"), fields = path) 
# Selected metadata fields for every target, sorted by error, then by
# warnings, then by descending run time in seconds.
tar_meta(
  fields = c('name', 'seconds', 'warnings', 'error', 'type', 'format', 'path')
) %>%
  arrange(error, warnings, desc(seconds))
# Warnings only: drop rows containing NA, then extract the last column as a
# vector (the default for pull()).
tar_meta(fields = 'warnings') %>%
  drop_na() %>%
  pull()

Relationships

# dependency relationships using `codetools::findGlobals()`
# TODO: add codetools to pkgs?
# codetools is attached here because it is commented out of `pkgs` above.
library(codetools)
# Demonstration: list the global names used by findGlobals() itself;
# `merge = FALSE` returns functions and variables as separate components.
codetools::findGlobals(codetools::findGlobals, merge = FALSE) # find dependency relationships for yourself

{-}



JohnGavin/fdata documentation built on Jan. 29, 2021, 1:38 p.m.