Nothing
#' @rdname retrosheet_data
#' @title **Get, Parse, and Format Retrosheet Event and Roster Files**
#' @description This function requires the use of the [**`Chadwick CLI`**](https://github.com/chadwickbureau/chadwick/releases). Follow
#' the directions at the repository for installation of the CLI release for your platform.
#' Specifically from the **`Chadwick CLI`** tools, this function requires the **`cwevent`** application to be available from the command line.
#' For unix platform users: the ```retrosheet_data()``` function uses the ```system()``` interface under the hood.
#' For Windows and other platform users: the ```retrosheet_data()``` function interacts with the **`cwevent`** application
#' using the ```shell()``` interface under the hood.
#' @details
#' ```r
#' retrosheet_data(path_to_directory = NULL,
#' years_to_acquire = most_recent_mlb_season()-1,
#' sequence_years = FALSE)
#' ```
#' @param path_to_directory (default: NULL) A file path that if set, either:
#' 1) creates a new directory, or
#' 2) uses the path to an existing directory
#' @param years_to_acquire (format: YYYY) The seasons to collect. Single, multiple, and
#' sequential years can be passed. If passing multiple years, enclose in a
#' vector (i.e. c(2017,2018)). Defaults to ```most_recent_mlb_season()```.
#' @param sequence_years (logical, default: FALSE): If the seasons passed in the years_to_acquire parameter
#' should be sequenced so that the function returns all years including and
#' between the vector passed, set the argument to TRUE. Defaults to FALSE.
#' @return If `path_to_directory` is not set (default), the process will return a named list
#' of tibbles: 'events' and 'rosters' for each season provided to `years_to_acquire`
#' If `path_to_directory` is set, will also write two csv files to the unzipped directory: 1) a combined csv
#' of the event data for a given year and 2) a combined csv of each team's
#' roster for each year provided to `years_to_acquire`
#' @importFrom dplyr mutate mutate_if
#' @importFrom janitor clean_names
#' @importFrom purrr map
#' @importFrom utils download.file tail unzip write.csv
#' @export
retrosheet_data <- function(path_to_directory = NULL,
years_to_acquire = most_recent_mlb_season()-1,
sequence_years = FALSE){
# create a record for the starting working directory
oldwd <- getwd()
# reset to starting working directory
on.exit(setwd(oldwd))
path_temp_dir <- tempdir()
if(!is.null(path_to_directory)) {
# create folders at directory path specified
ifelse(!dir.exists(path_to_directory),
dir.create(path_to_directory), FALSE)
ifelse(!dir.exists(paste0(path_to_directory, "/download.folder")),
dir.create(paste0(path_to_directory, "/download.folder")), FALSE)
ifelse(!dir.exists(paste0(path_to_directory, "/download.folder/unzipped")),
dir.create(paste0(path_to_directory, "/download.folder/unzipped")), FALSE)
ifelse(!dir.exists(paste0(path_to_directory, "/download.folder/zipped")),
dir.create(paste0(path_to_directory, "/download.folder/zipped")), FALSE)
# setwd
setwd(path_to_directory)
} else {
# create folders at directory path of temporary directory
ifelse(!dir.exists(paste0(path_temp_dir, "/download.folder")),
dir.create(paste0(path_temp_dir, "/download.folder")), FALSE)
ifelse(!dir.exists(paste0(path_temp_dir, "/download.folder/unzipped")),
dir.create(paste0(path_temp_dir, "/download.folder/unzipped")), FALSE)
ifelse(!dir.exists(paste0(path_temp_dir, "/download.folder/zipped")),
dir.create(paste0(path_temp_dir, "/download.folder/zipped")), FALSE)
# setwd to temporary directory path
setwd(path_temp_dir)
path_to_directory <- path_temp_dir
}
fields <- csv_from_url("https://raw.githubusercontent.com/maxtoki/baseball_R/master/data/fields.csv", encoding ="UTF-8")
data.table::fwrite(fields, file = paste0(path_to_directory, "/fields.csv"))
if(sequence_years == FALSE) {
years <- years_to_acquire
} else {
years <-seq(years_to_acquire[1], years_to_acquire[2],1)
}
if(identical(Sys.which("cwevent")[["cwevent"]], "")){
cli::cli_abort("Please install the Chadwick CLI program from:\n https://github.com/chadwickbureau/chadwick/releases")
}
rs_season <- c(purrr::map(.x=years,
function(.x){
acquire_parse_restrosheet_event(season = .x,
wd = path_to_directory)
}))
names(rs_season) <- years
return(rs_season)
}
#' @rdname get_retrosheet_data
#' @title **(legacy) Get, Parse, and Format Retrosheet Event and Roster Files**
#' @inheritParams retrosheet_data
#' @return If `path_to_directory` is not set (default), the process will return a named list
#' of tibbles: 'events' and 'rosters' for each season provided to `years_to_acquire`
#' If `path_to_directory` is set, will also write two csv files to the unzipped directory: 1) a combined csv
#' of the event data for a given year and 2) a combined csv of each team's
#' roster for each year provided to `years_to_acquire`
#' @keywords legacy
#' @export
get_retrosheet_data <- retrosheet_data
acquire_parse_restrosheet_event <- function(season, wd){
## This code was adapted from Jim Albert
## source: https://gist.github.com/bayesball/8892981
## Asssumes you have downloaded and installed The Chadwick Files
## source: https://sourceforge.net/projects/chadwick/files/
# ADJUSTED FOR MAC -- function will work for WINDOWS and MAC
# download, unzip, append retrosheet data
# assume current directory has a folder download.folder
# download.folder has two subfolders unzipped and zipped
# program cwevent.exe is in unzipped folder (for windows)
# create a record for the starting working directory
oldwd <- getwd()
# reset to starting working directory
on.exit(setwd(oldwd))
setwd(wd)
download.retrosheet(wd, season)
unzip.retrosheet(season)
create.csv.file(wd, season)
create.csv.roster(wd, season)
cleanup(wd)
events <- data.table::fread(paste0(wd,"/download.folder/unzipped/all",season,".csv")) %>%
make_baseballr_data("Retrosheet MLB events data from retrosheet.org",Sys.time())
rosters <- data.table::fread(paste0(wd,"/download.folder/unzipped/roster",season,".csv")) %>%
make_baseballr_data("Retrosheet MLB rosters data from retrosheet.org",Sys.time())
retrosheet_season <- c(list(events),list(rosters))
names(retrosheet_season) <- c("events","rosters")
return(retrosheet_season)
}
download.retrosheet <- function(wd, season){
# get zip file from retrosheet website
download.file(
url=paste0("https://www.retrosheet.org/events/", season, "eve.zip")
, destfile=paste0(wd, "/download.folder", "/zipped/",
season, "eve.zip")
)
}
unzip.retrosheet <- function(season){
#unzip retrosheet files
unzip(paste0("download.folder", "/zipped/", season, "eve.zip"),
exdir=paste0("download.folder", "/unzipped"))
}
create.csv.file <- function(wd, year){
# http://chadwick.sourceforge.net/doc/cwevent.html#cwtools-cwevent
# shell("cwevent -y 2000 2000TOR.EVA > 2000TOR.bev")
# create a record for the starting working directory
oldwd <- getwd()
# reset to starting working directory
on.exit(setwd(oldwd))
setwd(paste0(wd, "/download.folder/unzipped"))
character_vars <- c('GAME_ID', 'AWAY_TEAM_ID', 'PITCH_SEQ_TX',
'BAT_ID', 'BAT_HAND_CD', 'RESP_BAT_ID',
'RESP_BAT_HAND_CD', 'PIT_ID', 'PIT_HAND_CD',
'RESP_PIT_ID', 'RESP_PIT_HAND_CD', 'POS2_FLD_ID',
'POS3_FLD_ID', 'POS4_FLD_ID', 'POS5_FLD_ID',
'POS6_FLD_ID', 'POS7_FLD_ID', 'POS8_FLD_ID',
'POS9_FLD_ID', 'BASE1_RUN_ID', 'BASE2_RUN_ID',
'BASE3_RUN_ID', 'EVENT_TX', 'BATTEDBALL_CD',
'BATTEDBALL_LOC_TX', 'ERR1_CD', 'ERR2_CD',
'ERR3_CD', 'RUN1_RESP_PIT_ID', 'RUN2_RESP_PIT_ID',
'RUN3_RESP_PIT_ID', 'REMOVED_FOR_PR_RUN1_ID',
'REMOVED_FOR_PR_RUN2_ID', 'REMOVED_FOR_PH_BAT_ID')
numeric_vars <- c('INN_CT', 'BAT_HOME_ID', 'OUTS_CT',
'BALLS_CT', 'STRIKES_CT', 'AWAY_SCORE_CT',
'HOME_SCORE_CT', 'BAT_FLD_CD', 'BAT_LINEUP_ID',
'EVENT_CD', 'H_FL', 'EVENT_OUTS_CT',
'RBI_CT', 'FLD_CD', 'ERR_CT',
'ERR1_FLD_CD', 'ERR2_FLD_CD', 'ERR3_FLD_CD',
'BAT_DEST_ID', 'RUN1_DEST_ID', 'RUN2_DEST_ID',
'RUN3_DEST_ID', 'BAT_PLAY_TX', 'RUN1_PLAY_TX',
'RUN2_PLAY_TX', 'REMOVED_FOR_PH_BAT_FLD_CD',
'PO1_FLD_CD', 'PO2_FLD_CD', 'PO3_FLD_CD',
'ASS1_FLD_CD', 'ASS2_FLD_CD', 'ASS3_FLD_CD',
'ASS4_FLD_CD', 'ASS5_FLD_CD', 'EVENT_ID', 'year')
logical_vars <- c('LEADOFF_FL', 'PH_FL', 'BAT_EVENT_FL',
'AB_FL', 'SH_FL', 'SF_FL', 'DP_FL',
'TP_FL', 'WP_FL', 'PB_FL', 'BUNT_FL',
'FOUL_FL', 'RUN3_PLAY_TX', 'RUN1_SB_FL',
'RUN2_SB_FL', 'RUN3_SB_FL', 'RUN1_CS_FL',
'RUN2_CS_FL', 'RUN3_CS_FL', 'RUN1_PK_FL',
'RUN2_PK_FL', 'RUN3_PK_FL', 'GAME_NEW_FL',
'GAME_END_FL', 'PR_RUN1_FL', 'PR_RUN2_FL',
'PR_RUN3_FL', 'REMOVED_FOR_PR_RUN3_ID')
if (.Platform$OS.type == "unix"){
system(paste(paste("cwevent -y", year, "-f 0-96 -q"),
paste(year,"*.EV*",sep=""),
paste("> all", year, ".csv", sep="")))
} else {
shell(paste(paste("cwevent -y", year, "-f 0-96 -q"),
paste(year,"*.EV*",sep=""),
paste("> all", year, ".csv", sep="")))
}
fields <- data.table::fread(paste0(wd, "/fields.csv"))
payload <- data.table::fread(paste0("all", year, ".csv"), header = FALSE)
names(payload) <- fields$Header
payload <- payload %>%
dplyr::mutate(year = year)
suppressWarnings(
payload <- payload %>%
dplyr::mutate_if(names(payload) %in% character_vars, as.character) %>%
dplyr::mutate_if(names(payload) %in% numeric_vars, as.numeric) %>%
dplyr::mutate_if(names(payload) %in% logical_vars, as.logical)
)
payload <- payload %>%
janitor::clean_names()
data.table::fwrite(payload, paste0(wd, "/download.folder/unzipped/all", year, ".csv"))
}
create.csv.roster <- function(wd, year){
# creates a csv file of the rosters
filenames <- list.files(path = paste0(wd, "/download.folder/unzipped/"))
filenames.roster <-
subset(filenames, substr(filenames, 4, 11) == paste0(year,".ROS"))
R <- do.call("rbind", lapply(filenames.roster, function(x){
read.csv2(wd = wd, file = x)
}))
names(R)[1:6] = c("Player.ID", "Last.Name", "First.Name",
"Bats", "Pitches", "Team")
R <- R %>%
dplyr::mutate(year = year) %>%
janitor::clean_names()
data.table::fwrite(R, file = paste0(wd, "/download.folder/unzipped/roster",
year, ".csv"))
}
read.csv2 <- function(wd, file){
data.table::fread(file = paste0(wd,
"/download.folder/unzipped/", file),
header = FALSE)
}
cleanup <- function(wd){
# create a record for the starting working directory
oldwd <- getwd()
# reset to starting working directory
on.exit(setwd(oldwd))
# removes retrosheet files not needed from unzipped directory
setwd(paste0(wd, "/download.folder/unzipped"))
if (.Platform$OS.type == "unix") {
system("rm *.EDN")
system("rm *.EDA")
system("rm *.EVN")
system("rm *.EVA")
system("rm *.ROS")
system("rm TEAM*")
} else {
shell("if exist *.EDN del *.EDN")
shell("if exist *.EDA del *.EDA")
shell("if exist *.EVN del *.EVN")
shell("if exist *.EVA del *.EVA")
shell("if exist *.ROS del *.ROS")
shell("if exist TEAM* del TEAM*")
}
# removes retrosheet files not needed from unzipped directory
setwd(paste0(wd, "/download.folder/zipped"))
if (.Platform$OS.type == "unix") {
system("rm *.zip")
} else {
shell("del *.zip")
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.