#' Formats swimming and diving data read with \code{read_results} into a data
#' frame
#'
#' Takes the output of \code{read_results} and cleans it, yielding a data frame
#' of swimming (and diving) results
#'
#' @importFrom dplyr mutate
#' @importFrom dplyr lag
#' @importFrom dplyr case_when
#' @importFrom dplyr select
#' @importFrom dplyr arrange
#' @importFrom dplyr filter
#' @importFrom dplyr bind_rows
#' @importFrom dplyr everything
#' @importFrom dplyr pull
#' @importFrom dplyr select
#' @importFrom dplyr starts_with
#' @importFrom stringr str_replace_all
#' @importFrom stringr str_extract
#' @importFrom stringr str_split
#' @importFrom stringr str_detect
#' @importFrom stringr str_length
#' @importFrom stringr str_sort
#' @importFrom purrr map_lgl
#' @importFrom purrr map
#' @importFrom stats setNames
#'
#' @param file output from \code{read_results}
#' @param avoid a list of strings. Rows in \code{file} containing these strings
#' will not be included. For example "Pool:", often used to label pool
#' records, could be passed to \code{avoid}. The default is \code{NULL}. For
#' most result formats the strings supplied here are combined with a built-in
#' list, \code{avoid_default}, which contains many strings similar to "Pool:",
#' such as "STATE:" and "Qual:", so users only need to supply additions.
#' \code{avoid} is handled before \code{typo} and \code{replacement}.
#' @param typo a list of strings that are typos in the original results.
#' \code{swim_parse} is particularly sensitive to accidental double spaces, so
#' "Central  High School", with two spaces between "Central" and "High", is a
#' problem, which can be fixed. Pass "Central  High School" to \code{typo}.
#' Unexpected commas are also an issue, for example "Texas, University of"
#' should be fixed using \code{typo} and \code{replacement}
#' @param replacement a list of fixes for the strings in \code{typo}. Here one
#' could pass "Central High School" (one space between "Central" and "High")
#' and "Texas" to \code{replacement} to fix the issues described in \code{typo}
#' @param format_results should the results be formatted for analysis (special
#' strings like \code{"DQ"} replaced with \code{NA}, \code{Finals} as
#' definitive column)? Default is \code{TRUE}
#' @param splits either \code{TRUE} or the default, \code{FALSE} - should
#' \code{swim_parse} attempt to include splits.
#' @param split_length either \code{25} or the default, \code{50}, the length of
#' pool at which splits are recorded. Not all results are internally
#' consistent on this issue - some have races with splits by 50 and other
#' races with splits by 25.
#' @param relay_swimmers either \code{TRUE} or the default, \code{FALSE} -
#' should relay swimmers be reported. Relay swimmers are reported in separate
#' columns named \code{Relay_Swimmer_1} etc.
#' @return returns a data frame with columns \code{Name}, \code{Place},
#' \code{Age}, \code{Team}, \code{Prelims}, \code{Finals},
#' \code{Points}, \code{Event} and \code{DQ}. Note all swims will have a
#' \code{Finals}, even if that time was actually swum in the prelims
#' (i.e. a swimmer did not qualify for finals). This is so that final results
#' for an event can be generated from just one column.
#'
#' @examples \dontrun{
#' swim_parse(read_results("http://www.nyhsswim.com/Results/Boys/2008/NYS/Single.htm", node = "pre"),
#' typo = c("-1NORTH ROCKL"), replacement = c("1-NORTH ROCKL"),
#' splits = TRUE,
#' relay_swimmers = TRUE)
#' }
#' \dontrun{
#' swim_parse(read_results("inst/extdata/Texas-Florida-Indiana.pdf"),
#' typo = c("Indiana  University", ", University of"), replacement = c("Indiana University", ""),
#' splits = TRUE,
#' relay_swimmers = TRUE)
#' }
#' @seealso \code{swim_parse} must be run on the output of
#' \code{\link{read_results}}
Swim_Parse <-
  function(file,
           avoid = NULL,
           typo = typo_default,
           replacement = replacement_default,
           format_results = TRUE,
           splits = FALSE,
           split_length = 50,
           relay_swimmers = FALSE) {
    # Validates the arguments, strips the marker that read_results puts at the
    # top of `file`, works out which results format `file` is in, and returns
    # the output of the matching format-specific parser.

    #### default typo and replacement strings ####
    # The defaults for `typo` and `replacement` name these locals. Default
    # arguments are evaluated lazily inside the function, so these must be
    # defined before `typo` / `replacement` are first used below.
    typo_default <- c("typo")
    replacement_default <- c("typo")

    #### argument checks ####
    if (length(typo) != length(replacement)) {
      stop("typo and replacement must have the same number of elements (be the same length)")
    }

    if (!is.logical(format_results) || anyNA(format_results)) {
      stop("format_results must be logical, either TRUE or FALSE")
    }

    if (!is.logical(splits) || anyNA(splits)) {
      stop("splits must be logical, either TRUE or FALSE")
    }

    if (!is.numeric(split_length)) {
      stop("split_length must be numeric, usually 50 or 25")
    }

    if (!is.logical(relay_swimmers) || anyNA(relay_swimmers)) {
      stop("relay_swimmers must be logical, either TRUE or FALSE")
    }

    #### strings that if a line begins with one of them the line is ignored ####
    avoid_default <-
      c(
        "[A-S]\\:", # to allow EVENT:
        "[U-Z]\\:", # to allow EVENT:
        "[A-MO-Z]T\\:", # to allow EVENT:
        "[a-q]\\:", # want to make sure to include r: for reaction times in splits lines
        "[s-z]\\:", # want to make sure to include r: for reaction times in splits lines
        "[:alpha:]r\\:",
        "\\.\\:",
        "\\d\\:\\s",
        "\\'\\:",
        "QUALIFYING "
      )

    #### combine avoid and avoid_default ####
    # Used by every parser below except the Splash one (which gets its own
    # list) and the .hy3 / ISL ones (which are not passed an avoid list).
    avoid_non_splash <- unique(c(avoid, avoid_default))

    #### check for and remove the read_results flag ####
    # The length guard and isTRUE() keep NULL, zero-length or NA input on the
    # informative error path instead of failing inside `if`.
    has_flag <- length(file) > 0 &&
      isTRUE(stringr::str_detect(file[1], "^read_results_flag$"))

    if (!has_flag) {
      stop("Please run read_results on file prior to running swim_parse.")
    }

    file <- file[-1]

    if (length(file) <= 1) {
      stop("No results found. Please check source.")
    }

    #### helpers for format detection ####
    # Some formats are recognised from the first few lines only. Clamp the
    # window to the lines actually present: indexing past the end of `file`
    # would pad with NA.
    first_lines <- function(n) {
      file[seq_len(min(n, length(file)))]
    }

    # TRUE when at least one line matches; NA lines count as "no match" so the
    # result is always a usable TRUE/FALSE for `if`.
    any_match <- function(lines, pattern) {
      any(stringr::str_detect(lines, pattern), na.rm = TRUE)
    }

    #### dispatch on detected format ####
    # The order of the checks matters: the first matching format wins and
    # Hytek meet results are the fallback.
    data <-
      if (any_match(file[1], "^A107")) {
        #### .hy3 files ####
        hy3_parse(file = file)
      } else if (any_match(first_lines(6), "S\\.A\\.M\\.M\\.S\\.|MEET SANCTION NUMBER")) {
        #### S.A.M.M.S. ####
        swim_parse_samms(
          file_samms = file,
          avoid_samms = avoid_non_splash,
          typo_samms = typo,
          replacement_samms = replacement,
          format_samms = format_results
        )
      } else if (any_match(file, "Splash Meet Manager")) {
        #### Splash Meet Results ####
        # Splash results use their own default avoid list rather than
        # avoid_default.
        avoid_default_splash <- c("abcxyz")
        avoid_splash <- unique(c(avoid, avoid_default_splash))

        swim_parse_splash(
          file_splash = file,
          avoid_splash = avoid_splash,
          typo_splash = typo,
          replacement_splash = replacement,
          splits = splits,
          split_length_splash = split_length,
          relay_swimmers_splash = relay_swimmers
        )
      } else if (any_match(first_lines(6), "( ISL )|(^ISL )") ||
                 any_match(
                   file,
                   "Cali Condors|LA Current|Energy Standard|DC Trident|Aqua Centurions|London Roar"
                 )) {
        #### ISL ####
        swim_parse_ISL(
          file = file,
          splits = splits,
          relay_swimmers = relay_swimmers
        )
      } else if (any_match(file, "(Official Timekeeping by Omega)|(Report created )")) {
        #### Omega Files ####
        swim_parse_omega(
          file_omega = file,
          avoid_omega = avoid_non_splash,
          typo_omega = typo,
          replacement_omega = replacement,
          splits = splits,
          split_length_omega = split_length,
          relay_swimmers_omega = relay_swimmers
        )
      } else if (any_match(first_lines(10), "Top Times")) {
        #### Hytek Top Times ####
        toptimes_parse_hytek(
          file_hytek_toptimes = file,
          avoid_hytek_toptimes = avoid_non_splash,
          typo_hytek_toptimes = typo,
          replacement_hytek_toptimes = replacement
        )
      } else {
        #### Hytek Meet Results ####
        swim_parse_hytek(
          file_hytek = file,
          avoid_hytek = avoid_non_splash,
          typo_hytek = typo,
          replacement_hytek = replacement,
          splits = splits,
          split_length_hytek = split_length,
          relay_swimmers_hytek = relay_swimmers
        )
      }

    return(data)
  }
# Lower-case alias: `swim_parse` is bound to the same function object as
# `Swim_Parse`. The roxygen tags below place it on the `Swim_Parse` help page
# and export it.
#' @rdname Swim_Parse
#' @export
swim_parse <- Swim_Parse
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.