# Nothing
#' Formats swimming and diving data read with \code{read_results} into a data
#' frame
#'
#' Takes the output of \code{read_results} and cleans it, yielding a data frame
#' of swimming (and diving) results
#'
#' @importFrom dplyr mutate
#' @importFrom dplyr lag
#' @importFrom dplyr case_when
#' @importFrom dplyr select
#' @importFrom dplyr arrange
#' @importFrom dplyr filter
#' @importFrom dplyr bind_rows
#' @importFrom dplyr everything
#' @importFrom dplyr pull
#' @importFrom dplyr select
#' @importFrom dplyr starts_with
#' @importFrom stringr str_replace_all
#' @importFrom stringr str_extract
#' @importFrom stringr str_split
#' @importFrom stringr str_detect
#' @importFrom stringr str_length
#' @importFrom stringr str_sort
#' @importFrom purrr map_lgl
#' @importFrom purrr map
#' @importFrom stats setNames
#'
#' @param file output from \code{read_results}
#' @param avoid a list of strings. Rows in \code{file} containing these strings
#' will not be included. For example "Pool:", often used to label pool
#' records, could be passed to \code{avoid}. The default is \code{NULL}, in
#' which case only the built-in \code{avoid_default} list is used; that list
#' contains patterns covering many strings similar to "Pool:", such as
#' "STATE:" and "Qual:". Strings supplied to \code{avoid} are used in addition
#' to the built-in list.
#' \code{avoid} is handled before \code{typo} and \code{replacement}.
#' @param typo a list of strings that are typos in the original results.
#' \code{swim_parse} is particularly sensitive to accidental double spaces, so
#' "Central  High School", with two spaces between "Central" and "High" is a
#' problem, which can be fixed. Pass "Central  High School" to \code{typo}.
#' Unexpected commas are also an issue, for example "Texas, University of"
#' should be fixed using \code{typo} and \code{replacement}
#' @param replacement a list of fixes for the strings in \code{typo}. Here one
#' could pass "Central High School" (one space between "Central" and "High")
#' and "Texas" to \code{replacement} to fix the issues described in \code{typo}
#' @param format_results should the results be formatted for analysis (special
#' strings like \code{"DQ"} replaced with \code{NA}, \code{Finals} as
#' definitive column)? Default is \code{TRUE}
#' @param splits either \code{TRUE} or the default, \code{FALSE} - should
#' \code{swim_parse} attempt to include splits.
#' @param split_length either \code{25} or the default, \code{50}, the length of
#' pool at which splits are recorded. Not all results are internally
#' consistent on this issue - some have races with splits by 50 and other
#' races with splits by 25.
#' @param relay_swimmers either \code{TRUE} or the default, \code{FALSE} -
#' should relay swimmers be reported. Relay swimmers are reported in separate
#' columns named \code{Relay_Swimmer_1} etc.
#' @return returns a data frame with columns \code{Name}, \code{Place},
#' \code{Age}, \code{Team}, \code{Prelims}, \code{Finals},
#' \code{Points}, \code{Event} & \code{DQ}. Note all swims will have a
#' \code{Finals}, even if that time was actually swam in the prelims
#' (i.e. a swimmer did not qualify for finals). This is so that final results
#' for an event can be generated from just one column.
#'
#' @examples \dontrun{
#' swim_parse(read_results("http://www.nyhsswim.com/Results/Boys/2008/NYS/Single.htm", node = "pre"),
#' typo = c("-1NORTH ROCKL"), replacement = c("1-NORTH ROCKL"),
#' splits = TRUE,
#' relay_swimmers = TRUE)
#' }
#' \dontrun{
#' swim_parse(read_results("inst/extdata/Texas-Florida-Indiana.pdf"),
#' typo = c("Indiana  University", ", University of"), replacement = c("Indiana University", ""),
#' splits = TRUE,
#' relay_swimmers = TRUE)
#' }
#' @seealso \code{swim_parse} must be run on the output of
#' \code{\link{read_results}}
Swim_Parse <-
  function(file,
           avoid = NULL,
           typo = typo_default,
           replacement = replacement_default,
           format_results = TRUE,
           splits = FALSE,
           split_length = 50,
           relay_swimmers = FALSE) {

    #### default typo and replacement strings ####
    # The `typo` / `replacement` defaults are evaluated lazily inside this
    # function's environment, so these two locals must be assigned before
    # either argument is first used below.
    typo_default <- c("typo")
    replacement_default <- c("typo")

    #### argument checks ####
    if (length(typo) != length(replacement)) {
      stop("typo and replacement must have the same number of elements (be the same length)")
    }

    if (!is.logical(format_results) || anyNA(format_results)) {
      stop("format_results must be logical, either TRUE or FALSE")
    }

    if (!is.logical(splits) || anyNA(splits)) {
      stop("splits must be logical, either TRUE or FALSE")
    }

    if (!is.numeric(split_length)) {
      stop("split_length must be numeric, usually 50 or 25")
    }

    if (!is.logical(relay_swimmers) || anyNA(relay_swimmers)) {
      stop("relay_swimmers must be logical, either TRUE or FALSE")
    }

    #### strings that if a line begins with one of them the line is ignored ####
    avoid_default <-
      c(
        "[A-S]\\:", # to allow EVENT:
        "[U-Z]\\:", # to allow EVENT:
        "[A-MO-Z]T\\:", # to allow EVENT:
        "[a-q]\\:", # want to make sure to include r: for reaction times in splits lines
        "[s-z]\\:", # want to make sure to include r: for reaction times in splits lines
        "[:alpha:]r\\:",
        "\\.\\:",
        "\\d\\:\\s",
        "\\'\\:",
        "QUALIFYING "
      )

    #### combine avoid and avoid_default ####
    avoid_non_splash <- unique(c(avoid, avoid_default))

    #### confirm input came from read_results ####
    # isTRUE() plus the length guard turn empty, NULL or NA input into the
    # intended error message rather than a failure inside `if`.
    flag_present <- length(file) > 0 &&
      isTRUE(stringr::str_detect(file[1], "^read_results_flag$"))

    if (!flag_present) {
      # if read_results flag isn't present post an error
      stop("Please run read_results on file prior to running swim_parse.")
    }

    # remove read_results flag
    file <- file[-1]

    if (length(file) <= 1) {
      stop("No results found. Please check source.")
    }

    #### helpers for sniffing the results format ####
    # TRUE when any line matches `pattern`; NA lines count as "no match" so
    # the result is always a usable TRUE/FALSE.
    any_match <- function(lines, pattern) {
      any(stringr::str_detect(lines, pattern), na.rm = TRUE)
    }

    # First `n` lines of file, never indexing past its end (file[1:n] would pad
    # short inputs with NA).
    first_lines <- function(n) {
      file[seq_len(min(n, length(file)))]
    }

    #### dispatch to the format-specific parser ####
    if (isTRUE(stringr::str_detect(file[1], "^A107"))) {
      #### .hy3 files ####
      data <- hy3_parse(file = file)

    } else if (any_match(first_lines(6), "S\\.A\\.M\\.M\\.S\\.|MEET SANCTION NUMBER")) {
      #### S.A.M.M.S. ####
      data <- swim_parse_samms(
        file_samms = file,
        avoid_samms = avoid_non_splash,
        typo_samms = typo,
        replacement_samms = replacement,
        format_samms = format_results
      )

    } else if (any_match(file, "Splash Meet Manager")) {
      #### Splash Meet Results ####
      # Splash results use their own (placeholder) default avoid list instead
      # of avoid_default.
      avoid_default_splash <- c("abcxyz")
      avoid_splash <- unique(c(avoid, avoid_default_splash))

      data <- swim_parse_splash(
        file_splash = file,
        avoid_splash = avoid_splash,
        typo_splash = typo,
        replacement_splash = replacement,
        splits = splits,
        split_length_splash = split_length,
        relay_swimmers_splash = relay_swimmers
      )

    } else if (any_match(first_lines(6), "( ISL )|(^ISL )") ||
               any_match(
                 file,
                 "Cali Condors|LA Current|Energy Standard|DC Trident|Aqua Centurions|London Roar"
               )) {
      #### ISL ####
      data <- swim_parse_ISL(
        file = file,
        splits = splits,
        relay_swimmers = relay_swimmers
      )

    } else if (any_match(file, "(Official Timekeeping by Omega)|(Report created )")) {
      #### Omega Files ####
      data <- swim_parse_omega(
        file_omega = file,
        avoid_omega = avoid_non_splash,
        typo_omega = typo,
        replacement_omega = replacement,
        splits = splits,
        split_length_omega = split_length,
        relay_swimmers_omega = relay_swimmers
      )

    } else if (any_match(first_lines(10), "Top Times")) {
      #### Hytek Top Times ####
      data <- toptimes_parse_hytek(
        file_hytek_toptimes = file,
        avoid_hytek_toptimes = avoid_non_splash,
        typo_hytek_toptimes = typo,
        replacement_hytek_toptimes = replacement
      )

    } else {
      #### Hytek Meet Results ####
      # fallback when no other format signature is detected
      data <- swim_parse_hytek(
        file_hytek = file,
        avoid_hytek = avoid_non_splash,
        typo_hytek = typo,
        replacement_hytek = replacement,
        splits = splits,
        split_length_hytek = split_length,
        relay_swimmers_hytek = relay_swimmers
      )
    }

    return(data)
  }
# Lower-case alias bound to the same function object as Swim_Parse; the
# roxygen tags below document it on the Swim_Parse help page and export it.
#' @rdname Swim_Parse
#' @export
swim_parse <- Swim_Parse
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.