Nothing
#' Collects splits within \code{swim_parse_ISL}
#'
#' Takes the output of \code{read_results} and, inside of \code{swim_parse_ISL},
#' extracts split times and associated row numbers
#'
#' @importFrom dplyr bind_rows
#' @importFrom dplyr rename_with
#' @importFrom dplyr vars
#' @importFrom stringr str_replace_all
#' @importFrom stringr str_remove_all
#' @importFrom stringr str_extract_all
#' @importFrom stringr str_extract
#' @importFrom stringr str_split
#' @importFrom stringr str_detect
#' @importFrom purrr map
#'
#' @param text output of \code{read_results} with tow numbers appended by
#' \code{add_row_numbers}
#' @return returns a data frame with split times and row numbers
#'
#' @seealso \code{splits_parse_ISL} runs inside \code{\link{swim_parse_ISL}} on
#' the output of \code{\link{read_results}} with row numbers from
#' \code{\link{add_row_numbers}}
splits_parse_ISL <- function(text) {
### testing ###
# file <- "https://github.com/gpilgrim2670/Pilgrim_Data/raw/master/ISL/Season_1_2019/ISL_16112019_CollegePark_Day_1.pdf"
# file <- read_results(file)
# text <- add_row_numbers(text = file)
### define strings ###
split_string <- "\\s\\d\\d\\.\\d\\d\\s"
### collect row numbers from rows containing splits ###
row_numbs_ind <- text %>%
.[stringr::str_detect(.,
split_string)] %>%
.[stringr::str_detect(.,
"[:alpha:]", negate = TRUE)] %>%
stringr::str_extract_all("\\d{1,}$")
#### if there are still no valid splits return blank dataframe ####
if (length(row_numbs_ind) > 0) {
minimum_row <- min(as.numeric(row_numbs_ind))
#### pull out rows containing splits, which will also remove row numbers ####
split_rows_ind <- unlist(row_numbs_ind) %>%
paste0(" ", ., "$") %>%
paste(collapse = "|")
suppressWarnings(
data_1_ind <- text %>%
.[stringr::str_detect(., split_rows_ind)] %>%
stringr::str_remove_all("\n") %>%
stringr::str_remove_all("r\\:\\+\\s?\\d\\.\\d\\d") %>%
stringr::str_extract_all(paste0(
"^\\s+\\d\\d\\.\\d\\d|", split_string
)) %>%
stringr::str_remove_all('\\"') %>%
stringr::str_replace_all("\\(", " ") %>%
stringr::str_replace_all("\\)", " ") %>%
stringr::str_remove_all("c") %>%
stringr::str_remove_all(',') %>%
trimws()
)
#### add in blank spot for missing 50 split ####
data_1_ind <- paste("00.00", data_1_ind, sep = " ") # add in blank spot for missing 50 split
#### add row numbers back in since they were removed ####
data_1_ind <- paste(row_numbs_ind, data_1_ind, sep = " ")
#### break out by length ####
data_1_ind <-
unlist(purrr::map(data_1_ind, stringr::str_split, "\\s{2,}"),
recursive = FALSE)
data_length_3 <- data_1_ind[purrr::map(data_1_ind, length) == 3] # 100s
data_length_5 <- data_1_ind[purrr::map(data_1_ind, length) == 5] # 200s
data_length_9 <- data_1_ind[purrr::map(data_1_ind, length) == 9] # 400s
#### transform all lists to dataframes ####
if (length(data_length_9) > 0) { # splits from 400M races
df_9 <- data_length_9 %>%
list_transform() %>%
mutate(V1 = as.numeric(V1) -1) %>%
mutate(V1 = as.character(V1))
} else {
df_9 <- data.frame(Row_Numb = character(),
stringsAsFactors = FALSE)
}
if (length(data_length_5) > 0) { # splits from 200M races
df_5 <- data_length_5 %>%
list_transform()
} else {
df_5 <- data.frame(Row_Numb = character(),
stringsAsFactors = FALSE)
}
if (length(data_length_3) > 0) { # splits from 100M races
df_3 <- data_length_3 %>%
list_transform()
} else {
df_3 <- data.frame(Row_Numb = character(),
stringsAsFactors = FALSE)
}
#### relays ####
row_numbs_relay <- text %>%
.[stringr::str_detect(.,
split_string)] %>%
.[stringr::str_detect(.,
"[:alpha:]")] %>%
.[stringr::str_detect(.,
"^\n\\s*\\=?\\d", negate = TRUE)] %>%
stringr::str_extract_all("\\d{1,}$")
#### pull out rows containing splits, which will also remove row numbers ####
split_rows_relay <- unlist(row_numbs_relay) %>%
paste0(" ", ., "$") %>%
paste(collapse = "|")
suppressWarnings(
data_1_relay <- text %>%
.[stringr::str_detect(., split_rows_relay)] %>%
stringr::str_remove_all("\n") %>%
stringr::str_remove_all("(?<=\\)) [\\d|\\.\\:]+") %>%
stringr::str_extract_all("\\d?\\:?\\d\\d\\.\\d\\d") %>%
stringr::str_remove_all('\\"') %>%
stringr::str_replace_all("\\(", " ") %>%
stringr::str_replace_all("\\)", " ") %>%
stringr::str_remove_all("c") %>%
stringr::str_replace_all("harater 0 ", "00.00, 00.00") %>%
stringr::str_remove_all(',') %>%
stringr::str_replace_all(" ", " ") %>%
.[stringr::str_detect(.,
"[:alpha:]", negate = TRUE)] %>%
trimws()
)
data_1_relay <- paste(row_numbs_relay, data_1_relay, sep = " ")
data_1_relay <-
unlist(purrr::map(data_1_relay, stringr::str_split, "\\s{2,}"),
recursive = FALSE)
data_length_3_relay <- data_1_relay[purrr::map(data_1_relay, length) == 3] # 100 splits for relays
if (length(data_length_3_relay) > 0) {
# splits from 100M relay legs
df_3_relay <- data_length_3_relay %>%
list_transform()
} else {
df_3_relay <- data.frame(Row_Numb = character(),
stringsAsFactors = FALSE)
}
#### bind up results ####
# results are bound before going to lines_sort so that in cases where there are multiple rows with splits for the same race,
# like in longer events with many splits, those splits can be collected and treated together
data_splits <-
dplyr::bind_rows(df_9, df_5, df_3, df_3_relay) %>%
lines_sort(min_row = minimum_row) %>%
dplyr::mutate(Row_Numb = as.numeric(Row_Numb) - 1) # make row number of split match row number of performance
# dplyr::filter(Row_Numb < maximum_row)
#### rename columns V1, V2 etc. by 50 ####
old_names <- names(data_splits)[grep("^V", names(data_splits))]
new_names <-
paste("Split", seq(1, length(names(data_splits)) - 1) * 50, sep = "_")
data_splits <- data_splits %>%
dplyr::rename_at(dplyr::vars(old_names), ~ new_names)
# data_splits <- data_splits %>%
# dplyr::rename_with(~ new_names[which(old_names == .x)], cols = old_names)
} else { # if there are no rows with valid splits return blank dataframe
data_splits <- data.frame(Row_Numb = as.numeric())
}
data_splits <- data_splits %>%
na_if_character("00.00")
return(data_splits)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.