Nothing
#' Cleans input strings
#'
#' Cleans input from \code{read_results} that is passed to
#' \code{hytek_swim_parse}, removing unneeded characters and otherwise setting
#' it up for sorting. Input is in the form of character strings.
#'
#' @details The function works in three stages:
#' \enumerate{
#'   \item Filtering. A leading newline (plus any whitespace after it) is
#'   stripped, then only strings that satisfy all of the following are kept:
#'   longer than 50 characters; match \code{time_score_string} or one of
#'   "DQ", "SCR", "DFS"; contain at least two consecutive letters followed
#'   later by another letter; do not look like a reaction time line; do not
#'   start with "50m"; contain fewer than two "digit)" occurrences.
#'   \item Scrubbing. A fixed sequence of regex replacements removes record
#'   designators, qualifier letters, relay designators, four digit meet IDs and
#'   similar clutter around times and points, after which leading and trailing
#'   whitespace is trimmed.
#'   \item Spacing. A second fixed sequence of regex replacements adjusts the
#'   whitespace between fields (place, name, age, team, times, row number).
#' }
#' The replacement steps are applied strictly in the order written; later
#' patterns rely on the output of earlier ones.
#'
#' An error is raised with \code{stop} if \code{time_score_string} is not a
#' character string. \code{x} itself is not validated.
#'
#' @importFrom stringr str_remove
#' @importFrom stringr str_length
#' @importFrom stringr str_count
#' @importFrom stringr str_remove_all
#' @importFrom stringr str_detect
#' @importFrom stringr str_replace
#' @importFrom stringr str_replace_all
#'
#' @param x a list of character strings
#' @param time_score_string a regex string for matching results (times and
#' scores) but not special strings like DQ. Defaults to
#' \code{Time_Score_String}, which is defined outside this function.
#' @return returns a list of character strings that have been cleaned in
#' preparation for parsing/sorting
#'
#' @seealso \code{hytek_clean_strings} runs inside of
#' \code{hytek_parse_splash}
hytek_clean_strings <- function(x,
time_score_string = Time_Score_String){
#### testing ####
# x <- as_lines_list_2
#### check input types ####
# only the regex argument is checked; x is passed to stringr as-is
if(is.character(time_score_string) == FALSE){
stop("time_score_string should be regex, as a character string")
}
#### actual function ####
# Stage 1 (filters, the `.[...]` steps) and stage 2 (scrubbing replacements).
# Order matters: each step operates on the output of the previous one.
data_cleaned <- x %>%
stringr::str_remove("^\n\\s{0,}") %>%
.[stringr::str_length(.) > 50] %>% # slight speed boost from cutting down length of file
.[stringr::str_detect(., paste0(time_score_string, "|DQ|SCR|DFS"))] %>% # must have \\.\\d\\d because all swimming and diving times do
.[stringr::str_detect(., "[:alpha:]{2,}.*[:alpha:]")] %>% # need some letters, need them to not just be a single instance of DQ etc.
.[stringr::str_detect(., "r\\:\\+?\\-?\\s?\\d", negate = TRUE)] %>% # remove reaction times
.[stringr::str_detect(., "^50m", negate = TRUE)] %>% # remove British results that start with 50m for splits lines
.[stringr::str_count(., "\\d\\)") < 2] %>% # remove inline splits from older style hy-tek results circa 2005
stringr::str_replace_all("\\s?[&%]\\s?", " ") %>% # added 8/21 for removing "&" and "%" as record designator
stringr::str_remove_all("(?<=\\d\\.\\d{2}\\s?)[:punct:]") %>% # remove symbols attached to times as record designator
# NOTE(review): inside a character class "|" is a literal, so this class also matches "|" - confirm that is intended
stringr::str_remove_all("(?<=\\.\\d{2}\\s?)[A-WYZ|\\$|q](?=\\s)") %>% # remove letters attached to times as record designator
stringr::str_replace_all(" [qQ](?=\\d{1,5} )", " ") %>% # remove q|Q attached to points
# removed J etc. from next to swim, but does not remove X or x (for exhibition tracking)
# the first four steps strip letters glued before/after mm:ss.hh and ss.hh style values,
# the next four do the same for punctuation
stringr::str_replace_all("[A-WYZa-wyz]+(\\d{1,2}\\:\\d{2}\\.\\d{2})", "\\1") %>%
stringr::str_replace_all("(\\d{1,2}\\:\\d{2}\\.\\d{2})[A-WYZa-wyz]+", "\\1") %>%
stringr::str_replace_all("[A-WYZa-wyz]+(\\d{2,3}\\.\\d{2})", "\\1") %>%
stringr::str_replace_all("(\\d{2,3}\\.\\d{2})[A-WYZa-wyz]+", "\\1") %>%
stringr::str_replace_all(" [:punct:]+(\\d{1,2}\\:\\d{2}\\.\\d{2})", " \\1") %>%
# NOTE(review): trailing punctuation is replaced by a space *before* the time here,
# while the ss.hh variant two steps below puts the space *after* - confirm which is intended
stringr::str_replace_all("(\\d{1,2}\\:\\d{2}\\.\\d{2})[:punct:]+", " \\1") %>%
stringr::str_replace_all(" [:punct:]+(\\d{2,3}\\.\\d{2})", " \\1") %>%
stringr::str_replace_all("(\\d{2,3}\\.\\d{2})[:punct:]+", "\\1 ") %>%
stringr::str_remove_all("\\s{2}J\\s{2}") %>%
# remove 'A', 'B' etc. relay designators - should this go in typo instead?
stringr::str_replace_all(" \\'[A-Z]\\' ", " ") %>%
stringr::str_replace_all(" [A-WYZ] ", " ") %>%
stringr::str_replace_all("\\'\\'", " ") %>%
# remove meet ID from old result
stringr::str_replace_all(" \\d{4} ", " ") %>%
stringr::str_replace_all(" \\d{1,3} (?=\\s*\\d{1,2}\\.?\\d?\\d?\\s+\\d{1,5}$)", " ") %>% # remove column of powerpoint values in NYS Boys 2007. It goes time, powerpoint, actual points, row numb
stringr::str_replace_all("(?<=\\.\\d)(\\d)\\s+X(?=\\s+\\d{1,5}$)", "\\1X") %>% # brings Xs for exhibition that are spaced out in closer
# remove q from next to time 10/21/2020
stringr::str_remove_all(" q ") %>% # removes " q " sometimes used to designate a qualifying time
stringr::str_replace_all("-{2,5}", "10000") %>% #8/26 - runs of 2-5 dashes become the placeholder "10000"
stringr::str_replace_all("(\\.\\d{2})\\d+", "\\1 ") %>% # added 8/21 for illinois to deal with points column merging with final times column
stringr::str_replace_all("\\d{1,2} (\\d{1,})$", " \\1 ") %>% # added 8/21 for illinois to deal with points column merging with final times column
stringr::str_replace_all("\\*(?=[:alpha:])", "_") %>% # for * prior to name for foreign athletes
stringr::str_replace_all("\\*", " ") %>%
stringr::str_replace_all("(?<=\\d)\\.\\s{1,}(?=[:alpha:])", " ") %>% # for British results where places are 1. Name
stringr::str_replace_all("(?<=\\s)\\+\\s(?=[:digit:])", " +") %>% # for reaction times
stringr::str_replace_all("(?<=\\s)\\-\\s(?=[:digit:])", " -") %>% # for reaction times
trimws()
#### insert double spaces where needed ####
# Stage 3: field spacing.
# NOTE(review): as written here, many steps below replace a single space with a
# single space (a no-op). Given the section title, the replacement literals were
# presumably meant to be wider (e.g. two spaces) - verify the string literals
# against the package source before relying on this copy.
data_cleaned <- data_cleaned %>%
stringr::str_replace_all("(?<=\\d)\\s+[:upper:]?\\s+(?=\\d)", " ") %>% # letters like P or M to denote pool or meet record
stringr::str_replace_all("(?<=^\\s?\\d{1,5}) (?=[:alpha:])", " ") %>% # mostly to split place and name
stringr::str_replace_all("(?<=SB?M?\\d{1,2}) (?=[:alpha:])", " ") %>% # split para classifications 1
stringr::str_replace_all("(?<=[:alpha:]) (?=SB?M?\\d{1,2})", " ") %>% # split para classifications 2
stringr::str_replace_all("(?<=\\d) (?=_)", " ") %>% # mostly to split place and name, if name is preceded by "_" as a stand-in for "*"
stringr::str_replace_all("(?<=\\d) (?=\\d)", " ") %>% # mostly to split place team names that start with a number, like in NYS results (5-Fairport etc.)
stringr::str_replace_all("(?<=[:alpha:]),(?=[:alpha:])", ", ") %>% # split names that don't have a space between last,first
stringr::str_replace_all("(?<=[:alpha:])\\. (?=[:digit:])", "\\. ") %>% # split abbreviated team names like Southern Cal. and times
stringr::str_replace_all("(?<=\\d) (?=_)", " ") %>% # spacing between place and athletes with */_ leading name; NOTE(review): same pattern as the place/"_" step above - looks redundant, confirm
stringr::str_replace_all("(?<=\\)) (?=[:alpha:])", " ") %>% # spacing between place) and names
stringr::str_replace_all("\\((?=[:digit:])", " \\(") %>% # spacing between (YoB) and name, for British results
stringr::str_replace_all(" ((M|F|W|B|G)?\\d{1,3}) ", " \\1 ") %>% # split age and team
stringr::str_replace_all(" ((M|F|W|B|G)?FR) ", " \\1 ") %>% # split age and team
stringr::str_replace_all(" ((M|F|W|B|G)?SO) ", " \\1 ") %>% # split age and team
stringr::str_replace_all(" ((M|F|W|B|G)?JR) ", " \\1 ") %>% # split age and team
stringr::str_replace_all(" ((M|F|W|B|G)?SR) ", " \\1 ") %>% # split age and team
stringr::str_replace_all("(SM?B?1\\d{1})(\\d{1,2})", "\\1 \\2") %>% # split para classification and age
stringr::str_replace_all("(\\d{6,7})([:alpha:])", "\\1 \\2") %>% # split Brit ID and Name
stringr::str_replace_all(" NT ", " NT ") %>% # split prelim and final
stringr::str_replace_all(" DQ ", " DQ ") %>% # split prelim and final
stringr::str_replace_all(" DFS ", " DFS ") %>% # split prelim and final
stringr::str_replace_all(" SCR ", " SCR ") %>% # split prelim and final
stringr::str_replace_all("(?<=[:alpha:])\\s{2,3}(?=[:alpha:])", " ") %>% # testing 12/21/2020 would help with typos
stringr::str_replace_all("(?<=[:alpha:]) (?=\\d)", " ") %>% # split name and age
stringr::str_replace_all("(?<=[:alpha:])(\\d{1,3}\\-\\d{2})", " \\1 ") %>% # split name and yyy-mm age
stringr::str_replace_all("(?<=\\,) (?=\\d)", " ") %>% # split name and age if name is so long that it ends with a ","
stringr::str_replace_all("(?<=\\d) (?=\\d{1,}$)", " ") %>% # split off row_numb
stringr::str_replace_all(
"(?<=\\.\\d\\d\\s{1,10}\\d?\\d?\\:?\\d?\\d\\.\\d\\d)\\s{1,10}[:alpha:]{1,5}\\d?\\s{1,10}(?=\\d{1,})",
" "
) %>% # removes "AAC" or "AAA" or "NYS" or "SEC1" etc. from after finals time
# stringr::str_replace_all("NYS|AAA|AAC|SEC\\d{1,2}", " ") %>%
# stringr::str_replace_all("SEC\\d{1,2}", " ") %>% # for old NYS results
# of four consecutive ddd.dd values, keep only the second and fourth
stringr::str_replace_all(
"\\d{3}\\.\\d{2}\\s+(\\d{3}\\.\\d{2})\\s+\\d{3}\\.\\d{2}\\s+(\\d{3}\\.\\d{2})",
"\\1 \\2"
) %>% # for old NCAA results with diving scores and DDs
stringr::str_replace_all("(?<=\\s)S(?=B?M?\\d{1,2})", " S") %>% # for para classifications
stringr::str_remove_all("(?<=\\d\\d\\.\\d\\d )[:upper:]{1,}") # removes AAA, AAC from after time
return(data_cleaned)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.