Nothing
#' Cleans input strings
#'
#' Cleans input from \code{read_results} is passed to \code{splash_swim_parse}
#' to remove unnneded characters and otherwise set it up for sorting. Input is
#' in the form of character strings
#'
#' @importFrom stringr str_remove
#' @importFrom stringr str_length
#' @importFrom stringr str_detect
#' @importFrom stringr str_replace
#' @importFrom stringr str_replace_all
#'
#' @param x a list of character strings
#' @param indent_length a numeric value denoting the number of spaces some
#' results are indented by. \code{indent_length} is determined by
#' \code{splash_determine_indent_length}. Must be a whole number.
#' @param time_score_string a regex string for matching results (times and
#' scores) but not special strings like DQ
#' @param record_string a regex string for matching denoted records, rather than
#' results
#' @param header_string a regex string from matching splash headers/footers
#' included in result documents
#' @param sponsorship_string a regex string for matching sponsorship text within
#' result documents
#' @param reaction_string a regex string for matching reaction times
#' @param rule_string a regex string for matching rule text e.g. 'Rule 4.24'
#' that sometimes accompanies DQs
#' @return returns a list of character strings that have been cleaned in
#' preparation for parsing/sorting
#'
#' #' @seealso \code{splash_clean_strings} runs inside of
#' \code{swim_parse_splash}
splash_clean_strings <-
function(x,
indent_length = Indent_Length,
time_score_string = Time_Score_String,
record_string = Record_String,
header_string = Header_String,
sponsorship_string = Sponsorship_String,
reaction_string = Reaction_String,
rule_string = Rule_String) {
# testing
# x <- as_lines_list_2
# indent_length <- 12
# time_score_string <- "1?\\:?\\d{0,2}\\:?\\d{1,3}\\.\\d{2}"
# record_string <- "^NMR|^E J C|^W J C|^Open [W|M] |[:alpha:] Record "
# header_string <- "\\sDisqualified\\s|\\sReaction\\sTime\\s|Prelims|Finals|Semifinal"
# sponsorship_string <- "sponsored by"
# reaction_string <-
# "^\\+\\s?\\d\\.\\d{3}$|^\\-\\s?\\d\\.\\d{3}$|^0\\.00$|^0\\.\\d\\d$"
# rule_string <- "SW \\d\\d?\\.\\d\\d?"
#### check input types ####
if (all.equal(indent_length, as.integer(indent_length)) == FALSE) {
stop("indent_length must be a whole number")
}
if(is.character(time_score_string) == FALSE){
stop("time_score_string should be regex, as a character string")
}
if(is.character(record_string) == FALSE){
stop("record_string should be regex, as a character string")
}
if(is.character(header_string) == FALSE){
stop("header_string should be regex, as a character string")
}
if(is.character(sponsorship_string) == FALSE){
stop("sponsorship_string should be regex, as a character string")
}
if(is.character(reaction_string) == FALSE){
stop("reaction_string should be regex, as a character string")
}
if(is.character(record_string) == FALSE){
stop("record_string should be regex, as a character string")
}
#### actual function ####
data_cleaned <- x %>%
stringr::str_remove("^\n") %>%
.[stringr::str_detect(.,
paste0("^\\s{", indent_length, ",}"),
negate = TRUE)] %>% # removes relay swimmer rows
stringr::str_remove("^\\s{0,}") %>%
.[stringr::str_length(.) > 50] %>% # slight speed boost from cutting down length of file
.[stringr::str_detect(.,
paste0(time_score_string, "|DSQ|SCR|DNS"))] %>% # must have \\.\\d\\d because all swimming and diving times do
.[stringr::str_detect(.,
paste0(record_string, "|Splash Meet Manager"),
negate = TRUE)] %>%
.[stringr::str_detect(.,
header_string,
negate = TRUE)] %>%
.[stringr::str_detect(.,
sponsorship_string,
negate = TRUE)] %>%
.[stringr::str_detect(., "\\dm\\:", negate = TRUE)] %>% # removes split lines
.[stringr::str_detect(., "\\d\\.\\d{2}\\s+[[:alpha:]\\'\\.]{2,}", negate = TRUE)] %>% # removes relay swimmer rows
.[stringr::str_detect(., reaction_string, negate = TRUE)] %>% # also removes relay swimmer rows
.[stringr::str_detect(., rule_string, negate = TRUE)] %>% # also removes rows with rule numbers for DQ reasons
stringr::str_replace_all("(?<=\\d\\.) (?=[:alpha:])", " ") %>% # split places (1.) and names
stringr::str_replace_all("(?<=^DNS)(?=[:alpha:])", " ") %>% # split DNS and names
stringr::str_replace_all("(?<=^DSQ)(?=[:alpha:])", " ") %>% # split DNS and names
stringr::str_replace_all("(?<=\\d) (?=\\d)", " ") %>% # split times and scores
stringr::str_replace_all("(?<=[:alpha:]\\.) (?=\\d\\d)", " ") %>% # split names ending in "." and ages
stringr::str_replace_all("(?<=[:alpha:]) (?=\\d)", " ") %>% # split names and ages
stringr::str_replace_all(" \\? ", " ") %>% # remove ? as label
stringr::str_replace_all(" \\* ", " ") %>%
stringr::str_replace_all("(?<=\\d)\\s+[:upper:]R?\\*?\\s", " ") %>% # remove Q, R etc. as label
stringr::str_replace_all("(?<=\\d)[:upper:]R?\\*?\\s", " ") %>% # remove Q, R etc. as label
stringr::str_replace_all("(?<=\\d)[:upper:]{1,2}[:lower:]{0,2}\\.?\\*?\\s", " ") %>% # remove Q, R etc. as label
stringr::str_replace_all("(?<=\\d)\\*[:alpha:]{0,4}\\.?\\s", " ") %>% # remove * as label
stringr::str_replace_all(" q ", " ") %>% # remove Q, R etc. as label
stringr::str_replace_all("(?<=\\d)\\.(?=[:alpha:])", "\\. ") %>%
stringr::str_replace_all(" ([:upper:]{2,3})\\s\\s+([:alpha:]{2,}\\s?[:alpha:]{0,})",
" \\1-\\2 ") %>% # merge team and country names
trimws()
data_cleaned <- data_cleaned %>%
stringr::str_replace("^DNS", "888\\. DNS") %>% # splash for dealing with ties, DQS etc.
stringr::str_replace("^DFS", "888\\. DFS") %>% # splash for dealing with ties
stringr::str_replace("^DSQ", "888\\. DSQ") %>% # splash for dealing with ties
stringr::str_replace("^([^[0-9]])", "999\\. \\1") %>% # splash for dealing with ties
stringr::str_replace(" \\/ ", "/") %>% # splash for dealing with Heat/Lane columns
stringr::str_replace_all("([:alpha:]\\.?\\:?\\s?)(\\d{1,2}[\\:|\\.])", "\\1 \\2") %>% # splits teams and times
stringr::str_remove("^888\\. ") %>% # want to keep DSQ, DNS in first column, but need to move ties over one column
stringr::str_replace("(?<=[:alpha:])\\s{1,}\\d{1,4}\\s{0,}(?=\\s{2}\\d{1,2}(\\:|\\.))",
" ") %>% # remove numbers floating off of team names
stringr::str_replace("(?<=[:alpha:]\\s{1,4})\\d{1,4}\\s{0,}(?=\\s{2}\\d{1,5}$)",
" ") %>% # remove numbers floating off of team names
stringr::str_replace("(?<=[:alpha:])\\s{1,2}\\d{4}\\s{1,2}(?=[:alpha:])", " ") %>%
stringr::str_replace_all("(\\s{2}\\d{2}\\s)(?=[:alpha:])", "\\1 ") %>% # splits ages and teams
stringr::str_replace_all("(?<=[:alpha:])(?=\\d{1,3})", " ") %>% # splits ages and teams
stringr::str_replace_all("(\\.\\d{2}\\s)\\s(?=\\d{3})", "\\1 ") %>% # splits times and scores
stringr::str_replace_all("(\\d{2,3})\\s(?=[\\+|\\-]\\d\\.\\d{2})", " ") %>% # splits reaction times and scores
stringr::str_replace("DNS ", "DNS ") %>%
stringr::str_replace("DFS ", "DFS ") %>%
stringr::str_replace("DSQ ", "DSQ ") %>%
stringr::str_replace_all("1950 e.V:", " ") # bug fix for 2018 Euros
return(data_cleaned)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.