#' @export
spr_qual_scrape <- function(url,event_info,event_type = "Sprint"){
#Same as distance case except rename rank to rankqual
spr_qual_out <- dst_scrape(url = url,event_info = event_info,event_type = event_type)
spr_qual_out[["result"]] <- spr_qual_out[["result"]] %>%
rename(rankqual = rank) %>%
mutate(eventid_sq = paste0("SQ",eventid)) %>%
select(eventid_sq,everything())
spr_qual_out
}
#' @importFrom purrr discard
spr_final_scrape <- function(event_info,idx,override_eventid = NULL){
message("Pulling spr final results...")
#Load html
page <- safe_retry_read_html(x = event_info$url$final[idx])
#Two attempts tp get competitor ids
compids <- page %>%
html_nodes(xpath = "//*[contains(@data-link,'athlete-biography')]") %>%
html_attrs() %>%
map(.x = .,.f = magrittr::extract2,"data-link") %>%
stringr::str_extract("competitorid=[0-9]+") %>%
stringr::str_replace("competitorid=","") %>%
trim_compids()
if (length(compids) == 0){
compids <- page %>%
html_nodes(xpath = "//*[contains(@href,'athlete-biography')]") %>%
html_attrs() %>%
map(.x = .,.f = magrittr::extract2,"href") %>%
stringr::str_extract("competitorid=[0-9]+") %>%
stringr::str_replace("competitorid=","") %>%
trim_compids()
}
#Site
site <- get_event_site(event_info$url$final[idx])
# All rows with sanctions
page_tbl <- page %>%
html_nodes(css = ".g-row.justify-sb,.g-xs-24.bold,.g-xs-24.container") %>%
map(.f = row_text_extractor)
#Remove Final, Semifinal, Quarterfinal & Qualification header rows
flag_headers <- function(x) length(x) == 1 && x %in% c("Final","Semifinal","Quarterfinal","Qualification")
page_tbl <- purrr::discard(.x = page_tbl,.p = flag_headers)
#Remove garbage leading rows, start with row beginning with 'Rank'
first_row <- min(which(sapply(page_tbl,function(x) x[1] == "Rank")))
page_tbl <- page_tbl[first_row:length(page_tbl)]
any_notes <- any(lengths(page_tbl) == 1)
# All rows without sanctions
race <- page %>%
html_nodes(css = ".g-row.justify-sb") %>%
map(.f = row_text_extractor)
cn <- keep(race,function(x) x[1] == "Rank")[[1]]
race <- race %>%
purrr::keep(~length(.) >= 5) %>%
purrr::discard(~grepl("Obstruction|Written|Verbal|Reprimand|Monetary Fine|Disqualification|False Start",paste(.,collapse = "")))
race <- race[-1]
row_lengths <- sapply(race,length)
unique_row_lengths <- unique(row_lengths)
if (length(unique_row_lengths) == 2 && diff(unique_row_lengths) == -1){
message("Removing 'Bib' column to align columns...",appendLF = TRUE)
race <- lapply(X = race,FUN = \(x) if (length(x) == unique_row_lengths[1]) x[-2] else x)
cn_short <- cn[-2]
race <- race %>%
purrr::map(.f = function(x) setNames(x,cn_short[1:length(x)]))
} else {
race <- race %>%
purrr::map(.f = function(x) setNames(x,cn[1:length(x)]))
}
new_row_lengths <- unique(sapply(race,length))
if (length(new_row_lengths) != 1) {
browser()
}
race <- race %>%
setNames(.,compids) %>%
bind_rows(.id = "compid") %>%
select(-matches("Bib")) %>%
janitor::clean_names(.,case = "snake") %>%
rename(fisid = fis_code,name = athlete,
yob = year) %>%
rename_at(.vars = vars(matches("fis_points")),.funs = function(x) "fispoints") %>%
mutate(rank = as.integer(stringr::str_trim(rank)),
notes = NA_character_)
if (any_notes){
# Add notes about DNS, DNF, DSQ, sanctions, etc.
first_note <- min(which(lengths(page_tbl) == 1))
notes <- page_tbl[first_note:length(page_tbl)]
note_compids <- compids[(first_note - 1):length(compids)]
#Split notes
notes_list <- split(x = notes,f = cumsum(sapply(notes,find_note_headers)))
notes_list <- setNames(lapply(notes_list,`[`,-1),lapply(notes_list,`[[`,1))
notes_fisids <- lapply(notes_list,find_fisid)
names(notes_list) <- stringr::str_replace(names(notes_list),"2nd Run$|1st Run$","")
names(notes_list) <- stringr::str_trim(names(notes_list),side = "both")
notes_list <- purrr::imap(notes_list,build_notes)
#Transfer DNS, DNF, etc info to notes column
for (i in seq_along(notes_list)){
cur_notes <- notes_list[[i]]
cur_notes <- filter(cur_notes,fisid %in% race$fisid)
if (nrow(cur_notes) == 0) {
next
}else {
if (anyDuplicated(cur_notes$fisid)){
cur_notes <- cur_notes %>%
group_by(fisid) %>%
summarise(notes = paste(notes,collapse = ", ")) %>%
as.data.frame()
}
race$notes[race$fisid %in% cur_notes$fisid] <- cur_notes$notes
}
}
}
if (is.null(override_eventid)){
ev_id <- get_max_eventid() + 1
}else {
ev_id <- override_eventid
}
#Final packaging
race <- race %>%
mutate(name = stringr::str_trim(name),
name = stringr::str_squish(name),
yob = as.integer(yob),
nation = stringr::str_trim(nation)) %>%
mutate(eventid = ev_id,
eventid_sf = paste0(ev_id,LETTERS[idx]),
date = event_info[["date"]],
season = event_info[["season"]],
cat1 = event_info[["cat1"]],
cat2 = event_info[["cat2"]],
spr_fin_cat = event_info$url$cat[idx],
location = event_info[["location"]],
site = site,
gender = event_info[["gender"]],
tech = event_info[["tech"]],
length = event_info[["length"]])
#race_penalty <- spr_race_penalty(result_data = race,event_date = event_info[["date"]])
skier <- race %>%
select(compid,fisid,name,yob) %>%
mutate(compid = as.integer(compid),
fisid = as.character(fisid),
name = as.character(name),
yob = as.integer(yob),
birth_date = NA_character_)
event <- race %>%
select(eventid,season,date,location,site,cat1,cat2,gender,length,tech) %>%
distinct()
event_tags1 <- data.frame(eventid = race$eventid[1],
tag = event_info[["primary_tag"]],
primary_tag = "Y")
n_tags <- length(event_info[["other_tags"]])
if (n_tags > 0){
event_tags2 <- data.frame(eventid = rep(race$eventid[1],n_tags),
tag = event_info[["other_tags"]],
primary_tag = rep("N",n_tags))
event_tags <- dplyr::bind_rows(event_tags1,
event_tags2)
} else {
event_tags <- event_tags1
}
result <- race %>%
select(eventid_sf,eventid,spr_fin_cat,compid,nation,rank,notes)
return(list(event = event,
event_tags = event_tags,
skier = skier,
result = result,
race = race))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.