# R/get_race_year_page.R

#' Scrape one race results page into a data frame
#'
#' Reads the results page at `url`, splits its HTML on the markup that opens
#' each result row, and pulls the individual fields out of every row with
#' regular expressions.
#'
#' @param url Address of the results page for one race in one year; it is
#'   passed straight to `read_html()`.
#' @return A data frame with one row per competitor and the columns `pos`,
#'   `runner_id`, `runner`, `club_name`, `category`, `time` and
#'   `percent_winner`. Values are the text extracted from the page, with no
#'   numeric conversion. A field that cannot be located in a row is `NA`, and
#'   a page without any result rows yields a zero-row data frame.
#' @export
get_race_year_page <- function(url) {
  web_page <- as.character(read_html(url))

  # Every result row opens with this cell markup; the text in front of the
  # first occurrence is page header and is dropped.
  row_marker <- "width=\"50\"><font face=\"Verdana\" size=\"1\">"
  rows <- unlist(strsplit(web_page, row_marker, fixed = TRUE))[-1]

  # The finishing position is the run of digits at the very start of a row.
  pos <- str_extract(rows, "^[0-9]*")

  # Keep only what follows the "RunnerID=" prefix. substring() reads to the
  # end of each string; the stop position must not be derived from the number
  # of rows, which would truncate the ids.
  runner_id <- str_extract(rows, "RunnerID=[A-Z0-9 ]*")
  runner_id <- substring(runner_id, nchar("RunnerID=") + 1L)

  # Each match is the `">` closing a tag plus the plain text after it, so the
  # cell texts sit at fixed match indices within a row.
  matches <- str_extract_all(rows, "\\\">[A-Za-z0-9:. ]*")

  # Text of the k-th match in every row without its leading `">`; NA when a
  # row has fewer than k matches.
  field_at <- function(k) {
    raw <- vapply(
      matches,
      function(m) if (length(m) >= k) m[[k]] else NA_character_,
      character(1)
    )
    substring(raw, 3L)
  }

  data.frame(
    pos = pos,
    runner_id = runner_id,
    runner = field_at(3L),
    club_name = field_at(5L),
    category = field_at(7L),
    time = field_at(9L),
    percent_winner = field_at(11L)
  )
}
# gdking01/ScottishHillRacing documentation built on May 17, 2019, 12:12 a.m.