pacman::p_load(dplyr, ggplot2, rvest, xml2, stringr, purrr, RSelenium, wdman, seleniumPipes)
get_dates <- function(month_nr, years){ months <- tibble( months = c( "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December" ), mnumber = 1:12 ) %>% filter(mnumber %in% month_nr) out <- expand.grid(months$months, years) %>% rename(month = Var1, year = Var2) %>% split(1:nrow(.)) %>% map_chr(~paste(.x$month, .x$year)) %>% as.character() return(out) } get_dates(1:9, 2016:2018) leagues <- list( "challenge-france" = c( "November 2016", "January 2017", "February 2017", "March 2017", "November 2017", "December 2017" ), "eu-lcs" = get_dates(1:9, 2016:2018), "na-lcs" = get_dates(1:9, 2016:2018), "lck" = get_dates(1:9, 2016:2018), "lpl-china" = get_dates(1:9, 2016:2018), "eu-cs" = get_dates(1:8, 2016:2017), "na-cs" = get_dates(1:7, 2016:2017), "worlds" = get_dates(9:12, 2015:2018), "msi" = get_dates(4:5, 2016:2018), "all-star" = get_dates(12, 2016:2018), "rift-rivals" = get_dates(7, 2017:2018) ) unlist(leagues) %>% length leagues_url <- leagues %>% map2(.x = ., .y = names(leagues), ~{ tournament <- .y urls <- .x %>% str_split("\\s") %>% map(~{ url <- paste0( "https://eu.lolesports.com/en/schedule#slug=", tournament, "&tab=results", "&resultsMonth=", .x[1], "%20", .x[2] ) return(tibble(tournament, year = .[2], month = .[1], url)) }) %>% purrr::reduce(bind_rows) }) %>% purrr::reduce(bind_rows) leagues_url %>% count(url) %>% arrange(desc(n))
get_date <- function(html){ html %>% .[1] %>% # index to map over html_node("h2") %>% html_text() } get_time <- function(html){ html %>% .[1] %>% html_text() %>% str_replace("Approx", "") %>% str_trim } get_rounds <- function(html){ html %>% .[2] %>% html_nodes("span") %>% html_text() %>% .[stringr::str_detect(.,"Best of")] %>% str_extract("[[:digit:]]") %>% as.numeric() #paste(., collapse = " ") } get_teams <- function(html){ links <- html %>% .[2] %>% html_nodes("a") # team_url <- links %>% # .[c(1,3)] %>% # as.character() %>% # str_extract('/en/team/.*?\\"') %>% # str_replace('\"', '') %>% # ifelse(is.null(.), NA, .) team_logo <- links %>% .[c(1,3)] %>% as.character() %>% str_extract("https?[:]//[[:graph:]]+") %>% str_replace('\"', '') team <- links %>% .[c(2,4)] %>% html_text() team_blue <- tibble(team, team_logo)[1,] colnames(team_blue) <- paste0("blue_", colnames(team_blue)) team_red <- tibble(team, team_logo)[2,] colnames(team_red) <- paste0("red_", colnames(team_red)) return(bind_cols(team_blue, team_red)) } get_match_link <- function(html){ html %>% .[3] %>% #html_children() html_nodes("a") %>% as.character() %>% str_extract('href=\\".*?\\"') %>% str_replace_all('href=\\"|\\"', '') }
extract_matches_esport <- function(html){ game_list <- html %>% # html_node("body") %>% # html_node("main") %>% ### results table level html_node("#results") %>% html_children() %>% ### Need Iterator over date/tournaments purrr::map(~{ main <- .x %>% html_children() %>% .[2] %>% html_children() %>% ### Event level (Day/ Tournament) map(~{ time <- .x %>% html_children() %>% get_time teams <- .x %>% html_children() %>% get_teams rounds <- .x %>% html_children() %>% get_rounds %>% ifelse(length(.) == 0, NA, .) game_link <- .x %>% html_children() %>% get_match_link() return(tibble(time, game_link, rounds) %>% bind_cols(teams)) }) ### append date per list of games a day out <- main %>% purrr::reduce(bind_rows) %>% dplyr::mutate( date = .x %>% html_children() %>% .[1] %>% get_date ) return(out) }) return(game_list) } #match_list <- extract_matches_esport(html)
#match_list %>% # reduce(bind_rows)
#rD <- rsDriver() #remDr <- rD$client # remDr$open() # # base_url <- "https://eu.lolesports.com/en/schedule#slug=rift-rivals&tab=results" # # base_html <- base_url %>% # read_html() # # remDr$navigate(base_url) # base_html2 <- remDr$getPageSource()[[1]] %>% # read_html # # match_list <- extract_matches_esport(base_html2) # # match_list %>% # purrr::reduce(bind_rows)
rD <- rsDriver() remDr <- rD$client remDr$open() get_matches <- function(x){ #url <- "https://eu.lolesports.com/en/schedule#slug=challenge-france&tab=results&resultsMonth=January%202017" remDr$navigate(x$url) remDr$refresh() #Sys.sleep(1) html <- remDr$getPageSource()[[1]] %>% read_html out <- extract_matches_esport(html) %>% purrr::reduce(bind_rows) %>% mutate( year = x$year, month = x$month, event = x$tournament, event_url = x$url ) return(out) } get_matches_safe <- purrr::safely(get_matches) get_matches_safe_pro <- tidyMBO::progressively(get_matches_safe, length(leagues_url)) table(duplicated(leagues_url)) leagues_matches <- leagues_url %>% split(1:nrow(.)) %>% map(get_matches_safe_pro) length(leagues_matches) leagues_matches %>% map("error") match_esports <- leagues_matches %>% map("result") %>% purrr::reduce(bind_rows) #arrange(game_link) %>% load("match_esports.Rdata") dplyr::glimpse(match_esports) # match_esports$rounds %>% sum # nrow(match_esports) table(duplicated(match_esports$game_link)) match_esports %>% filter(game_link == "/en/msi/msi_2018/match/2018-05-20/royal-never-give-up-vs-kingzone-dragonx") # dd <- match_esports %>% # #filter(!duplicated(game_link)) %>% # count(game_link) %>% # arrange(desc(n)) # .$game_link#[1:10] # #save(match_esports, file = "match_esports.Rdata")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.