# devtools::install_github("stillmatic/leagueR") pacman::p_load(dplyr, purrr, ggplot2, rvest, stringr)
#' Scrape one page of the gamesoflegends.com match list.
#'
#' Reads the page at `url`, takes its first HTML table as the match data and
#' enriches it with the game id and absolute link parsed out of every anchor
#' whose markup contains "gameshow".
#'
#' @param url Location of a match-list page, passed to `xml2::read_html()`.
#' @return A tibble: the first table of the page without its `Red` and `Blue`
#'   columns (names cleaned by `janitor::clean_names()`), left-joined with
#'   `game`, `game_id` and `link`, keeping the first row per `game_id`.
scrape_matchlist <- function(url) {
  html <- xml2::read_html(url)

  match_data <- html %>%
    html_table() %>%
    .[[1]] %>%
    as_tibble() %>%
    select(-Red, -Blue) %>%
    janitor::clean_names()

  # Raw markup of every anchor that points at a game page.
  match_list <- html %>%
    html_nodes("a") %>%
    as.character() %>%
    .[str_detect(., "gameshow")]

  # Relative "../..." href up to the numeric id, made absolute.
  link <- match_list %>%
    str_extract("\\.\\..*?\\d{3,}") %>%
    str_replace("\\.\\.", "") %>%
    paste0("http://www.gamesoflegends.com", .)

  game_id <- link %>%
    str_extract("id=\\d{2,}") %>%
    str_replace("id=", "") %>%
    as.numeric()

  # Anchor text, i.e. whatever sits between ">" and "</a>".
  game <- match_list %>%
    str_extract(">.*?</a>") %>%
    str_replace_all(">|<\\/a", "")

  extra_data <- tibble(game, game_id, link)

  # `game` is the only column `extra_data` is expected to share with the
  # scraped table; naming it avoids the implicit natural join and its message.
  match_data %>%
    left_join(extra_data, by = "game") %>%
    filter(!duplicated(game_id))
}

# `safely()` keeps one failing page from aborting the whole crawl: each call
# returns a list with `result` and `error` elements.
scrape_matchlist_safe <- purrr::safely(scrape_matchlist)

base_url <- "http://www.gamesoflegends.com/home/home.php?region=ALL&start="
target_url <- paste0(base_url, seq(0, 5290, by = 10))

scrape_matchlist_safe_progress <- tidyMBO::progressively(
  scrape_matchlist_safe,
  length(target_url)
)

match_list <- target_url %>%
  map(scrape_matchlist_safe_progress)

# Stack all successfully scraped pages in one pass. `bind_rows()` takes the
# list directly and skips the NULL results of failed pages, so the table is
# not re-copied once per page.
match_list %>%
  map("result") %>%
  bind_rows()

# Interactive debugging leftovers. They reference `base_html`, `game`,
# `game_id` and `link`, none of which is defined at top level in the visible
# script, so they are kept here as comments instead of aborting a sourced run.
# scrape_matchlist(base_html)
# extra_data <- tibble(game, game_id, link)
# extra_data$game == match_data$game
# final <- match_data %>%
#   left_join(extra_data) %>%
#   filter(!duplicated(game_id))
# final$link
# Absolute logo URLs built from the `.team_icon` nodes found in the first
# child of `#table_preview`.
# NOTE: `pre_html` is only created further down in this script (preview-page
# section), so that part has to be run first.
team_logo <- pre_html %>%
  # html_node("#table_preview td") %>%
  html_node("#table_preview") %>%
  html_children() %>%
  .[1] %>%
  html_nodes(".team_icon") %>%
  as.character() %>%
  # Relative "../....png" path; the leading ".." is stripped before the
  # host is prepended.
  str_extract("\\.\\..*?\\.png") %>%
  str_replace("\\.\\.", "") %>%
  paste0("http://www.gamesoflegends.com", .)
# Trimmed text of children 2 and 3 of the second child of `#table_preview`.
win_list <- pre_html %>%
  # html_node("#table_preview td") %>%
  html_node("#table_preview") %>%
  html_children() %>%
  .[2] %>%
  html_children() %>%
  .[2:3] %>%
  html_text() %>%
  str_trim()

# All three pieces stay character vectors; nothing is converted to numbers.
# Leading run of digits of each entry.
win_rate <- str_extract(win_list, "^\\d{1,}")

# Digits directly in front of "W" / "L", with the letter removed.
won <- win_list %>%
  str_extract("\\d{1,}W") %>%
  str_replace("W", "")

lost <- win_list %>%
  str_extract("\\d{1,}L") %>%
  str_replace("L", "")

history <- tibble(win_rate, won, lost)
# First number (decimal or integer) found in the text of children 5 and 6
# of `#table_preview`.
first_blood <- pre_html %>%
  # html_node("#table_preview td") %>%
  html_node("#table_preview") %>%
  html_children() %>%
  .[5:6] %>%
  html_text() %>%
  str_extract("\\d{1,}\\.\\d{1,}|\\d{1,}") %>%
  str_trim() %>%
  as.numeric()

# Same extraction for children 8 and 9.
first_tower <- pre_html %>%
  # html_node("#table_preview td") %>%
  html_node("#table_preview") %>%
  html_children() %>%
  .[8:9] %>%
  html_text() %>%
  str_extract("\\d{1,}\\.\\d{1,}|\\d{1,}") %>%
  str_trim() %>%
  as.numeric()

# Printed for inspection only; the result is not stored.
tibble(first_blood, first_tower)
# One game-page URL per id from 1 to 11650.
game_base_url <- "http://www.gamesoflegends.com/game/gameshow.php?id="
game_urls <- paste0(game_base_url, seq_len(11650))

# Quick look at the first few URLs.
head(game_urls)
# Parse a single game page (id 11650) and pull out its header fields.
html <- xml2::read_html(
  "http://www.gamesoflegends.com/game/gameshow.php?id=11650"
)

# Text of the first `.text-right` node with the " (...)" part removed.
date <- html %>%
  html_node(".text-right") %>%
  html_text() %>%
  str_replace(" \\(.*?\\)", "")

# The " (...)" part of that same text, stripped of punctuation and trimmed.
final <- html %>%
  html_node(".text-right") %>%
  html_text() %>%
  str_extract(" \\(.*?\\)") %>%
  str_replace_all("[[:punct:]]", "") %>%
  str_trim()

tournament <- html %>%
  html_node(".text-left") %>%
  html_text() %>%
  str_trim()

# Text of `#spantime` inside the second table, with the first ":" swapped
# for "." before the numeric conversion.
# NOTE(review): if that text is mm:ss, "35:42" becomes 35.42, which is not
# decimal minutes - confirm this encoding is what downstream code expects.
duration <- html %>%
  html_nodes("table") %>%
  .[2] %>%
  html_node("#spantime") %>%
  html_text() %>%
  str_replace(":", ".") %>%
  as.numeric()
# Trimmed text of the first child of the second table, reduced to the part
# that starts at a whitespace character and runs to the end.
patch <- html %>%
  html_nodes("table") %>%
  .[2] %>%
  html_children() %>%
  .[1] %>%
  html_text() %>%
  str_trim() %>%
  str_extract("\\s.*?$") %>%
  str_trim()
  # str_replace("v", "")
#' Parse the markup of one ban cell into champion details.
#'
#' @param x Character string holding the HTML of a single cell.
#' @return A tibble with the columns `champion` (icon file name without the
#'   ".png" extension), `champion_id` (digits after "id=", kept as
#'   character), `champion_link` (path from "/champion/" up to the id) and
#'   `champion_icon` (path from "/_img/champions_icon/" up to ".png").
#'   Any piece that is not found in `x` is `NA`.
get_banned <- function(x) {
  # The dot before "png" is escaped so it only matches a literal ".";
  # unescaped it would match any character.
  champion_icon <- str_extract(x, "/_img/champions_icon/.*?\\.png")

  # File name of the icon minus its extension. Taking everything after the
  # last "/" keeps names intact even when they contain characters outside
  # `\w` (a `\w+` match would keep only the trailing word).
  champion <- str_extract(champion_icon, "[^/]+(?=\\.png$)")

  champion_id <- x %>%
    str_extract("id=\\d{1,}") %>%
    str_replace("id=", "")

  champion_link <- str_extract(x, "/champion/.*?id=\\d{1,}")

  tibble(champion, champion_id, champion_link, champion_icon)
}

# One row per cell under `#banszone`, wrapped in a list so the whole table
# fits into a single list-column cell of `match_data`.
banned <- html %>%
  html_nodes("#banszone td") %>%
  as.character() %>%
  map_df(get_banned) %>%
  list()

match_data <- tibble(date, final, tournament, duration, patch, banned)

match_data %>%
  glimpse()
# Children of every `.container-fluid .row` node, one nodeset per row.
# Scraped once and shared by all the per-team statistics below.
row_children <- html %>%
  html_nodes(".container-fluid .row") %>%
  map(html_children)

# Trimmed text of the child nodes whose markup matches `pattern`.
cell_text <- function(cells, pattern) {
  cells[str_detect(cells, pattern)] %>%
    html_text() %>%
    str_trim()
}

# Gold is shown with a "k" suffix, which is dropped before conversion.
gold <- row_children %>%
  map_dbl(~ {
    cell_text(.x, "Gold") %>%
      str_replace("k", "") %>%
      as.numeric()
  })

# Number of children per row mentioning "First Blood" / "First Tower".
first_blood <- row_children %>%
  map_int(~ length(.x[str_detect(.x, "First Blood")]))

first_tower <- row_children %>%
  map(as.character) %>%
  map_int(~ length(.x[str_detect(.x, "First Tower")]))

# ".ills" matches any single character followed by "ills".
kills <- row_children %>%
  map_int(~ as.integer(cell_text(.x, ".ills")))

dragon <- row_children %>%
  map_int(~ as.integer(cell_text(.x, "Infernal Drake")))

nashor <- row_children %>%
  map_int(~ as.integer(cell_text(.x, "Nashor")))

team_data <- tibble(
  team = c("blue", "red"),
  gold, first_blood, first_tower, kills, dragon, nashor
)

team_data %>%
  glimpse()

# NOTE: `team` holds the `.black_link` texts of the blue player bar, while
# `player` holds those of the red player bar.
team <- html %>%
  html_node(".blue_player_bar") %>%
  html_nodes(".black_link") %>%
  html_text()

red_links <- html %>%
  html_node(".red_player_bar") %>%
  html_nodes(".black_link")

player <- html_text(red_links)

player_id <- red_links %>%
  as.character() %>%
  str_extract("id=\\d{1,}") %>%
  str_replace("id=", "") %>%
  as.integer()

red_player_link <- red_links %>%
  as.character() %>%
  str_extract("/players.*?id=\\d{1,}")

# Exploratory dumps of the raw markup; results are printed, not stored.
# A stray `map(~{ .x %>% as.character() })` used to follow the first dump.
# It had no data argument and therefore errored, so it is removed.
html %>%
  html_node(".red_player_bar") %>%
  html_children() %>%
  html_children() %>%
  as.character()

row_children %>%
  map(as.character)
# Preview page ("page=pw") of game 11647: team names and logo URLs.
pre_url <- "http://www.gamesoflegends.com/game/gameshow.php?id=11647&page=pw"
pre_html <- xml2::read_html(pre_url)

# NOTE(review): this selector means "`td` inside #table_preview, OR any `th`
# in the document" - confirm that the unscoped `th` is intended.
team_list <- pre_html %>%
  html_nodes("#table_preview td , th")

team_logo <- team_list %>%
  as.character() %>%
  str_extract("\\.\\..*?\\.png") %>%
  # Cells without a ".png" path yield NA. Drop them here, otherwise
  # `paste0()` would turn each NA into the bogus URL
  # "http://www.gamesoflegends.comNA".
  .[!is.na(.)] %>%
  str_replace("\\.\\.", "") %>%
  paste0("http://www.gamesoflegends.com", .)

team <- pre_html %>%
  html_nodes(".score_table2 a") %>%
  html_text()

# Printed for inspection only; the result is not stored.
tibble(team, team_logo)
# Re-read game 11650 and pull out both team names plus the player list.
url <- "http://www.gamesoflegends.com/game/gameshow.php?id=11650"
html <- xml2::read_html(url)

blue_team <- html %>%
  rvest::html_node(".blue_line a") %>%
  html_text()

red_team <- html %>%
  rvest::html_node(".red_line a") %>%
  html_text()

# Non-empty `.black_link` texts, split into two groups by position: the
# first five rows are labelled "blue", the next five "red". The split
# therefore assumes exactly ten names remain after the empty ones are
# dropped. The result is printed, not stored.
html %>%
  rvest::html_nodes(".black_link") %>%
  html_text() %>%
  .[. != ""] %>%
  tibble(player = .) %>%
  split(rep(c("blue", "red"), each = 5))
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.