# devtools::install_github("stillmatic/leagueR")
pacman::p_load(dplyr, purrr, ggplot2, rvest, stringr)
#' Scrape one page of the gamesoflegends.com match list
#'
#' Reads the page at `url`, takes the first HTML table on it as the match
#' table and attaches, for every anchor whose markup mentions "gameshow", the
#' game label, the numeric game id and the absolute link to the game page.
#'
#' @param url Anything `xml2::read_html()` accepts; the callers in this file
#'   pass match-list page URLs.
#' @return A tibble: the first table of the page without its `Red` and `Blue`
#'   columns and with names cleaned by `janitor::clean_names()`, left-joined
#'   on `game` with the columns `game_id` and `link`, keeping only the first
#'   row for each `game_id`.
scrape_matchlist <- function(url){

  html <- xml2::read_html(url)

  tables <- html_table(html)
  if (length(tables) == 0) {
    # Fail with a readable message instead of "subscript out of bounds".
    stop("No HTML table found on the match-list page.", call. = FALSE)
  }

  match_data <- tables[[1]] %>%
    as_tibble() %>%
    select(-Red, -Blue) %>%
    janitor::clean_names()

  # Raw markup of every anchor that points at a game page.
  anchors <- html %>%
    html_nodes("a") %>%
    as.character()
  anchors <- anchors[str_detect(anchors, "gameshow")]

  # Relative href ("../game/...") with the leading ".." stripped.
  path <- anchors %>%
    str_extract("\\.\\..*?\\d{3,}") %>%
    str_replace("\\.\\.", "")

  # str_c() propagates NA, so an anchor without a usable href yields an NA
  # link rather than the bogus string "http://www.gamesoflegends.comNA".
  link <- str_c("http://www.gamesoflegends.com", path)

  game_id <- link %>%
    str_extract("id=\\d{2,}") %>%
    str_replace("id=", "") %>%
    as.numeric()

  # Visible text of the anchor, i.e. the game label.
  game <- anchors %>%
    str_extract(">.*?</a>") %>%
    str_replace_all(">|<\\/a", "")

  extra_data <- tibble(game, game_id, link)

  # `game` is the only column extra_data is meant to share with the scraped
  # table, so name the key explicitly instead of relying on a natural join.
  match_data %>%
    left_join(extra_data, by = "game") %>%
    filter(!duplicated(game_id))
}


# Error-capturing variant of the scraper: purrr::safely() makes it return
# list(result = , error = ) instead of throwing, so one failing page does not
# abort the whole crawl.
scrape_matchlist_safe <- purrr::safely(scrape_matchlist)


# Paginated match list: the `start` query parameter runs from 0 to 5290 in
# steps of 10.
base_url <- "http://www.gamesoflegends.com/home/home.php?region=ALL&start="
target_url <- paste0(base_url, seq(0, 5290, by = 10))
# NOTE(review): tidyMBO::progressively() presumably wraps the function with a
# progress display sized to the number of pages -- confirm against tidyMBO.
scrape_matchlist_safe_progress <- tidyMBO::progressively(scrape_matchlist_safe, length(target_url))


# One list(result = , error = ) entry per page.
match_list <- map(target_url, scrape_matchlist_safe_progress)


# Stack the successful pages; failed pages have a NULL result, which
# bind_rows() skips. Binding once avoids the repeated copying of
# reduce(bind_rows).
match_list %>%
  map("result") %>%
  bind_rows()

# Single-page run of the unwrapped scraper. The original call passed
# `base_html`, an object that is never defined in this file; the first
# match-list page is used instead.
scrape_matchlist(target_url[1])




# Top-level re-run of the last steps of scrape_matchlist().
# NOTE(review): up to this point in the file `game`, `game_id`, `link` and
# `match_data` are only assigned inside scrape_matchlist(), so these statements
# presumably relied on objects left in the session while debugging -- confirm
# before running the script top to bottom.
extra_data <- tibble(game, game_id, link)

# Element-wise comparison of the labels taken from the anchors with the
# table's `game` column.
extra_data$game == match_data$game 


# Same join and de-duplication as in scrape_matchlist(); the join key is left
# implicit here.
final <- match_data %>% 
  left_join(extra_data) %>% 
  filter(!duplicated(game_id))

# Print the resulting game-page links.
final$link

# Logo ----

# Team logo URLs: take the first child of #table_preview, collect its
# .team_icon nodes, pull the relative "../....png" path out of the markup and
# turn it into an absolute URL. (An earlier attempt, left disabled in the
# original, selected "#table_preview td" instead.)
team_logo <- html_children(html_node(pre_html, "#table_preview"))[1] %>%
  html_nodes(".team_icon") %>%
  as.character() %>%
  str_extract("\\.\\..*?\\.png") %>%
  str_replace("\\.\\.", "") %>%
  paste0("http://www.gamesoflegends.com", .)

# Trimmed text of the 2nd and 3rd children of the second child of
# #table_preview.
# NOTE(review): presumably one win/loss summary cell per team -- confirm
# against the page.
win_list <- html_children(html_node(pre_html, "#table_preview"))[2] %>%
  html_children() %>%
  .[2:3] %>%
  html_text() %>%
  str_trim()

# Leading digit run, digits before "W" and digits before "L"; all three stay
# character vectors.
win_rate <- str_extract(win_list, "^\\d{1,}")
won <- str_replace(str_extract(win_list, "\\d{1,}W"), "W", "")
lost <- str_replace(str_extract(win_list, "\\d{1,}L"), "L", "")

history <- tibble(win_rate, won, lost)
# First number (decimal or integer) found in the text of children 5 and 6 of
# #table_preview, as numeric.
# NOTE(review): presumably the per-team first-blood figures -- confirm the
# row positions against the page.
first_blood <- html_children(html_node(pre_html, "#table_preview"))[5:6] %>%
  html_text() %>%
  str_extract("\\d{1,}\\.\\d{1,}|\\d{1,}") %>%
  str_trim() %>%
  as.numeric()

# Same extraction for children 8 and 9 of #table_preview.
first_tower <- html_children(html_node(pre_html, "#table_preview"))[8:9] %>%
  html_text() %>%
  str_extract("\\d{1,}\\.\\d{1,}|\\d{1,}") %>%
  str_trim() %>%
  as.numeric()

tibble(first_blood, first_tower)

# Match Data ----

# One game-page URL per id from 1 to 11650; print the first few as a check.
game_base_url <- "http://www.gamesoflegends.com/game/gameshow.php?id="
game_urls <- paste0(game_base_url, seq_len(11650))
head(game_urls)

# Date ----

# Fetch a single game page to develop the extraction against.
html <- xml2::read_html("http://www.gamesoflegends.com/game/gameshow.php?id=11650")

# Text of the first .text-right node with its " (...)" part removed.
date <- str_replace(html_text(html_node(html, ".text-right")), " \\(.*?\\)", "")

# The " (...)" part of the same node, with punctuation stripped and trimmed.
final <- html_text(html_node(html, ".text-right")) %>%
  str_extract(" \\(.*?\\)") %>%
  str_replace_all("[[:punct:]]", "") %>%
  str_trim()

# Trimmed text of the first .text-left node.
tournament <- str_trim(html_text(html_node(html, ".text-left")))

# Text of #spantime inside the second table, with the first ":" replaced by
# "." before numeric conversion.
# NOTE(review): if the text is mm:ss, the result reads as "mm.ss" rather than
# decimal minutes -- confirm that downstream code expects this.
duration <- html_nodes(html, "table")[2] %>%
  html_node("#spantime") %>%
  html_text() %>%
  str_replace(":", ".") %>%
  as.numeric()

# Everything from the first whitespace to the end of the trimmed text of the
# second table's first child, trimmed again. (A final step stripping "v" was
# left disabled in the original.)
patch <- html_nodes(html, "table")[2] %>%
  html_children() %>%
  .[1] %>%
  html_text() %>%
  str_trim() %>%
  str_extract("\\s.*?$") %>%
  str_trim()
#' Parse the markup of one ban cell into champion fields
#'
#' @param x Character vector of HTML markup; the caller in this file passes
#'   each `#banszone td` node converted with `as.character()`.
#' @return A tibble with one row per element of `x` and the character columns
#'   `champion`, `champion_id`, `champion_link` and `champion_icon`. A field
#'   that is not found in the markup is `NA`.
get_banned <- function(x){
  # The dot before "png" is escaped throughout: unescaped, "." matches any
  # character, so a file name containing "png" (e.g. "Sapng.png") would be cut
  # short here and mangled when the extension is removed below.
  champion_icon <- str_extract(x, "/_img/champions_icon/.*?\\.png")

  # File name of the icon without directory and extension.
  champion <- champion_icon %>%
    str_extract("\\w+\\.png") %>%
    str_replace_all("/|\\.png", "")

  # Digits of the first "id=" parameter, kept as character.
  champion_id <- x %>%
    str_extract("id=\\d{1,}") %>%
    str_replace("id=", "")

  champion_link <- str_extract(x, "/champion/.*?id=\\d{1,}")

  tibble(champion, champion_id, champion_link, champion_icon)
}

# Parse every #banszone cell with get_banned(), stack the rows into one data
# frame and wrap it in a list so it can sit in a single list-column cell.
banned <- list(
  map_df(as.character(html_nodes(html, "#banszone td")), get_banned)
)

# Game-level metadata assembled from the values extracted above.
match_data <- tibble(date, final, tournament, duration, patch, banned)
glimpse(match_data)
# Per-team statistics. Every vector below walks the ".container-fluid .row"
# elements and looks at each row's children. map_dbl()/map_int() require
# exactly one value per row, so each row is expected to contain exactly one
# child matching the pattern.
# NOTE(review): presumably there is one such row per team (blue first, then
# red), as the `team` column below implies -- confirm against the page.

# Text of the child mentioning "Gold", with the first "k" dropped, as numeric.
gold <- html_nodes(html, ".container-fluid .row") %>%
  map(html_children) %>%
  map_dbl(function(cells) {
    gold_cell <- cells[str_detect(cells, "Gold")]
    as.numeric(str_replace(str_trim(html_text(gold_cell)), "k", ""))
  })

# Number of children mentioning "First Blood".
first_blood <- html_nodes(html, ".container-fluid .row") %>%
  map(html_children) %>%
  map_int(function(cells) length(cells[str_detect(cells, "First Blood")]))

# Number of children whose markup mentions "First Tower".
first_tower <- html_nodes(html, ".container-fluid .row") %>%
  map(function(row) as.character(html_children(row))) %>%
  map_int(function(cells) length(cells[str_detect(cells, "First Tower")]))

# Integer text of the child matching ".ills" (any character followed by
# "ills").
kills <- html_nodes(html, ".container-fluid .row") %>%
  map(html_children) %>%
  map_int(function(cells) {
    kill_cell <- cells[str_detect(cells, ".ills")]
    as.integer(str_trim(html_text(kill_cell)))
  })

# Integer text of the child mentioning "Infernal Drake".
dragon <- html_nodes(html, ".container-fluid .row") %>%
  map(html_children) %>%
  map_int(function(cells) {
    drake_cell <- cells[str_detect(cells, "Infernal Drake")]
    as.integer(str_trim(html_text(drake_cell)))
  })

# Integer text of the child mentioning "Nashor".
nashor <- html_nodes(html, ".container-fluid .row") %>%
  map(html_children) %>%
  map_int(function(cells) {
    nashor_cell <- cells[str_detect(cells, "Nashor")]
    as.integer(str_trim(html_text(nashor_cell)))
  })

team_data <- tibble(team = c("blue", "red"), gold, first_blood, first_tower, kills, dragon, nashor)

glimpse(team_data)



# Text of the .black_link nodes inside the first .blue_player_bar.
# NOTE(review): this is stored as `team` although it is read the same way as
# `player` below, just from the blue bar -- the name looks like a leftover;
# confirm.
team <- html_text(html_nodes(html_node(html, ".blue_player_bar"), ".black_link"))

# Text of the .black_link nodes inside the first .red_player_bar.
player <- html_text(html_nodes(html_node(html, ".red_player_bar"), ".black_link"))

# Integer after "id=" in the markup of each red-side link.
player_id <- html_nodes(html_node(html, ".red_player_bar"), ".black_link") %>%
  as.character() %>%
  str_extract("id=\\d{1,}") %>%
  str_replace("id=", "") %>%
  as.integer()

# Relative "/players...id=<n>" path in the markup of each red-side link.
red_player_link <- str_extract(
  as.character(html_nodes(html_node(html, ".red_player_bar"), ".black_link")),
  "/players.*?id=\\d{1,}"
)

# Inspect the raw markup nested two levels below the first .red_player_bar.
html %>%
  html_node(".red_player_bar") %>%
  html_children() %>%
  html_children() %>%
  as.character()

# Inspect the raw markup of the children of every ".container-fluid .row"
# element: one character vector per row.
# The detached `map(~{ ... })` fragment that preceded this chain was called
# without any data and could only fail, so it is dropped; the chain below
# already contains that step.
html %>%
  html_nodes(".container-fluid .row") %>%
  map(html_children) %>%
  map(as.character)
# Fetch the "page=pw" view of game 11647; the preview-page extraction code in
# this file reads from `pre_html`.
pre_url <- "http://www.gamesoflegends.com/game/gameshow.php?id=11647&page=pw"
pre_html <- xml2::read_html(pre_url)

# NOTE(review): in the selector "#table_preview td , th" only the `td` part is
# scoped to #table_preview; `th` matches every header cell on the page --
# confirm this is intended.
team_list <- html_nodes(pre_html, "#table_preview td , th")

# Relative "../....png" path found in each selected cell, turned into an
# absolute URL. paste0() turns cells without a match into a string ending in
# "NA".
team_logo <- paste0(
  "http://www.gamesoflegends.com",
  str_replace(
    str_extract(as.character(team_list), "\\.\\..*?\\.png"),
    "\\.\\.",
    ""
  )
)

# Link texts inside .score_table2.
team <- html_text(html_nodes(pre_html, ".score_table2 a"))

tibble(team, team_logo)
# Re-fetch the page of game 11650.
url <- "http://www.gamesoflegends.com/game/gameshow.php?id=11650"
html <- xml2::read_html(url)

# Text of the first link inside .blue_line and inside .red_line.
blue_team <- html_text(rvest::html_node(html, ".blue_line a"))

red_team <- html_text(rvest::html_node(html, ".red_line a"))


# Non-empty .black_link texts as a one-column tibble, split into a "blue" and
# a "red" group of five rows each (first five rows are labelled blue).
html %>%
  rvest::html_nodes(".black_link") %>%
  html_text() %>%
  .[. != ""] %>%
  tibble(player = .) %>%
  split(rep(c("blue", "red"), each = 5))


# systats/lolR documentation built on May 31, 2019, 6:17 p.m.