In systats/lolR: Data Sourcing

packages

pacman::p_load(tidyverse, xml2, rvest, qdapRegex)

function

# Assemble the stats-API URL for one example game and download its JSON.
base_url <- "https://acs.leagueoflegends.com/v1/stats/game/"

gameid <- 1001380191

tournament <- "TRLH3/"

gamehash <- "43a46f49938bacca"

scrapeurl <- paste0(base_url, tournament, gameid, "?gameHash=", gamehash)

# Read from the URL assembled above.
example_dat <- jsonlite::read_json(scrapeurl)


# NOTE(review): clean_matches() is defined further down in this file, so this
# call only succeeds once that definition has been evaluated.
clean_matches(example_dat)
# 
# Presumably a debugging aid: binds `x` so the body of clean_matches() can be
# run line by line -- confirm before removing.
x <- example_dat

#' Flatten one parsed match record into a tibble
#'
#' @param x A nested list describing one match, e.g. the result of
#'   `jsonlite::read_json()` on a stats-API game URL. The code reads the first
#'   ten top-level elements, `x$teams[[1]]`, `x$teams[[2]]` and
#'   `x$participants`.
#' @return A tibble made by column-binding: the first ten top-level fields of
#'   `x` plus a `date` column; the first fifteen fields of each team, prefixed
#'   with `blue_` (team id 100) or `red_` (any other team id); and two
#'   list-columns, `match_part` and `match_part_details`.
clean_matches <- function(x) {
  # NOTE(review): the column names `gamecreation` and `teamid` used below rely
  # on clean_names() lower-casing names without inserting underscores --
  # confirm against the installed janitor version.

  match_base <- x[1:10] %>%
    as_tibble() %>%
    janitor::clean_names() %>%
    # presumably the creation time is in milliseconds, hence / 1000 -- confirm
    mutate(date = anytime::anytime(gamecreation / 1000))

  match_details <- list(x$teams[[1]][1:15], x$teams[[2]][1:15]) %>%
    reduce(bind_rows) %>%
    janitor::clean_names() %>%
    split(.$teamid) %>%
    map(~ {
      # Every row in a split group shares one team id, so the first value is
      # enough and keeps the `if` condition scalar.
      side <- if (.x$teamid[[1]] == 100) "blue_" else "red_"
      colnames(.x) <- paste0(side, colnames(.x))
      .x
    }) %>%
    reduce(bind_cols)

  # NOTE(review): only the first six entries of x$participants are kept here,
  # while the stats below cover every participant -- confirm this is intended.
  match_part <- x$participants[1:6] %>%
    tibble() %>%
    list() %>%
    tibble(match_part = .)

  # Collect the `stats` element of every participant instead of indexing a
  # fixed set of ten; do.call() passes them to bind_rows() as separate
  # arguments, exactly as the explicit ten-argument call did.
  participant_stats <- unname(map(x$participants, "stats"))

  match_part_details <- do.call(bind_rows, participant_stats) %>%
    as_tibble() %>%
    mutate(win = ifelse(win, "Win", "Fail")) %>%
    list() %>%
    tibble(match_part_details = .)

  bind_cols(match_base, match_details, match_part, match_part_details)
}


# https://acs.leagueoflegends.com/v1/stats/game/TRLH3/1001380191?gameHash=43a46f49938bacca

Scraping attempt – it doesn't work :/

url <- "https://eu.lolesports.com/en/schedule#slug=msi&tab=results"

# Presumably the class attribute of the target button, pasted from the page:
# class="button button--action--solid button--block micro-fixture__cta"


# Select the nodes carrying the `micro-fixture__cta` class and show their
# markup. The pipe now runs through to as.character(); previously a trailing
# comment cut it off and as.character() was evaluated without any input.
read_html(url) %>%
  rvest::html_nodes(".micro-fixture__cta") %>%
  # html_attr("class")
  as.character()

Verzweiflung (desperation)

# Read the saved page source that the links are extracted from.
all_links <- read_lines("data/all_links.txt")


# Exploratory: show everything captured by the anchor-tag pattern.
str_match_all(all_links, '<a href (.*?) >')


# Exploratory: show the text between "href" and "class", markers included.
# Uses qdapRegex (loaded above) because genXtract() is not exported by any
# package this file loads.
qdapRegex::ex_between(all_links, "href", "class", include.markers = TRUE)

# Pull every URL out of the text, keep those containing "msi", and strip
# double quotes.
links <- qdapRegex::ex_url(all_links) %>%
  unlist() %>%
  .[str_detect(., "msi")] %>%
  str_replace_all('"', "") %>%
  tibble(links = .)


# Plain reassignment instead of `%<>%`, which needs magrittr attached
# (tidyverse only re-exports `%>%`).
# NOTE(review): the labels assume exactly 61 unique links whose last three,
# after sorting, are the finals -- confirm against the data.
links <- links %>%
  arrange(links) %>%
  filter(!duplicated(links)) %>%
  mutate(type = c(rep("normal", 58), rep("final", 3)))



# Probe one match page: collect the href of every `.button--nav` node.
ss <- read_html("https://eu.lolesports.com/en/msi/msi_2018/match/2018-05-15/evos-esports-vs-royal-never-give-up") %>%
  html_nodes(".button--nav") %>%
  html_attr("href")


# Drop everything up to and including "match-details" from the hrefs found
# above (`ss`; `target_url` only exists inside get_ids()).
sub('.*match-details', '', ss)

The real scraper

#' Extract the match-details path and page title from one match page
#'
#' @param x URL of a match page, passed to `read_html()`.
#' @return A tibble with one row per `.button--nav` node found on the page:
#'   `urls` holds the node's `href` with everything up to and including
#'   "match-details" removed, and `match` holds the text of the page's first
#'   title element. Later steps in this file read both columns.
get_ids <- function(x) {
  # Download and parse the page once; both lookups reuse it.
  page <- read_html(x)

  target_url <- page %>%
    html_nodes(".button--nav") %>%
    html_attr("href")

  match <- page %>%
    html_nodes("title") %>%
    html_text()

  tibble(
    urls = sub('.*match-details', '', target_url),
    # A single title recycles against any number of links.
    match = match[1]
  )
}





# safely() captures errors per call so one failing page does not stop the run.
# progressively() presumably reports progress over `.n` calls -- confirm; it
# is sized from the data rather than a fixed count.
get_ids_save <- safely(tidyMBO::progressively(get_ids, .n = nrow(links)))

api_links <- links %>%
  # filter(type != "final") %>%
  .$links %>%
  purrr::map(get_ids_save)

glimpse(api_links)

base_url <- "https://acs.leagueoflegends.com/v1/stats/game"

# Keep the successful results and turn each path into a full stats-API URL.
# The "match-details" prefix is stripped before base_url is prepended, so the
# prefix itself can never be removed by the pattern.
scrape_links <- api_links %>%
  map("result") %>%
  bind_rows() %>%
  mutate(urls = paste0(base_url, sub('.*match-details', '', urls)))



#' Read one game's JSON and flatten it
#'
#' @param x Location of the JSON, passed to `jsonlite::read_json()`.
#' @return The result of `clean_matches()` applied to the parsed JSON.
get_match_details <- function(x) {
  clean_matches(jsonlite::read_json(x))
}


# Error-capturing wrapper around get_match_details(). `.n` is the number of
# URLs to fetch: nrow(), because length() of a data frame counts columns.
get_save_details <- safely(tidyMBO::progressively(get_match_details,
                                                  .n = nrow(scrape_links)))

gamedetails <- scrape_links$urls %>%
  purrr::map(get_save_details)


# Derive the numeric game id and both team names for every scraped link.
scrape_ids <- scrape_links %>%
  # The text between "TRLH" and "gameHash" looks like "3/1001380191?":
  # drop its first two characters and the "?" to leave the id.
  mutate(gameid = qdapRegex::ex_between(urls, "TRLH", "gameHash") %>%
           stringi::stri_sub(., 3) %>%
           str_remove("\\?") %>%
           as.integer()) %>%
  tidyr::separate(match,
                  into = c("blue_team", "red_team"),
                  remove = FALSE, sep = " VS ") %>%
  mutate(red_team = str_replace(red_team, " \\| LoL Esports", ""))

# Attach team names to the successfully parsed games, keyed on the game id.
gamedetails %>%
  map("result") %>%
  bind_rows() %>%
  left_join(scrape_ids, by = "gameid") %>%
  select(gameid, date, blue_team, red_team, blue_win, red_win, everything())


scrape_links

# match <- read_html("https://eu.lolesports.com/en/msi/msi_2018/match/2018-05-15/evos-esports-vs-royal-never-give-up") %>% 
#   html_nodes("title") %>% 
#   html_text()