# data-raw/extract_content.R

library(dplyr, warn.conflicts = FALSE)
library(rvest)

# Root of the Roll20 site; hrefs scraped from the index page are appended to it.
base_url <- "https://roll20.net"

# Parsed HTML of the "Monsters by Name" compendium index page.
# NOTE: despite its name, `url` holds the parsed document, not a URL string.
url <- read_html(
  paste0(base_url, "/compendium/dnd5e/Monsters%20by%20Name#content")
)

# href attribute of every link inside the `.pagecontent` container.
tmp <- html_nodes(x = url, css = ".pagecontent a") %>%
  html_attr("href")

# Anchors without an href yield NA, which paste0() would turn into the bogus
# URL "https://roll20.netNA"; drop them before building the URL table.
tmp <- tmp[!is.na(tmp)]

# One row per linked page. tibble() replaces the deprecated data_frame().
# Assumes every href is site-relative (starts with "/") -- TODO confirm.
monster_urls <- tibble(url = paste0(base_url, tmp))

#' Extract the attribute table from a compendium page
#'
#' Reads the page at `url`, takes the first `<table>` element and reshapes its
#' first two columns (attribute name, attribute value) into wide form, with one
#' column per attribute name.
#'
#' @param url Location of the page to scrape; passed straight to `read_html()`.
#' @return A data frame in wide form with one column per attribute key found
#'   in the table, without the `Category` column.
extract_content <- function(url) {
  link <- read_html(url)

  # html_node() returns an "xml_missing" placeholder when nothing matches;
  # fail with a clear message instead of an S3 dispatch error in html_table().
  table_node <- html_node(x = link, css = "table")
  if (inherits(table_node, "xml_missing")) {
    stop("No <table> element found at ", url, call. = FALSE)
  }

  wide <- table_node %>%
    html_table(fill = TRUE) %>%
    # Only the first two columns carry data: attribute name and its value.
    rename(key = X1, value = X2) %>%
    select(key, value) %>%
    # Rows whose key is a single space are dropped (presumably spacer rows).
    filter(key != " ") %>%
    # Drop the UI placeholder row that is not a real attribute.
    filter(value != "+ Add New Attribute") %>%
    # Long (key, value) pairs -> one column per key.
    tidyr::spread(key, value)

  # Category is discarded; tolerate pages where that attribute is absent
  # rather than aborting on an unknown column.
  wide[setdiff(names(wide), "Category")]
}
# jarad/dnd5e documentation built on Oct. 15, 2023, 5:55 p.m.