# Parse an HTML page into a two-column key/value table.
#
# Reads `x` with xml2::read_html(), takes the cleaned text (see
# arrumar_node()) of every <td> whose `style` attribute contains
# "BORDER-RIGHT", drops empty cells and splits each remaining cell at the
# first " \n" into a key and a value.
#
# Args:
#   x: Input handed straight to xml2::read_html().
#
# Returns:
#   A tibble with character columns `key` and `value`, one row per non-empty
#   cell. `value` is "" for cells that contain no " \n" separator.
scrape_cnpj <- function(x) {
  cells <- x %>%
    xml2::read_html() %>%
    rvest::html_nodes(xpath = '//td[contains(@style, "BORDER-RIGHT")]') %>%
    arrumar_node()
  cells <- cells[cells != ""]

  pairs <- stringr::str_split_fixed(cells, " \n", 2)
  # Name the columns on the matrix itself: tibble::as_tibble() on a matrix
  # without column names is deprecated since tibble 2.0.0 and warns on every
  # call with non-empty input.
  colnames(pairs) <- c("key", "value")
  tibble::as_tibble(pairs)
}
# Parse an HTML page into a key/value table plus a name/qualification table.
#
# Two independent extractions are run on the same parsed document:
#   * every <fieldset> is cleaned with arrumar_node() and split at " \n" into
#     four pieces (label, name, label, qualification); only the name and the
#     qualification are kept;
#   * every table row matched by the XPath below is cleaned and split at the
#     first " \n" into a key and a value, and a trailing ":" is removed from
#     the key.
#
# Args:
#   x: Input handed straight to xml2::read_html().
#
# Returns:
#   A list with two tibbles:
#     infos_basicas: character columns `key` and `value`, one row per table
#       row matched by the XPath.
#     qsa: character columns `nm` and `qualif`, one row per <fieldset>.
scrape_qsa <- function(x) {
  page <- xml2::read_html(x)

  partners <- page %>%
    rvest::html_nodes("fieldset") %>%
    arrumar_node() %>%
    stringr::str_split_fixed(" \n", 4)
  # Name the matrix columns before converting: tibble::as_tibble() on a matrix
  # without column names is deprecated since tibble 2.0.0 and warns.
  colnames(partners) <- c("nm_key", "nm", "qual_key", "qualif")
  partners <- partners %>%
    tibble::as_tibble() %>%
    dplyr::select(nm, qualif)

  xp <- '//div[@id="principal"]//table[2]//table//tr'
  rows <- page %>%
    rvest::html_nodes(xpath = xp) %>%
    arrumar_node() %>%
    stringr::str_split_fixed(" \n", 2)
  colnames(rows) <- c("key", "value")
  basic_info <- rows %>%
    tibble::as_tibble() %>%
    # Keys come with a trailing colon in the page text; strip it.
    dplyr::mutate(key = stringr::str_replace(key, ":$", ""))

  list(infos_basicas = basic_info, qsa = partners)
}
# Extract the text of HTML node(s) and normalise its whitespace.
#
# Steps, in order:
#   1. take the text content with rvest::html_text();
#   2. collapse every run of tabs, spaces and carriage returns to one space;
#   3. replace every run of "newline + space" with a single newline;
#   4. trim leading and trailing whitespace.
#
# Args:
#   x: Node(s) accepted by rvest::html_text().
#
# Returns:
#   A character vector of cleaned text, one element per node.
arrumar_node <- function(x) {
  raw_text <- rvest::html_text(x)
  single_spaced <- stringr::str_replace_all(raw_text, '[\t \r]+', ' ')
  joined_lines <- stringr::str_replace_all(single_spaced, '(\n )+', '\n')
  stringr::str_trim(joined_lines)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.