# Get data for downloading DJE depending on its TJ
get_dje_data <- function(tj) {
switch (tj,
"tjac" = list(
u_dje = "http://diario.tjac.jus.br/edicoes.php?",
booklets = c(1)),
"tjal" = list(
u_dje = "http://www2.tjal.jus.br/cdje/downloadCaderno.do?",
booklets = c(2, 3)),
"tjam" = list(
u_dje = "http://esaj.tjam.jus.br/cdje/downloadCaderno.do?",
booklets = c(1:3)),
"tjba" = list(
u_dje = "http://www.tjba.jus.br/diario/internet/download.wsp?",
booklets = c(1)),
"tjce" = list(
u_dje = "http://esaj.tjce.jus.br/cdje/downloadCaderno.do?",
booklets = c(1:2)),
"tjms" = list(
u_dje = "http://www.tjms.jus.br/cdje/downloadCaderno.do?",
booklets = c(1:4)),
"tjrn" = list(
u_dje = "https://www.diario.tjrn.jus.br/djonline/pages/edicao/edicaoVisualizadorTopo.jsf",
booklets = c(1:2)),
"tjsc" = list(
u_dje = "http://busca.tjsc.jus.br/dje-consulta/rest/diario/caderno?",
booklets = c(1:4)),
"tjsp" = list(
u_dje = "http://www.dje.tjsp.jus.br/cdje/downloadCaderno.do?",
booklets = c(11:15, 18)))
}
# Call appropriate function to get link depending on TJ
get_dje_link <- function(tj, date, u_dje, booklet) {
switch(tj,
"tjac" = get_tjac_link(date, u_dje),
"tjba" = get_tjba_link(date, u_dje),
"tjrn" = get_tjrn_link(date, u_dje, booklet),
"tjsc" = get_tjsc_link(date, u_dje, booklet),
get_default_link(date, u_dje, booklet)
)
}
# Get link for many TJ"s DJEs
get_default_link <- function(date, u_dje, booklet) {
date_link <- format(lubridate::as_date(date), "%d/%m/%Y")
stringr::str_c(u_dje, "dtDiario=", date_link, "&cdCaderno=", booklet)
}
# Get link for TJAC"s DJE
get_tjac_link <- function(date, u_dje) {
# Convert date to appropriate format
date <- lubridate::as_date(date)
# Get text in page
text <- stringr::str_c(
u_dje, "Ano=", lubridate::year(date),
"&Mes=", lubridate::month(date)) %>%
httr::GET() %>%
httr::content("text") %>%
xml2::read_html() %>%
rvest::html_nodes("table tr.texto_normal")
# Get dates in page
dates <- rvest::html_node(text, "a.texto_normal") %>%
rvest::html_text() %>%
stringr::str_trim() %>%
lubridate::dmy(locale = "pt_BR.UTF-8")
# Get links in page
links <- rvest::html_node(text, xpath = './/a[@title="Baixar"]') %>%
rvest::html_attr("href") %>%
stringr::str_c("http://diario.tjac.jus.br", .)
# Get only pertinent link
link <- dplyr::data_frame(dates, links) %>%
dplyr::filter(dates == date) %>%
with(links)
return(link)
}
# Get link for TJBA"s DJE
get_tjba_link <- function(date, u_dje) {
# Get token for access
token <- "http://www2.tjba.jus.br/diario/internet/pesquisar.wsp" %>%
httr::GET(httr::config(followlocation = 0L)) %>%
httr::content("text", encoding = "ISO-8859-1") %>%
xml2::read_html() %>%
rvest::html_node(xpath = '//input[@id="wi.token"]') %>%
rvest::html_attr("value")
# Build body of POST request
body <- list(
"tmp_origem" = "",
"tmp.diario.dt_inicio" = "25/01/2001",
"tmp.diario.dt_fim" = "04/02/2100",
"tmp.diario.cd_caderno" = "",
"tmp.diario.cd_secao" = "",
"tmp.diario.pal_chave" = "",
"wi.token" = token,
"tmp.diario.id_advogado" = "")
# Get
editions <- "http://www2.tjba.jus.br/diario/internet/pesquisar.wsp" %>%
httr::POST(body = body, encode = "form") %>%
httr::content("text", encoding = "ISO-8859-1") %>%
xml2::read_html() %>%
rvest::html_node("table.grid") %>%
rvest::html_table() %>%
janitor::clean_names() %>%
tidyr::gather() %>%
dplyr::filter(stringr::str_trim(value) != "") %>%
tidyr::separate(value, c("edition", "date"), sep = "\\(") %>%
dplyr::mutate(date = lubridate::dmy(date, locale = "pt_BR.UTF-8"))
# Return link with matching edition
return(stringr::str_c(
"tmp.diario.nu_edicao=", u_dje,
editions$edition[match(date, editions$date)]))
}
# Get link for TJRN"s DJE
get_tjrn_link <- function(date, u_dje, booklet) {
# Conver date to link format
date <- format(lubridate::as_date(date), "%d/%m/%Y")
# Get JSF from a search result
get_jsf <- function(result) {
httr::content(result, "text") %>%
xml2::read_html() %>%
rvest::html_nodes(xpath = '//input[contains(@id, "_64")]') %>%
rvest::html_attr("value")
}
# Get JSF from simple search
jsf_1 <- httr::GET(
"http://www.diario.tjrn.jus.br/djonline/inicial.jsf",
httr::config(ssl_verifypeer = FALSE)) %>%
get_jsf()
# Body for second request
body_2 <- list(
"jsf_tree_64" = jsf_1[1],
"jsf_state_64" = jsf_1[2],
"jsf_viewid" = "/goto.jsp",
"menu:formMenu_SUBMIT" = "1",
"menu:formMenu:_link_hidden_" = "menu:formMenu:_id16"
)
# Get JSF from first POSTed search
jsf_2 <- httr::POST(
"https://www.diario.tjrn.jus.br/djonline/goto.jsf",
body = body_2, httr::config(ssl_verifypeer = FALSE)) %>%
get_jsf()
# Body for third request
body_3 <- list(
"jsf_tree_64" = jsf_2[1],
"jsf_state_64" = jsf_2[2],
"jsf_viewid" = "/goto.jsp",
"pesquisarEdicaoCompletaBean:pesquisa_:dataInicio" = date,
"pesquisarEdicaoCompletaBean:pesquisa_:dataFim" = date,
"pesquisarEdicaoCompletaBean:pesquisa_:cmdPesquisar" = "Pesquisar",
"pesquisarEdicaoCompletaBean:pesquisa__SUBMIT" = "1",
"edicao" = "",
"pesquisarEdicaoCompletaBean:pesquisa_:_link_hidden_" = ""
)
# Get relevant nodes from third request
nodes <- httr::POST(
"https://www.diario.tjrn.jus.br/djonline/goto.jsf",
body = body_3, httr::config(ssl_verifypeer = FALSE), encode = "form") %>%
httr::content("text") %>%
xml2::read_html() %>%
rvest::html_nodes(
xpath = '//tbody[@id="pesquisarEdicaoCompletaBean:dados:tbody_element"]//a')
# Extract data items from nodes
if (length(nodes) == 2) {
id <- rvest::html_attr(nodes, "id")
nm <- rvest::html_attr(nodes, "onclick")
nm <- stringr::str_match(nm, "\\.value='([0-9]{8}_[A-Z]{3})'")[,2]
items <- list(list(id[1], nm[1]), list(id[2], nm[2]))
} else {
return(NA)
}
# Get JSF from second POSTed search
jsf_3 <- httr::POST(
"https://www.diario.tjrn.jus.br/djonline/goto.jsf",
body = body_3, httr::config(ssl_verifypeer = FALSE), encode = "form") %>%
get_jsf()
# Create request bodies from items
items_body <- purrr::map(items, function(x) {
list(
"jsf_tree_64" = jsf_3[1],
"jsf_state_64" = jsf_3[2],
"jsf_viewid" = "/goto.jsp",
"linkDummyForm:_link_hidden_" = x[[1]],
"pesquisarEdicaoCompletaBean:scroll_1" = "",
"edicao" = x[[2]])
})
# Get links using body_items
links <- purrr::map_chr(items_body, function(x) {
r <- httr::POST(
"https://www.diario.tjrn.jus.br/djonline/goto.jsf",
body = x, httr::config(ssl_verifypeer = FALSE), encode = "form")
httr::GET(u_dje, httr::config(ssl_verifypeer = FALSE)) %>%
httr::content("text", encoding = "ISO-8859-1") %>%
xml2::read_html() %>%
rvest::html_node("a") %>%
rvest::html_attr("href") %>%
stringr::str_replace_all("\\\\", "/") %>%
stringr::str_c("https://www.diario.tjrn.jus.br", .)
})
# Only return link associated with requested booklet
return(links[booklet])
}
# Get link for TJSC"s DJE
get_tjsc_link <- function(date, u_dje, booklet) {
# Get edition number for date
edition <- stringr::str_c(
"http://busca.tjsc.jus.br/dje-consulta/rest/diario/dia?dia=",
format(lubridate::as_date(date), "%d/%m/%Y")) %>%
httr::GET() %>%
xml2::read_html() %>%
stringr::str_extract("\"edicao\":[0-9]+") %>%
stringr::str_extract("[0-9]+")
# Return link
return(stringr::str_c(u_dje, "edicao=", edition, "&cdCaderno=", booklet))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.