#' @title Download PDF documents belonging to lawsuits
#' @description This function accesses lawsuits' ESAJ pages and
#' tries to download documents (if IDs are provided to `data`
#' and a lawsuit isn't found, shows error but skips it).
#' @param data A character vector with one or more lawsuit IDs or
#' the tibble returned by [get_metadata()] (can be filtered)
#' @param path Path to directory where to save files
#' @param login ESAJ system login (if left `NULL` and you're
#' not logged in yet, will ask for it)
#' @param password ESAJ system password (if left `NULL` and you're
#' not logged in yet, will ask for it)
#' @param only_petitions Whether to only download petitions (only
#' applied when `data` is a vector of IDs; a tibble is used as given)
#' @param progress Whether to show a progress bar
#' @return Invisibly, the metadata tibble with an extra `file` column
#' holding the path each document was saved to
#' @seealso [get_metadata()]
#' @export
download_documents <- function(data, path, login = NULL, password = NULL,
                               only_petitions = FALSE, progress = FALSE) {
  # Depending on data's type, get ready for download
  if (dplyr::is.tbl(data)) {
    # The metadata is already here (possibly filtered by the caller), so it
    # is used untouched; only the ESAJ session still has to be opened
    login_esaj(login, password)
  } else {
    if (progress) {
      message("Fetching metadata...")
    }
    # Arguments are named so they cannot drift out of step with
    # get_metadata()'s signature
    data <- get_metadata(
      data, path = path, login = login, password = password,
      only_petitions = only_petitions)
  }

  if (nrow(data) > 0) {
    # Make sure the base directory exists before normalizing it
    # (normalizePath() warns on paths that do not exist)
    dir.create(path, showWarnings = FALSE, recursive = TRUE)

    # One directory per lawsuit, named after the ID without dots and dashes
    data <- dplyr::mutate(data, file = str_c(
      normalizePath(path), "/",
      replace_all(id, "[\\.\\-]", "")))
    purrr::walk(
      unique(data$file), dir.create,
      showWarnings = FALSE, recursive = TRUE)

    # Setup progress bar
    if (progress) {
      message("Downloading documents...")
      pb <- progress::progress_bar$new(total = nrow(data))
    }

    # Download documents
    for (i in seq_len(nrow(data))) {
      # Turn the lawsuit directory into the document's own file path:
      # <dir>/<page range with "_" instead of "-">_<title>.pdf
      data$file[i] <- str_c(
        data$file[i], "/", replace_all(data$number[i], "-", "_"),
        "_", data$title[i], ".pdf")

      # Download a document, overwriting any previous copy
      httr::GET(
        data$link[i], vfpr_f,
        httr::write_disk(data$file[i], overwrite = TRUE))

      # Tick progress bar
      if (progress) {
        pb$tick()
      }
    }
  }

  invisible(data)
}
#' @title Get metadata from documents belonging to lawsuits
#' @description This function accesses lawsuits' ESAJ pages and
#' tries to get the metadata of all documents (if lawsuit
#' isn't found, shows error but skips it).
#' @param id A character vector with one or more lawsuit IDs
#' @param path Path to directory where, for each lawsuit, a folder
#' named after its ID receives the search response (`.html`) and the
#' unfiltered metadata (`.rds`)
#' @param login ESAJ system login (if left `NULL` and you're
#' not logged in yet, will ask for it)
#' @param password ESAJ system password (if left `NULL` and you're
#' not logged in yet, will ask for it)
#' @param only_petitions Whether to only get petitions
#' @return A tibble with one row per document and columns `title`,
#' `number`, `id` and `link` (empty for lawsuits that failed)
#' @seealso [download_documents()]
#' @export
get_metadata <- function(id, path = ".", login = NULL, password = NULL,
                         only_petitions = FALSE) {
  id <- abjutils::clean_cnj(id)

  # One output directory per lawsuit
  fs::dir_create(sprintf("%s/%s", path, id))

  # Format page numbers as zero-padded, 3-digit strings; a missing page
  # (the open end of the last document) becomes "inf". NA is handled
  # explicitly because zero-padding of "NA" by sprintf() is
  # platform-dependent.
  fmt_page <- function(x) {
    out <- sprintf("%03d", x)
    out[is.na(x)] <- "inf"
    out
  }

  # Get metadata for one ID
  get_metadata_ <- function(id) {
    # Output files are resolved per lawsuit: write_disk() and write_rds()
    # need a single path, so they cannot be built from the full ID vector
    f <- sprintf("%s/%s", path, id)
    html_file <- sprintf("%s/%s.html", f, id)
    rds_file <- sprintf("%s/%s.rds", f, id)

    # Initial access
    base <- "https://esaj.tjsp.jus.br/cpopg/"
    r_cpopg <- httr::GET(str_c(base, "open.do?gateway=true"), vfpr_f)

    # Parameters for GET query
    # NOTE(review): offsets 1-15 and 22+ look like they target a punctuated
    # lawsuit number, but `id` went through abjutils::clean_cnj() above --
    # confirm they still select the intended parts
    query_get <- list(
      conversationId = "",
      dadosConsulta.localPesquisa.cdLocal = "-1",
      cbPesquisa = "NUMPROC",
      dadosConsulta.tipoNuProcesso = "UNIFICADO",
      numeroDigitoAnoUnificado = stringr::str_sub(id, 1, 15),
      foroNumeroUnificado = stringr::str_sub(id, 22),
      dadosConsulta.valorConsultaNuUnificado = id,
      dadosConsulta.valorConsulta = "")

    # Get lawsuit code from the redirect of the search request (the
    # response body is also saved to html_file)
    lwst_code <- str_c(base, "search.do") %>%
      httr::GET(
        query = query_get, vfpr_f,
        httr::write_disk(html_file, overwrite = TRUE)) %>%
      purrr::pluck("all_headers", 1, "headers", "location") %>%
      stringr::str_match("processo\\.codigo=([^&]+)&") %>%
      magrittr::extract(1, 2)

    # Get page with all PDFs (follows the location given by the first
    # response)
    f_folder <- base %>%
      str_c("abrirPastaDigital.do?processo.codigo=", lwst_code) %>%
      httr::GET(vfpr_f) %>%
      purrr::pluck("all_headers", 1, "headers", "location") %>%
      httr::GET(vfpr_f)

    # Convert relevant content into JSON (the str_sub() offsets presumably
    # strip the script syntax surrounding the JSON literal)
    json <- f_folder %>%
      httr::content("text") %>%
      sub_between("requestScope", "requestScopeArvore") %>%
      stringr::str_sub(5, -9) %>%
      jsonlite::fromJSON()

    # Create data frame with all documents found
    docs <- json$data %>%
      tibble::as_tibble() %>%
      tibble::rownames_to_column() %>%
      dplyr::mutate(rowname = as.integer(rowname)) %>%
      dplyr::group_by(title, rowname) %>%
      dplyr::do(link = {
        json$children[[.$rowname]]$data$parametros
      }) %>%
      tidyr::unnest(link) %>%
      dplyr::arrange(rowname) %>%
      dplyr::rowwise() %>%
      dplyr::mutate(
        link = str_c(
          "https://esaj.tjsp.jus.br/pastadigital/getPDF.action?", link),
        # First page of the document, taken from the link's query string
        number = link %>%
          stringr::str_match("numInicial=([0-9]+)") %>%
          magrittr::extract(1, 2) %>%
          as.integer(),
        # Normalize the title so it can be used in a file name
        title = title %>%
          rm_accent() %>%
          stringr::str_to_lower() %>%
          stringr::str_trim() %>%
          replace_all("[ +/]", "_") %>%
          replace_all("_+", "_")) %>%
      dplyr::ungroup() %>%
      dplyr::mutate(
        id = id,
        # Page range "first-last": a document ends right before the next
        # one starts; the last document has no known end ("inf")
        number = paste0(
          fmt_page(number), "-", fmt_page(dplyr::lead(number) - 1L))) %>%
      dplyr::select(title, number, id, link) %>%
      dplyr::arrange(number)

    # Keep the complete (unfiltered) metadata on disk
    readr::write_rds(docs, rds_file)

    # Filter rows if necessary
    if (only_petitions) {
      docs <- dplyr::filter(
        docs, detect(title, "peticao|ajuizamento|contestacao"))
    }
    docs
  }

  # On error, report it and fall back to an empty tibble so that the
  # remaining IDs are still processed
  get_metadata_ <- purrr::safely(
    get_metadata_, otherwise = dplyr::tibble(), quiet = FALSE)

  # Login to ESAJ system
  login_esaj(login, password)

  # Map download over all IDs
  purrr::map_dfr(id, ~get_metadata_(.x)$result)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.