parse_dje_tjms <- function(text_file){
txt <- readr::read_file(text_file)
clean_text <- txt %>%
remove_header() %>%
remove_footer()
sumario <- clean_text %>%
find_index() %>%
stringr::str_split("(SUM\u00c1RIO|\b)\n") %>%
dplyr::first() %>%
stringr::str_subset("Juizado|Vara") %>%
stringr::str_trim()
courts <- sumario %>%
paste0("\n", ., "\n") %>%
break_text(clean_text, .) %>%
append(-1)
d <- purrr::map(1:(length(courts)-1),
~stringr::str_sub(clean_text, courts[.x], courts[.x+1])) %>%
purrr::map(~break_chunk(.x) %>%
dplyr::first() %>%
purrr::map_df(parse_docket_entry)) %>%
purrr::map2(names(courts)[-length(courts)],
~dplyr::mutate(.x, location = .y)) %>%
dplyr::bind_rows() %>%
dplyr::filter(stringr::str_detect(lawsuit_id, "Processo"))
}
#' Parse DJE file
#'
#' parse TJSP file obtained from [dje_to_text()].
#'
#' @param text_file path of the file to parse.
#'
#' @return tibble containing lawsuit and court information
#' @export
parse_dje_tjsp <- function(text_file) {
# clean_text <- text_file %>%
# remove_header() %>%
# remove_footer()
txt <- readr::read_file(text_file)
cnj_format_sp <- stringr::regex("[0-9]{7}\\-[0-9]{2}\\.[0-9]{4}\\.8\\.26\\.[0-9]{4}")
clean_text <- paste0(txt, "@fim_do_texto@")
classify_content <- function(raw_content) {
dplyr::case_when(
stringr::str_detect(raw_content, "Distribuidor") ~ "D",
stringr::str_detect(raw_content, "[XVI ]+-|F\u00f3rum|^[\u00c7A-Z\u00c3\u00c2\u00c1\u00cd\u00d3\u00da\u00c9\u00ca ]+$") ~ "C",
TRUE ~ "Outros")
}
index <- clean_text %>%
find_index(index_regex = "\nSUM\u00c1RIO\nTRIBUNAL") %>%
stringr::str_split("SUM\u00c1RIO|\n") %>%
dplyr::first() %>%
stringr::str_trim() %>%
stringr::str_subset("^[XVI ]+-|F\u00f3rum|^[\u00c7A-Z\u00c3\u00c2\u00c1\u00cd\u00d3\u00da\u00c9\u00ca -]+$|Vara|Anexo|Distribuidor|Juizado|Fiscais|Criminal|C\u00edvel") %>%
tibble::as.tibble() %>%
purrr::set_names("valor") %>%
dplyr::mutate(tipo = classify_content(valor))
clean_breaks <- function(x, y) {
if (stringr::str_detect(x[[1]][length(x[[1]])], "Distribuidor")) {
tab2 <- tibble::tibble(valor = ifelse(is.na(y), "@fim_do_texto@", y))
x <- dplyr::bind_rows(x, tab2)
} else {
x
}
}
breaks <- index %>%
dplyr::filter((tipo == "D") |
(dplyr::lag(tipo) == "D") |
(tipo == "C" & dplyr::lead(tipo) != "C")) %>%
dplyr::filter(is.na(dplyr::lead(tipo)) |
!(tipo == "C" & dplyr::lead(tipo) == "C")) %>%
dplyr::mutate(classe = ifelse(tipo == "C" & !is.na(dplyr::lead(tipo)),
valor, NA),
valor = paste0("\n", valor, "\n")) %>%
tidyr::fill(classe) %>%
dplyr::mutate(classe = paste0("\n", classe, "\n")) %>%
dplyr::select(classe, valor) %>%
dplyr::group_by(classe) %>%
tidyr::nest(.key = "valor") %>%
dplyr::mutate(valor = purrr::map2(valor, dplyr::lead(classe), clean_breaks))
breaks_counties <- clean_text %>%
break_text(breaks$classe) %>%
append(-1)
cut_text <- function(x, y) {
if (breaks_counties[x+1] == -1){
text <- clean_text %>%
stringr::str_sub(breaks_counties[x], breaks_counties[x+1])
} else {
text <- clean_text %>%
stringr::str_sub(breaks_counties[x],
breaks_counties[x + 1] + nchar(breaks_counties[x + 1]) - 1)
}
# print(y$valor[1])
if(length(y$valor) == 3) {
lim_inf <- text %>%
break_text(y$valor[2])
points <- text %>%
stringr::str_sub(lim_inf, -1) %>%
break_text(y$valor[2:3])
r <- stringr::str_sub(stringr::str_sub(text, lim_inf, -1), points[1], points[2])
} else {
distribuidor <- which(stringr::str_detect(y$valor, "Distribuidor"))
r <- character(length(distribuidor))
for(i in 1:length(distribuidor)) {
if(i == 1){
points <- text %>%
break_text(y$valor[distribuidor[1]:(distribuidor[1] + 1)])
r[i] <- stringr::str_sub(text, points[1], points[2])
} else {
lim_inf <- text %>%
break_text(y$valor[distribuidor[i]])
points <- text %>%
stringr::str_sub(lim_inf, -1) %>%
break_text(y$valor[distribuidor[i]:(distribuidor[i]+1)])
r[i] <- stringr::str_sub(stringr::str_sub(text, lim_inf, -1),
points[1], points[2])
}
}
# points1 <- text %>%
# break_text(y$valor[distribuidor[1]:(distribuidor[1]+1)])
#
# lim_inf <- text %>%
# break_text(y$valor[distribuidor[2]])
#
# points2 <- text %>%
# stringr::str_sub(lim_inf, -1) %>%
# break_text(y$valor[distribuidor[2]:(distribuidor[2]+1)])
#
# c(stringr::str_sub(text, points1[1], points1[2]),
# stringr::str_sub(stringr::str_sub(text, lim_inf, -1), points2[1], points2[2]))
r
}
}
inner_breaks <- purrr::map2(seq_along(breaks$classe), breaks$valor, cut_text) %>%
unlist() %>%
stringr::str_split(pattern = stringr::regex(paste0("(?=PROCESSO ?:",
cnj_format_sp, ")"),
ignore_case = TRUE))
d <- breaks %>%
tidyr::unnest(valor) %>%
dplyr::filter(stringr::str_detect(valor, "Distribuidor")) %>%
dplyr::mutate(processos = inner_breaks) %>%
tidyr::unnest(processos)
d
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.