Nothing
#' Queries the appeals made through the Brazilian Right to Information Law
#'
#' Downloads the appeals ("Recursos") files published by the CGU for the
#' selected years, applies the agency and keyword filters and returns the
#' result as a data frame. Files are extracted into the session temporary
#' directory and reused if they are already there.
#'
#' @importFrom utils download.file unzip
#'
#' @param year selects the years which data will be downloaded. integer
#'   vector, or 'all' (default) for every year from 2015 to the current one.
#' @param agency selects the public agency to be searched. see the available
#'   agencies in agencies_initials. character; when more than one value is
#'   given, appeals addressed to any of them are kept. 'all' (default)
#'   disables the filter.
#' @param search selects the keyword to be searched. character. When more than
#'   one word is given, Portuguese stopwords are dropped and only appeals
#'   containing every remaining word are returned. 'all' (default) disables
#'   the filter.
#' @param answer if true, fetches the content of the search argument in the
#'   appeals responses instead of the appeals text. logical.
#'
#'
#' @return a dataframe with appeals containing the keyword, or `NULL` (with a
#'   message) when the data source cannot be reached.
#' @examples
#' \dontrun{appeals(search = 'PAC')}
#' @export
appeals <- function(year = 'all', agency = 'all', search = 'all', answer = FALSE) {
  # Column that is tokenized when searching: the appeal text or its answer.
  col_filter <- if (isTRUE(as.logical(answer))) 'resposta_recurso' else 'desc_recurso'

  year.options <- 2015:as.integer(format(Sys.Date(), "%Y"))
  links <- paste0('https://dadosabertos-download.cgu.gov.br/FalaBR/Arquivos_FalaBR_Filtrado/Arquivos_csv_', year.options, '.zip')

  # Bind the column names used through non-standard evaluation.
  palavras <- id_recurso <- orgao_destinatario <- n_palavras <- NULL

  # Normalize the search terms. Lower-casing happens BEFORE the stopword
  # removal so that capitalized stopwords ("De", "Para") are dropped as well.
  search <- tolower(search)
  if (sum(stringr::str_count(search, '\\w+')) > 1) {
    search <- unlist(strsplit(search, split = " "))
    search <- search[nzchar(search)] # repeated spaces produce empty strings
    search <- search[!search %in% stopwords::stopwords('portuguese')]
  }
  search <- unique(search)

  nomes.colunas <- c('id_recurso', 'id_recurso_precedente', 'desc_recurso', 'id_pedido',
                     'id_solicitante', 'protocolo_pedido', 'orgao_destinatario', 'instancia',
                     'situacao', 'data_registro', 'prazo_atendimento', 'origem_solicitacao',
                     'tipo_recurso', 'data_resposta', 'resposta_recurso', 'tipo_resposta')

  # Empty result with exactly one column per name (used when nothing is read).
  tabela.vazia <- data.frame(matrix(NA, nrow = 0, ncol = length(nomes.colunas)))
  colnames(tabela.vazia) <- nomes.colunas

  dir.temp <- tempdir()

  # if the user does not enter the year, data for all years will be downloaded
  if ('all' %in% year) {
    year <- year.options
  }
  invalid.years <- setdiff(year, year.options)
  if (length(invalid.years) > 0) {
    stop('Invalid year(s): ', paste(invalid.years, collapse = ', '),
         '. Available years: ', min(year.options), '-', max(year.options), '.',
         call. = FALSE)
  }

  # One slot per requested year; the pieces are bound once after the loop
  # instead of growing a data frame on every iteration.
  partes <- vector('list', length(year))

  for (k in seq_along(year)) {
    i <- year[k]
    link <- links[match(i, year.options)]
    padrao.csv <- paste0('Recursos_csv_', i)

    # check if the file of this year has already been extracted
    lista.arquivos.locais <- list.files(path = dir.temp, pattern = "\\.csv$", full.names = TRUE)

    if (any(grepl(padrao.csv, lista.arquivos.locais, fixed = TRUE))) {
      message(paste0('Data for the year ', i, ' found locally.'))
    } else {
      # network is down = message (not an error)
      if (!RCurl::url.exists(link)) {
        message("No internet connection or data source broken.")
        return(NULL)
      }
      message(paste0("Downloading ", i, " dataset."))
      arquivo.zip <- file.path(dir.temp, basename(link))
      status <- tryCatch(
        download.file(link, arquivo.zip, mode = 'wb'),
        error = function(e) 1L
      )
      if (!identical(as.integer(status), 0L)) {
        message("No internet connection or data source broken.")
        return(NULL)
      }
      # extract only the appeals ("Recursos") files from the archive
      conteudo.zip <- unzip(zipfile = arquivo.zip, list = TRUE)
      arquivo.recursos <- grep("Recursos", conteudo.zip$Name, value = TRUE)
      if (length(arquivo.recursos) > 0) {
        unzip(zipfile = arquivo.zip, exdir = dir.temp, files = arquivo.recursos)
      }
      lista.arquivos.locais <- list.files(path = dir.temp, pattern = "\\.csv$", full.names = TRUE)
    }

    # read the files
    caminho.arquivo <- grep(padrao.csv, lista.arquivos.locais, fixed = TRUE, value = TRUE)
    if (length(caminho.arquivo) == 0) {
      message(paste0("Inconsistent data for the year ", i, "."))
      next
    }
    var <- readr::read_csv2(file = caminho.arquivo, col_names = FALSE, quote = '\'',
                            show_col_types = FALSE,
                            locale = readr::locale(encoding = "UTF-16LE"))
    if (ncol(var) != length(nomes.colunas)) {
      # the file could not be split into the expected columns
      message(paste0("Inconsistent data for the year ", i, "."))
    } else {
      colnames(var) <- nomes.colunas
      partes[[k]] <- var
    }
  }

  partes <- Filter(Negate(is.null), partes)
  tabela <- if (length(partes) > 0) do.call(rbind, partes) else tabela.vazia
  rm(list = 'partes') # release the per-year copies

  if (!'all' %in% agency) {
    # several agencies are combined into a single alternation pattern
    agency.pattern <- paste(agency, collapse = '|')
    tabela <- dplyr::filter(tabela, stringr::str_detect(orgao_destinatario, agency.pattern))
  }

  if ('all' %in% search || nrow(tabela) == 0) {
    tabela.final <- tabela
  } else {
    # Tokenize in chunks of 5000 rows to reduce RAM consumption
    chunk.size <- 5000
    nr <- nrow(tabela)
    grupos <- rep(seq_len(ceiling(nr / chunk.size)), each = chunk.size, length.out = nr)
    lista.tabelas <- split(tabela, grupos)
    rm(list = 'tabela')

    parciais <- lapply(lista.tabelas, function(parte) {
      # `!!` injects the column name held in `col_filter`; the result has one
      # row per (appeal, token) with the token stored in `palavras`.
      parte <- tidytext::unnest_tokens(parte, 'palavras', !!col_filter, drop = FALSE)
      parte <- dplyr::filter(parte, palavras %in% search)
      unique(parte)
    })
    tabela.final <- do.call(rbind, unname(parciais))

    # With several keywords, keep only appeals in which all of them were found:
    # after unique() each appeal has one row per distinct matched keyword.
    n.words <- sum(stringr::str_count(search, '\\w+'))
    if (n.words > 1) {
      contagem <- dplyr::count(tabela.final, id_recurso, name = 'n_palavras')
      tabela.final <- dplyr::left_join(tabela.final, contagem, by = 'id_recurso')
      tabela.final <- dplyr::filter(tabela.final, n_palavras >= n.words)
    }

    # drop the helper columns (token and count) and the duplicated appeals
    tabela.final <- unique(dplyr::select(tabela.final, seq_along(nomes.colunas)))
  }

  message('Ended query.')
  return(tabela.final)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.