# R/tseGetFinalAccountability.R

#' Get final accountability data from the Brazilian Superior Electoral Court
#'
#' \code{tseGetFinalAccountability} gets Brazilian accountability data from the
#' Superior Electoral Court (TSE - Tribunal Superior Eleitoral), which is freely
#' available from its \href{http://www.tse.jus.br/eleicoes}{site}.
#'
#' @details Depending on \code{srcLocationType}, the function downloads the
#'  requested data (\code{"http"}), extracts the data files from one or more
#'  local zip files (\code{"file"}), or skips both steps and reads data files
#'  that are already available in a local folder (\code{"folder"}). In every
#'  case the data files are then converted to an R \code{data.frame} according
#'  to the layouts returned by \code{utilGetLayout()}.
#'
#' @param srcLocation Source location: depending on the value of the
#' \code{srcLocationType} parameter, it shall be a path to one or more zip
#' \code{file}s, a path to a \code{folder} or an \code{http} url. It is
#' mandatory when \code{srcLocationType} is \code{"file"} or \code{"folder"}.
#'
#' @param srcLocationType Source location type: can assume any of the following
#' values: \code{"file"}, \code{"folder"} or \code{"http"}. The
#' \code{srcLocation} should assume appropriate values.
#'
#' @param destLocation Destination location: should point to a local folder in
#' which the data files will be stored after download or extraction. This is an
#' optional parameter. When \code{srcLocationType = "file"} and it is left
#' unassigned (or does not point to an existing folder), the folder that holds
#' the first zip file is used instead. When \code{srcLocationType = "http"} it
#' is forwarded unchanged to \code{tseDownloadFinalAccountability()}.
#'
#' @param electionType Election type. Only used when
#' \code{srcLocationType = "http"}, in which case it is forwarded to
#' \code{tseDownloadFinalAccountability()}.
#'
#' @param accountabilityType Kind of accountability data to read. One of
#' \code{"Party_Incomes"} (or \code{"Receitas_Partidos"}),
#' \code{"Candidate_Incomes"} (or \code{"Receitas_Candidatos"}),
#' \code{"Party_Expenses"} (or \code{"Despesas_Partidos"}) and
#' \code{"Candidate_Expenses"} (or \code{"Despesas_Candidatos"}). The legacy
#' misspelling \code{"Despesas_Candidadtos"} is still accepted.
#'
#' @param federativeBody Federative body whose data should be read (for
#' instance \code{"AP"}; defaults to \code{"BR"}). It is forwarded to
#' \code{utilFinalAccountabilityCsvToDataFrame()}.
#'
#' @param sinceYear Optional first election year; it is handed, together with
#' \code{year}, to \code{utilValidateElectionYears()}, which determines the
#' years that are actually processed.
#'
#' @param year Election year (\code{integer}). For this function, the following
#' years are available: 1994, 1996, 1998, 2000, 2002, 2004, 2006, 2008, 2010,
#' 2012, 2014 and 2016.
#'
#' @param extractZip Currently not used by this function.
#'
#' @param encoding Data original encoding (defaults to 'windows-1252').
#' Currently not used by this function.
#'
#' @param dataBaseEnvir Currently not used by this function.
#'
#' @return A \code{data.frame} with the rows returned by
#' \code{utilFinalAccountabilityCsvToDataFrame()} for every processed year,
#' combined with \code{dplyr::bind_rows()}.
#'
#' @import utils
#' @import dplyr
#' @import tm
#' @importFrom magrittr "%>%"
#' @export
#' @examples
#' \dontrun{
#' dfBaseAP <- tseGetFinalAccountability(
#'   srcLocationType = 'folder',
#'   srcLocation = "G:/Contas_TSE",
#'   accountabilityType = "Despesas_Partidos",
#'   federativeBody = "AP", year = 2016)
#' }
tseGetFinalAccountability <- function(srcLocation = NULL,
                                      srcLocationType = "file",
                                      destLocation = NULL,
                                      electionType = "",
                                      accountabilityType = "Party_Incomes",
                                      federativeBody = "BR",
                                      sinceYear = NA,
                                      year = NA,
                                      extractZip = FALSE,
                                      encoding = "windows-1252",
                                      dataBaseEnvir) {

  # Map the accountability type (English or Portuguese alias) to the layout
  # name and to the sprintf-style pattern of the data file names. Empty
  # alternatives fall through to the next non-empty one.
  switch(EXPR = accountabilityType,
         Party_Incomes = ,
         Receitas_Partidos = {
           layoutType <- "receitas_de_direcoes_partidarias"
           filePattern <- "receitas_partidos_prestacao_contas_final_%s_%s.txt"
         },
         Candidate_Incomes = ,
         Receitas_Candidatos = {
           layoutType <- "receitas_de_candidatos"
           filePattern <- "receitas_candidatos_prestacao_contas_final_%s_%s.txt"
         },
         Party_Expenses = ,
         Despesas_Partidos = {
           layoutType <- "despesas_de_direcoes_partidarias"
           filePattern <- "despesas_partidos_prestacao_contas_final_%s_%s.txt"
         },
         Candidate_Expenses = ,
         Despesas_Candidatos = ,
         # misspelled alias kept so that existing callers keep working
         Despesas_Candidadtos = {
           layoutType <- "despesas_de_candidatos"
           filePattern <- "despesas_candidatos_prestacao_contas_final_%s_%s.txt"
         },
         stop("\n[English]: Invalid accountability type.\n[Portuguese]: Tipo de prestacao de contas invalido.")
  )

  # Fail early on an unknown source type instead of reaching the reading step
  # without a data folder.
  if (!is.character(srcLocationType) || length(srcLocationType) != 1L ||
      !srcLocationType %in% c("file", "folder", "http")) {
    stop("\n[English]: Invalid source location type.\n[Portuguese]: Tipo de local de origem invalido.")
  }

  # Local sources cannot be resolved without an explicit location.
  if (srcLocationType != "http" && length(srcLocation) == 0L) {
    stop("\n[English]: Missing source location.\n[Portuguese]: Local de origem nao informado.")
  }

  year <- utilValidateElectionYears(year, sinceYear)

  if (srcLocationType == "http") {
    dataFolder <- destLocation
    for (y in year) {
      # download one election year at a time; the folder reported by the last
      # download is the one that is read afterwards
      dataFolder <- tseDownloadFinalAccountability(destFolder = destLocation,
                                                   dataUrl = srcLocation,
                                                   electionType = electionType,
                                                   year = y)
    }
  } else if (srcLocationType == "file") {
    # srcLocation may hold several zip files: extract every one of them
    for (src in srcLocation) {
      if (!file.exists(src)) {
        stop(paste0(
          paste("\n[English]: Invalid file:", src),
          paste("\n[Portuguese]: Arquivo invalido.", src)))
      }

      # without a usable destination, extract next to the (first) zip file
      if (is.null(destLocation) || !dir.exists(destLocation)) {
        srcFolder <- dirname(src)
        destLocation <- if (identical(srcFolder, ".")) getwd() else srcFolder
      }

      utils::unzip(src, exdir = destLocation)
    }
    dataFolder <- destLocation
  } else {
    # srcLocationType == "folder": the location must be an existing folder
    if (!isTRUE(dir.exists(srcLocation))) {
      stop(paste0(
        paste("\n[English]: Invalid folder:", srcLocation),
        paste("\n[Portuguese]: Pasta invalida.", srcLocation)))
    }
    dataFolder <- srcLocation
  }

  layouts <- utilGetLayout()

  # Read every requested year and stack the results, so that no year is lost.
  # NOTE(review): the argument name `datafolder` follows the direct call that
  # used to be commented out below this loop - confirm against the signature of
  # utilFinalAccountabilityCsvToDataFrame().
  yearlyData <- lapply(year,
                       utilFinalAccountabilityCsvToDataFrame,
                       datafolder = dataFolder,
                       filePattern = filePattern,
                       federativeBody = federativeBody,
                       layoutType = layoutType,
                       csvLayouts = layouts)

  # needs to return an environment with the required data plus the data folder
  dplyr::bind_rows(yearlyData)
}
# brunomssmelo/TseWrangler documentation built on May 13, 2019, 8:07 a.m.