R/pnad_get.R

Defines functions pnad_get

Documented in pnad_get

#' Download PNAD files and read them.
#'
#' Downloads every file returned by `pnad_list(files = TRUE, year = year)`,
#' extracts the archives into a private temporary directory, discards
#' everything except the `dom*`/`pes*` text files (data and `input`/`sas`
#' layouts) and the `dicio*.xls` dictionaries, and hands the matching paths to
#' `pnad_read()`. All temporary files are removed when the function exits,
#' whether it succeeds or fails.
#'
#' @param year A year must be provided. It is forwarded to `pnad_list()` to
#'   find the files to download.
#' @param design TRUE if you want a survey object, FALSE if you want a dataframe.
#'   When TRUE the data are passed through `pnad_design()` before returning.
#' @param vars Selected variables. Forwarded unchanged to `pnad_read()`;
#'   `NULL` (the default) presumably selects all variables.
#' @param hh_only If TRUE, will skip person data, using just household data.
#'
#' @return A survey object or a tibble.
#' @export
#'
#' @examples
#' \dontrun{
#' # Download PNAD 2015
#' pnad2015 <- pnad_get(year = 2015)
#'
#' # Select just the variable V8005 for PNAD 2008
#' pnad2008 <- pnad_get(year = 2008, vars = c("V8005"))
#' }
#'
#' @importFrom utils download.file unzip
pnad_get <- function(year, design = TRUE, vars = NULL, hh_only = FALSE) {

  # A year must be provided
  stopifnot("You must select an year" = !missing(year))

  # List required files
  download_links <- pnad_list(files = TRUE, year = year)

  # Fail early with a clear message instead of a later "object not found"
  if (length(download_links) == 0L) {
    stop("No files to download for the selected year", call. = FALSE)
  }

  # Archives go to the session temp dir; extraction goes to a directory that
  # belongs to this call only, so leftovers from other calls (or unrelated
  # user files) can neither be matched by the patterns below nor be deleted.
  zip_files <- file.path(tempdir(), basename(download_links))
  temp_dir <- tempfile(pattern = "pnad_unzip_")
  dir.create(temp_dir)

  # Always clean up, including when a download or the reading step fails
  on.exit(unlink(c(zip_files, temp_dir), recursive = TRUE), add = TRUE)

  for (i in seq_along(download_links)) {
    # mode = "wb": the archives are binary and must not be written in text mode
    utils::download.file(url = download_links[[i]],
                         destfile = zip_files[[i]],
                         mode = "wb")

    # Extract files (flattened: junkpaths drops the folders inside the archive)
    zip::unzip(zip_files[[i]], junkpaths = TRUE, exdir = temp_dir)
  }

  # Remove files that won't be necessary
  file.remove(grep("((input |sas_)?(pes|dom)(soa|icilio)?\\d{2}(\\d{2})?\\.txt|dicio.*\\.xls)",
                   list.files(path = temp_dir, full.names = TRUE),
                   ignore.case = TRUE,
                   value = TRUE,
                   invert = TRUE))

  extracted <- list.files(temp_dir)

  hh_file_df <- file.path(temp_dir,
                          grep("^dom(icilio)?\\d{2}(\\d{2})?",
                               extracted,
                               ignore.case = TRUE,
                               value = TRUE))

  # NOTE(review): unlike the person layout pattern, this one has no optional
  # "icilio" part -- confirm that is intended.
  hh_file_input <- file.path(temp_dir,
                             grep("^(input|sas).dom\\d{2}(\\d{2})?.txt",
                                  extracted,
                                  ignore.case = TRUE,
                                  value = TRUE))

  # Check if person data will be skipped
  if (!hh_only) {
    prs_file_df <- file.path(temp_dir,
                             grep("^pes(soa)?\\d{2}(\\d{2})?",
                                  extracted,
                                  ignore.case = TRUE,
                                  value = TRUE))

    prs_file_input <- file.path(temp_dir,
                                grep("^(input|sas).pes(soa)?\\d{2}(\\d{2})?.txt",
                                     extracted,
                                     ignore.case = TRUE,
                                     value = TRUE))

    pnad <- pnad_read(hh_data = hh_file_df, hh_input = hh_file_input,
                      prs_data = prs_file_df, prs_input = prs_file_input,
                      vars = vars)
  } else {
    # Name hh_input explicitly, as in the branch above, rather than relying
    # on positional matching
    pnad <- pnad_read(hh_data = hh_file_df, hh_input = hh_file_input,
                      vars = vars)
  }

  invisible(gc())

  # Check if must return a survey object
  if (design) {
    pnad <- pnad_design(pnad)
  }

  pnad
}
hhmacedo/dnads documentation built on March 10, 2023, 11:35 p.m.