R/get_epf_data.R

Defines functions convert_vars process_epf_file get_epf_data

Documented in get_epf_data

#' Convert one character column to its declared type
#'
#' Looks up column \code{varname} in \code{data} and converts it according to
#' \code{fmt}: \code{"i"} parses the text as integers; \code{"d"} splits each
#' string after its first \code{width - decimals} characters, joins the two
#' pieces with a decimal point and parses the result as doubles. Any other
#' \code{fmt} leaves the column untouched.
#'
#' @param varname name of the column to convert.
#' @param fmt format code of the column (\code{"i"}, \code{"d"} or other).
#' @param width total number of characters of the field.
#' @param decimals number of trailing characters that are decimal digits.
#' @param data list-like object holding the character columns.
#'
#' @return A one-element list holding the converted column, named
#'   \code{varname}.
#' @noRd
convert_vars <- function(varname, fmt, width, decimals, data) {
  raw <- data[[varname]]

  # Every branch returns the column wrapped in a list named after the variable
  as_named_column <- function(values) {
    rlang::set_names(list(values), varname)
  }

  if (fmt == "i") {
    return(as_named_column(readr::parse_integer(raw)))
  }
  if (fmt != "d") {
    return(as_named_column(raw))
  }

  # The decimal point is implicit in the raw text: re-insert it between the
  # leading (width - decimals) characters and the remaining ones.
  split_at <- width - decimals
  whole_part <- stringr::str_sub(raw, 1, split_at)
  frac_part <- stringr::str_sub(raw, split_at + 1, width)
  as_named_column(
    readr::parse_double(stringr::str_c(whole_part, frac_part, sep = "."))
  )
}

#' Process database files
#'
#' Reads the raw fixed-width file \code{data-raw/<file>_<year>.gz}, converts
#' its columns as described by the matching rows of \code{epf_dict}, and saves
#' the result to \code{data/<file>_<year>.rda} under the object name
#' \code{<file>_<year>}. Progress is reported with \code{message()}.
#'
#' Both paths are relative to the current working directory.
#'
#' @param file file identifier, matched against the \code{file} column of
#'   \code{epf_dict}.
#' @param year year, matched against the \code{year} column of
#'   \code{epf_dict}.
#'
#' @noRd
process_epf_file <- function(file, year) {
  base_filename <- stringr::str_c(file, year, sep = '_')

  # get dictionary
  # `!!` forces the function arguments, which would otherwise be masked by
  # the dictionary columns of the same name.
  db_dict <- dplyr::filter(epf_dict, file == !!file, year == !!year)

  db_pos <- readr::fwf_positions(db_dict[["start"]],
                                 db_dict[["end"]],
                                 db_dict[["varname"]])

  # Fixed: the original used `stringr:str_c`, which R parses as the sequence
  # operator `:` applied to an object called `stringr` and fails at runtime.
  raw_path <-
    file.path("data-raw", stringr::str_c(base_filename, ".gz"))
  message(stringr::str_c(base_filename, ": Reading raw data..."))
  # Read every column as text; typing is done afterwards by convert_vars()
  db <-
    readr::read_fwf(raw_path,
                    db_pos,
                    col_types = readr::cols(.default = readr::col_character()))

  message(stringr::str_c(base_filename, ": Transforming variables..."))
  # One convert_vars() call per dictionary row; results are column-bound
  db <- db_dict %>%
    dplyr::select(varname, fmt, width, decimals) %>%
    purrr::pmap_dfc(convert_vars, data = db)

  db_path <-
    file.path("data", stringr::str_c(base_filename, ".rda"))
  message(stringr::str_c(base_filename, ": Saving database..."))
  # save() stores objects by name, so bind the data to its final name first
  assign(base_filename, db)
  save(list = base_filename,
       file = db_path,
       compress = "xz")
  message(stringr::str_c(base_filename, ": Done.\n"))
}



#' Get EPF data files
#'
#' Get the public use EPF files and process them. The \code{years} argument
#' selects which years should be processed; when it is not given, all
#' available years (2006 to 2018) are processed. If \code{path} is
#' \code{NULL}, the data are downloaded from the website of the Spanish
#' Statistical Office (INE, \url{https://www.ine.es}). Alternatively, the
#' user can provide the \code{path} to a local copy of the data files.
#'
#' Note: the processing step is not wired up yet. The function currently
#' ignores both arguments and returns \code{NULL} without reading or
#' downloading anything.
#'
#' @param years a numeric vector or \code{NULL}.
#' @param path path to local copies of the data files, or \code{NULL}.
#'
#' @return \code{NULL}
#' @export
get_epf_data <- function(years = NULL, path = NULL) {

  # Root dir of the installed epf package (looked up but not used yet)
  pkg_root <- system.file(package = "epf")

  # Placeholder for the future processing step:
  # pwalk(databases, process_db)

  NULL
}
jcpernias/epf documentation built on Dec. 23, 2019, 6:31 p.m.