# data-raw/metadata.R

library(tidyverse)

# Build the dictionary used to read the EPF variables from the public use
# files.
#
# Reads the variable specifications from 'data-raw/dicts.csv' and the
# file-to-dictionary mapping from 'data-raw/file_dict.csv' (both
# semicolon-delimited), derives the helper columns described below, and
# returns the mapping left-joined with the augmented specifications.
#
# Derived columns:
#   width    - end - start + 1.
#   decimals - digits after the dot in the original `fmt` value, NA if none.
#   fmt      - recoded to 'c' when the original value is not a numeric
#              format, 'i' when it is numeric without decimals and 'd'
#              otherwise (presumably character / integer / double; confirm
#              against the code that consumes this table).
#
# Takes no arguments; paths are relative to the working directory.
build_epf_dict <- function() {
  var_specs <- read_delim(
    'data-raw/dicts.csv',
    delim = ';',
    col_types = 'ccicii'
  )

  # A numeric format is a run of digits, a dot and an optional run of digits.
  # Column 1 of the match matrix is the whole match (NA when `fmt` is not
  # numeric); column 3 is the optional decimals group.
  fmt_parts <- str_match(var_specs$fmt, '^([0-9]+)\\.([0-9]+)?')

  var_specs <- mutate(
    var_specs,
    width = end - start + 1,
    decimals = parse_integer(fmt_parts[, 3]),
    fmt = case_when(
      is.na(fmt_parts[, 1]) ~ 'c',
      is.na(decimals) ~ 'i',
      TRUE ~ 'd'
    )
  )

  file_map <- read_delim(
    'data-raw/file_dict.csv',
    delim = ';',
    col_types = 'cii'
  )

  # `dict` in the mapping is matched against `year` in the specifications;
  # it is only a join key, so it is dropped from the result.
  file_map %>%
    left_join(var_specs, by = c('file', 'dict' = 'year')) %>%
    select(-dict)
}

# Variable dictionary for every file covered by the file mapping.
epf_dict <- build_epf_dict()

# Table of downloadable files read from 'data-raw/gdrive.csv'. Adds a base
# name of the form <file>_<year> and a Google Drive URL built from
# `google_id`; the raw id column is then dropped.
epf_files <-
  read_delim(
    'data-raw/gdrive.csv',
    delim = ';',
    col_types = 'cic'
  ) %>%
  mutate(base_name = str_c(file, '_', year)) %>%
  mutate(url = str_c('https://drive.google.com/open?id=', google_id)) %>%
  select(-google_id)

# Save both metadata tables as internal package data (sysdata.rda),
# replacing any existing copy.
# use_data() is provided by usethis, which library(tidyverse) does not
# attach; the call is namespace-qualified so the script also runs in a fresh
# session where devtools/usethis has not been loaded.
usethis::use_data(epf_dict, epf_files,
                  internal = TRUE, overwrite = TRUE)
# jcpernias/epf documentation built on Dec. 23, 2019, 6:31 p.m.