R/fetch_FF.R

Defines functions fetch_FF_3F__

# If dplyr::select is in a package, using .data also prevents R CMD check from
# giving a NOTE about undefined global variables (provided that @importFrom
# rlang .data is inserted). Using rlang::.data (without @importFrom rlang .data)
# is another option. See Wickham, Hadley, R Packages, O'Reilly, 1st Edition,
# 2015, p. 89 for details

#' @importFrom rlang .data
#' @importFrom magrittr "%>%"

fetch_FF_3F__ <- function(hdl_str, dest_dir, save_to_disk = T){

  # Function handle two models/entries: FF_3F_US and FF_3F_DEV

  hdl_offset <- switch(hdl_str, "FF_3F_US_M" = 2, "FF_3F_DEV_M" = 4)
  if( is.null(hdl_offset) ) {
    error_msg <- stringr::str_glue("hdl_offset can't be set. ",
                                   "Switch call possibly faulty.")
    stop(error_msg, call. = T)
  }


  # Select catalog entry
  c_entry <- dplyr::filter(catalog, .data$hdl == hdl_str)

  # ----------------------------------------------------------------------------
  # Set up dir/file names
  #
  # < path_df_ >, < path_d_ > and < path_f_ > are reserved prefix,
  # where 'd' stand for directory and 'f' stands for file.
  #
  path_df_archive <- stringr::str_glue(dest_dir, '/Archives/',
                                       fs::path_file(c_entry$url_src))
  path_df_audit <- stringr::str_glue(dest_dir, '/Audit/', hdl_str, '.pdf')
  path_df_series_csv <- stringr::str_glue(dest_dir, '/Uncompressed/',
                                          hdl_str, '.csv')
  path_d_uncompressed <- stringr::str_glue(dest_dir, '/Uncompressed')
  path_f_uncompressed <- stringr::str_replace(string =
                                                fs::path_file(c_entry$url_src),
                                              pattern = '_TXT.zip',
                                              replacement = '.txt')
  path_df_uncompressed <- stringr::str_glue(dest_dir, '/Uncompressed/',
                                            path_f_uncompressed)
  # ----------------------------------------------------------------------------

  # Send request
  response <- httr::GET(c_entry$url_src,
                        httr::write_disk(path_df_archive, overwrite = T))

  # Validate response
  if(httr::status_code(response) != 200) {
    print( httr::http_status(response) )
    print( httr::headers(response) )
  }
  httr::stop_for_status(x = response,
                        task = stringr::str_glue('Fail to Download ',
                                                 c_entry$url_src))

  # Uncompress file in archive (e.g. '.zip') format
  zip::unzip(zipfile = path_df_archive, files = path_f_uncompressed,
             exdir = path_d_uncompressed)
  # Set permission to 'r-' for non-owners
  fs::file_chmod(path_df_uncompressed, mode = '644')

  # Process uncompressed file content
  stream_lines <- readr::read_lines(path_df_uncompressed,
                                    skip = 0,
                                    skip_empty_rows = F)

  # Process NA's
  purrr::map(.x = seq_along(stream_lines), .f = function(l){
    stream_lines[l] <<- stringr::str_replace_all(string = stream_lines[l],
                                                 pattern = '-999',
                                                 replacement = 'NA')
  })
  purrr::map(.x = seq_along(stream_lines), .f = function(l){

    stream_lines[l] <<- stringr::str_replace_all(string = stream_lines[l],
                                                 pattern = '-99.99',
                                                 replacement = 'NA')
  })

  hdl_m <- stringr::str_which(stream_lines, pattern = 'Mkt-RF')[1]
  hdl_a <- stringr::str_which(stream_lines, pattern = 'Annual Factors:')

  tbl <- readr::read_table(file = stream_lines,
                           col_types = readr::cols(readr::col_character(),
                                                   readr::col_double(),
                                                   readr::col_double(),
                                                   readr::col_double(),
                                                   readr::col_double()),
                           skip = hdl_m,
                           skip_empty_rows = F, col_names = F,
                           n_max = ((hdl_a - hdl_offset) - (hdl_m + 1) + 1))

  tbl <- magrittr::set_colnames(x = tbl, c('date', 'mkt', 'smb', 'hml', 'rf'))
  tbl <- dplyr::mutate_if(.tbl = tbl,
                          .predicate = is.double,
                          .funs = function(.) ./100) %>%
    dplyr::mutate(year = as.integer(stringr::str_sub(string = date,
                                                     start = 1, end = 4)),
                  month = as.integer(stringr::str_sub(string = date,
                                                      start = 5, end = 6))) %>%
    dplyr::select(.data$year, .data$month,
                  .data$mkt, .data$smb, .data$hml, .data$rf)

  if( save_to_disk ) {
    readr::write_csv(x = tbl, path = path_df_series_csv)
  }

  # ----------------------------------------------------------------------------
  # Auditing
  caption_str <- as.character(stringr::str_glue(c_entry$desc, ': Paths'))
  tbl_path <- tibble::tibble(
    format = c('url_src', 'archive', 'uncomp\'d', 'csv'),
    path_dir = c(fs::path_dir(c_entry$url_src),
                 fs::path_dir(path_df_archive),
                 fs::path_dir(path_df_uncompressed),
                 fs::path_dir(path_df_series_csv))
    ) %>% xtable::xtable(caption = caption_str)

  caption_str <- as.character(stringr::str_glue(c_entry$desc, ': Attributes'))
  tbl_attr <- tibble::tibble(
    file = c( fs::path_file(path_df_archive),
              fs::path_file(path_df_uncompressed),
              fs::path_file(path_df_series_csv)),
    user = c( fs::file_info(path_df_archive)$user,
              fs::file_info(path_df_uncompressed)$user,
              fs::file_info(path_df_series_csv)$user),
    device_id =
      c( as.character( fs::file_info(path_df_archive)$device_id ),
         as.character( fs::file_info(path_df_uncompressed)$device_id ),
         as.character( fs::file_info(path_df_series_csv)$device_id )),
    permissions =
      c( as.character( fs::file_info(path_df_archive)$permissions ),
         as.character( fs::file_info(path_df_uncompressed)$permissions ),
         as.character( fs::file_info(path_df_series_csv)$permissions )),
    size = c( fs::file_info(path_df_archive)$size,
              fs::file_info(path_df_uncompressed)$size,
              fs::file_info(path_df_series_csv)$size),
    birth =
      c( as.character( fs::file_info(path_df_archive)$birth_time ),
         as.character( fs::file_info(path_df_uncompressed)$birth_time ),
         as.character( fs::file_info(path_df_series_csv)$birth_time) )
  ) %>% xtable::xtable(caption = caption_str)

  caption_str <- as.character(stringr::str_glue(c_entry$desc, ': Time Stamps'))
  tbl_times <- tibble::tibble(
    file = c( fs::path_file(path_df_archive),
              fs::path_file(path_df_uncompressed),
              fs::path_file(path_df_series_csv)),
    modification =
      c( as.character( fs::file_info(path_df_archive)$modification_time ),
         as.character( fs::file_info(path_df_uncompressed)$modification_time ),
         as.character( fs::file_info(path_df_series_csv)$modification_time )),
    access =
      c( as.character( fs::file_info(path_df_archive)$access_time ),
         as.character( fs::file_info(path_df_uncompressed)$access_time ),
         as.character( fs::file_info(path_df_series_csv)$access_time) ),
    change =
      c( as.character( fs::file_info(path_df_archive)$change_time ),
         as.character( fs::file_info(path_df_uncompressed)$change_time ),
         as.character( fs::file_info(path_df_series_csv)$change_time) )
  ) %>% xtable::xtable(caption = caption_str)

  str_catalog <- stringr::str_glue('catalog entry: ', hdl_str)
  str_scheme <- stringr::str_glue('Scheme: Monthly')
  str_NA <- stringr::str_glue('Has NA: ', any( apply(X = tbl, 2, anyNA) ) )
  str_etag <- stringr::str_glue('Etag: ', response$headers$etag)
  str_doc <- stringr::str_glue('Documentation: ', c_entry$doc_src)

  arsenal::write2pdf(object = list(tbl_path,
                                   tbl_attr,
                                   tbl_times,
                                   str_catalog,
                                   str_scheme,
                                   str_NA,
                                   str_etag,
                                   str_doc),
                     file = path_df_audit, quiet = T)
  # ----------------------------------------------------------------------------

  attr(tbl, which = 'catalog_hdl') <- hdl_str
  attr(tbl, which = 'series_freq') <- 'Monthly'
  attr(tbl, which = 'units') <- 'decimals'
  return(tbl)
}


fetch_FF_OP__ <- function(hdl_str, dest_dir, save_to_disk = T){

  # Function handle two models/entries: FF_OP_US and FF_OP_exDiv_US
  # Scheme: Equal Weight Returns -- Monthly

  # Select catalog entry
  c_entry <- dplyr::filter(catalog, .data$hdl == hdl_str)

  # ----------------------------------------------------------------------------
  # Set up dir/file names
  #
  # < path_df_ >, < path_d_ > and < path_f_ > are reserved prefix,
  # where 'd' stand for directory and 'f' stands for file.
  #
  path_df_archive <- stringr::str_glue(dest_dir, '/Archives/',
                                       fs::path_file(c_entry$url_src))
  path_df_audit <- stringr::str_glue(dest_dir, '/Audit/', hdl_str, '.pdf')
  path_df_series_csv <- stringr::str_glue(dest_dir, '/Uncompressed/',
                                          hdl_str, '.csv')
  path_d_uncompressed <- stringr::str_glue(dest_dir, '/Uncompressed')
  path_f_uncompressed <- stringr::str_replace(string =
                                                fs::path_file(c_entry$url_src),
                                              pattern = '_TXT.zip',
                                              replacement = '.txt')
  path_df_uncompressed <- stringr::str_glue(dest_dir, '/Uncompressed/',
                                            path_f_uncompressed)
  # ----------------------------------------------------------------------------

  # Send request
  response <- httr::GET(c_entry$url_src,
                        httr::write_disk(path_df_archive, overwrite = T))

  # Validate response
  if(httr::status_code(response) != 200) {
    print( httr::http_status(response) )
    print( httr::headers(response) )
  }
  httr::stop_for_status(x = response,
                        task = stringr::str_glue('Fail to Download ',
                                                 c_entry$url_src))

  # Uncompress file in archive (e.g. '.zip') format
  zip::unzip(zipfile = path_df_archive, files = path_f_uncompressed,
             exdir = path_d_uncompressed)
  # Set permission to 'r-' for non-owners
  fs::file_chmod(path_df_uncompressed, mode = '644')

  # Process uncompressed file content
  stream_lines <- readr::read_lines(path_df_uncompressed,
                                    skip = 0,
                                    skip_empty_rows = F)

  # Process NA's
  purrr::map(.x = seq_along(stream_lines), .f = function(l){
    stream_lines[l] <<- stringr::str_replace_all(string = stream_lines[l],
                                                 pattern = '-999',
                                                 replacement = 'NA')
  })
  purrr::map(.x = seq_along(stream_lines), .f = function(l){

    stream_lines[l] <<- stringr::str_replace_all(string = stream_lines[l],
                                                 pattern = '-99.99',
                                                 replacement = 'NA')
  })

  w_value_m <- stringr::str_which(stream_lines,
                                  pattern = 'Value Weight Returns -- Monthly')
  w_equal_m <- stringr::str_which(stream_lines,
                                  pattern = 'Equal Weight Returns -- Monthly')
  w_value_a <- stringr::str_which(stream_lines,
                                  pattern = 'Value Weight Returns -- Annual')
  w_equal_a <- stringr::str_which(stream_lines,
                                  pattern = 'Equal Weight Returns -- Annual')

    names_str <- readr::read_table(stream_lines,
                                 skip = w_value_m,
                                 skip_empty_rows = F, col_names = F,
                                 n_max = 1)
  names_str <- make.names(names_str)[-1]

  tbl <- readr::read_table(file = stream_lines,
                           col_types = readr::cols(readr::col_character(),
                                                   readr::col_double(),
                                                   readr::col_double(),
                                                   readr::col_double(),
                                                   readr::col_double(),
                                                   readr::col_double(),
                                                   readr::col_double(),
                                                   readr::col_double(),
                                                   readr::col_double(),
                                                   readr::col_double(),
                                                   readr::col_double(),
                                                   readr::col_double(),
                                                   readr::col_double(),
                                                   readr::col_double(),
                                                   readr::col_double(),
                                                   readr::col_double(),
                                                   readr::col_double(),
                                                   readr::col_double(),
                                                   readr::col_double()),
                           skip = w_value_m + 1,
                           skip_empty_rows = F, col_names = F,
                           n_max = ((w_equal_m - 3) - (w_value_m + 2) + 1))

  tbl <- magrittr::set_colnames(x = tbl, c('date', names_str))
  tbl <- dplyr::mutate_if(.tbl = tbl,
                          .predicate = is.double,
                          .funs = function(.) ./100) %>%
    dplyr::mutate(year = as.integer(stringr::str_sub(string = date,
                                                     start = 1, end = 4)),
                  month = as.integer(stringr::str_sub(string = date,
                                                      start = 5, end = 6))) %>%
    dplyr::select(-.data$date) %>%
    dplyr::relocate(c(.data$year, .data$month),
                    .before = !c(.data$year, .data$month))

  if( save_to_disk ) {
    readr::write_csv(x = tbl, path = path_df_series_csv)
  }

  # ----------------------------------------------------------------------------
  # Auditing
  caption_str <- as.character(stringr::str_glue(c_entry$desc, ': Paths'))
  tbl_path <- tibble::tibble(
    format = c('url_src', 'archive', 'uncomp\'d', 'csv'),
    path_dir = c(fs::path_dir(c_entry$url_src),
                 fs::path_dir(path_df_archive),
                 fs::path_dir(path_df_uncompressed),
                 fs::path_dir(path_df_series_csv))
  ) %>% xtable::xtable(caption = caption_str)

  caption_str <- as.character(stringr::str_glue(c_entry$desc, ': Attributes'))
  tbl_attr <- tibble::tibble(
    file = c( fs::path_file(path_df_archive),
              fs::path_file(path_df_uncompressed),
              fs::path_file(path_df_series_csv)),
    user = c( fs::file_info(path_df_archive)$user,
              fs::file_info(path_df_uncompressed)$user,
              fs::file_info(path_df_series_csv)$user),
    device_id =
      c( as.character( fs::file_info(path_df_archive)$device_id ),
         as.character( fs::file_info(path_df_uncompressed)$device_id ),
         as.character( fs::file_info(path_df_series_csv)$device_id )),
    permissions =
      c( as.character( fs::file_info(path_df_archive)$permissions ),
         as.character( fs::file_info(path_df_uncompressed)$permissions ),
         as.character( fs::file_info(path_df_series_csv)$permissions )),
    size = c( fs::file_info(path_df_archive)$size,
              fs::file_info(path_df_uncompressed)$size,
              fs::file_info(path_df_series_csv)$size),
    birth =
      c( as.character( fs::file_info(path_df_archive)$birth_time ),
         as.character( fs::file_info(path_df_uncompressed)$birth_time ),
         as.character( fs::file_info(path_df_series_csv)$birth_time) )
  ) %>% xtable::xtable(caption = caption_str)

  caption_str <- as.character(stringr::str_glue(c_entry$desc, ': Time Stamps'))
  tbl_times <- tibble::tibble(
    file = c( fs::path_file(path_df_archive),
              fs::path_file(path_df_uncompressed),
              fs::path_file(path_df_series_csv)),
    modification =
      c( as.character( fs::file_info(path_df_archive)$modification_time ),
         as.character( fs::file_info(path_df_uncompressed)$modification_time ),
         as.character( fs::file_info(path_df_series_csv)$modification_time )),
    access =
      c( as.character( fs::file_info(path_df_archive)$access_time ),
         as.character( fs::file_info(path_df_uncompressed)$access_time ),
         as.character( fs::file_info(path_df_series_csv)$access_time) ),
    change =
      c( as.character( fs::file_info(path_df_archive)$change_time ),
         as.character( fs::file_info(path_df_uncompressed)$change_time ),
         as.character( fs::file_info(path_df_series_csv)$change_time) )
  ) %>% xtable::xtable(caption = caption_str)

  str_catalog <- stringr::str_glue('catalog entry: ', hdl_str)
  str_scheme <- stringr::str_glue('Scheme: Value Weight Returns -- Monthly')
  str_NA <- stringr::str_glue('Has NA: ', any( apply(X = tbl, 2, anyNA) ) )
  str_etag <- stringr::str_glue('Etag: ', response$headers$etag)
  str_doc <- stringr::str_glue('Documentation: ', c_entry$doc_src)

  arsenal::write2pdf(object = list(tbl_path,
                                   tbl_attr,
                                   tbl_times,
                                   str_catalog,
                                   str_scheme,
                                   str_NA,
                                   str_etag,
                                   str_doc),
                     file = path_df_audit, quiet = T)
  # ----------------------------------------------------------------------------

  attr(tbl, which = 'catalog_hdl') <- hdl_str
  attr(tbl, which = 'series_freq') <- 'Monthly'
  attr(tbl, which = 'units') <- 'decimals'
  return(tbl)
}
fognyc/factorr documentation built on Nov. 16, 2020, 8:48 p.m.