# If dplyr::select is in a package, using .data also prevents R CMD check from
# giving a NOTE about undefined global variables (provided that @importFrom
# rlang .data is inserted). Using rlang::.data (without @importFrom rlang .data)
# is another option. See Wickham, Hadley, R Packages, O'Reilly, 1st Edition,
# 2015, p. 89 for details
#' @importFrom rlang .data
#' @importFrom magrittr "%>%"
fetch_FF_3F__ <- function(hdl_str, dest_dir, save_to_disk = T){
# Function handle two models/entries: FF_3F_US and FF_3F_DEV
hdl_offset <- switch(hdl_str, "FF_3F_US_M" = 2, "FF_3F_DEV_M" = 4)
if( is.null(hdl_offset) ) {
error_msg <- stringr::str_glue("hdl_offset can't be set. ",
"Switch call possibly faulty.")
stop(error_msg, call. = T)
}
# Select catalog entry
c_entry <- dplyr::filter(catalog, .data$hdl == hdl_str)
# ----------------------------------------------------------------------------
# Set up dir/file names
#
# < path_df_ >, < path_d_ > and < path_f_ > are reserved prefix,
# where 'd' stand for directory and 'f' stands for file.
#
path_df_archive <- stringr::str_glue(dest_dir, '/Archives/',
fs::path_file(c_entry$url_src))
path_df_audit <- stringr::str_glue(dest_dir, '/Audit/', hdl_str, '.pdf')
path_df_series_csv <- stringr::str_glue(dest_dir, '/Uncompressed/',
hdl_str, '.csv')
path_d_uncompressed <- stringr::str_glue(dest_dir, '/Uncompressed')
path_f_uncompressed <- stringr::str_replace(string =
fs::path_file(c_entry$url_src),
pattern = '_TXT.zip',
replacement = '.txt')
path_df_uncompressed <- stringr::str_glue(dest_dir, '/Uncompressed/',
path_f_uncompressed)
# ----------------------------------------------------------------------------
# Send request
response <- httr::GET(c_entry$url_src,
httr::write_disk(path_df_archive, overwrite = T))
# Validate response
if(httr::status_code(response) != 200) {
print( httr::http_status(response) )
print( httr::headers(response) )
}
httr::stop_for_status(x = response,
task = stringr::str_glue('Fail to Download ',
c_entry$url_src))
# Uncompress file in archive (e.g. '.zip') format
zip::unzip(zipfile = path_df_archive, files = path_f_uncompressed,
exdir = path_d_uncompressed)
# Set permission to 'r-' for non-owners
fs::file_chmod(path_df_uncompressed, mode = '644')
# Process uncompressed file content
stream_lines <- readr::read_lines(path_df_uncompressed,
skip = 0,
skip_empty_rows = F)
# Process NA's
purrr::map(.x = seq_along(stream_lines), .f = function(l){
stream_lines[l] <<- stringr::str_replace_all(string = stream_lines[l],
pattern = '-999',
replacement = 'NA')
})
purrr::map(.x = seq_along(stream_lines), .f = function(l){
stream_lines[l] <<- stringr::str_replace_all(string = stream_lines[l],
pattern = '-99.99',
replacement = 'NA')
})
hdl_m <- stringr::str_which(stream_lines, pattern = 'Mkt-RF')[1]
hdl_a <- stringr::str_which(stream_lines, pattern = 'Annual Factors:')
tbl <- readr::read_table(file = stream_lines,
col_types = readr::cols(readr::col_character(),
readr::col_double(),
readr::col_double(),
readr::col_double(),
readr::col_double()),
skip = hdl_m,
skip_empty_rows = F, col_names = F,
n_max = ((hdl_a - hdl_offset) - (hdl_m + 1) + 1))
tbl <- magrittr::set_colnames(x = tbl, c('date', 'mkt', 'smb', 'hml', 'rf'))
tbl <- dplyr::mutate_if(.tbl = tbl,
.predicate = is.double,
.funs = function(.) ./100) %>%
dplyr::mutate(year = as.integer(stringr::str_sub(string = date,
start = 1, end = 4)),
month = as.integer(stringr::str_sub(string = date,
start = 5, end = 6))) %>%
dplyr::select(.data$year, .data$month,
.data$mkt, .data$smb, .data$hml, .data$rf)
if( save_to_disk ) {
readr::write_csv(x = tbl, path = path_df_series_csv)
}
# ----------------------------------------------------------------------------
# Auditing
caption_str <- as.character(stringr::str_glue(c_entry$desc, ': Paths'))
tbl_path <- tibble::tibble(
format = c('url_src', 'archive', 'uncomp\'d', 'csv'),
path_dir = c(fs::path_dir(c_entry$url_src),
fs::path_dir(path_df_archive),
fs::path_dir(path_df_uncompressed),
fs::path_dir(path_df_series_csv))
) %>% xtable::xtable(caption = caption_str)
caption_str <- as.character(stringr::str_glue(c_entry$desc, ': Attributes'))
tbl_attr <- tibble::tibble(
file = c( fs::path_file(path_df_archive),
fs::path_file(path_df_uncompressed),
fs::path_file(path_df_series_csv)),
user = c( fs::file_info(path_df_archive)$user,
fs::file_info(path_df_uncompressed)$user,
fs::file_info(path_df_series_csv)$user),
device_id =
c( as.character( fs::file_info(path_df_archive)$device_id ),
as.character( fs::file_info(path_df_uncompressed)$device_id ),
as.character( fs::file_info(path_df_series_csv)$device_id )),
permissions =
c( as.character( fs::file_info(path_df_archive)$permissions ),
as.character( fs::file_info(path_df_uncompressed)$permissions ),
as.character( fs::file_info(path_df_series_csv)$permissions )),
size = c( fs::file_info(path_df_archive)$size,
fs::file_info(path_df_uncompressed)$size,
fs::file_info(path_df_series_csv)$size),
birth =
c( as.character( fs::file_info(path_df_archive)$birth_time ),
as.character( fs::file_info(path_df_uncompressed)$birth_time ),
as.character( fs::file_info(path_df_series_csv)$birth_time) )
) %>% xtable::xtable(caption = caption_str)
caption_str <- as.character(stringr::str_glue(c_entry$desc, ': Time Stamps'))
tbl_times <- tibble::tibble(
file = c( fs::path_file(path_df_archive),
fs::path_file(path_df_uncompressed),
fs::path_file(path_df_series_csv)),
modification =
c( as.character( fs::file_info(path_df_archive)$modification_time ),
as.character( fs::file_info(path_df_uncompressed)$modification_time ),
as.character( fs::file_info(path_df_series_csv)$modification_time )),
access =
c( as.character( fs::file_info(path_df_archive)$access_time ),
as.character( fs::file_info(path_df_uncompressed)$access_time ),
as.character( fs::file_info(path_df_series_csv)$access_time) ),
change =
c( as.character( fs::file_info(path_df_archive)$change_time ),
as.character( fs::file_info(path_df_uncompressed)$change_time ),
as.character( fs::file_info(path_df_series_csv)$change_time) )
) %>% xtable::xtable(caption = caption_str)
str_catalog <- stringr::str_glue('catalog entry: ', hdl_str)
str_scheme <- stringr::str_glue('Scheme: Monthly')
str_NA <- stringr::str_glue('Has NA: ', any( apply(X = tbl, 2, anyNA) ) )
str_etag <- stringr::str_glue('Etag: ', response$headers$etag)
str_doc <- stringr::str_glue('Documentation: ', c_entry$doc_src)
arsenal::write2pdf(object = list(tbl_path,
tbl_attr,
tbl_times,
str_catalog,
str_scheme,
str_NA,
str_etag,
str_doc),
file = path_df_audit, quiet = T)
# ----------------------------------------------------------------------------
attr(tbl, which = 'catalog_hdl') <- hdl_str
attr(tbl, which = 'series_freq') <- 'Monthly'
attr(tbl, which = 'units') <- 'decimals'
return(tbl)
}
fetch_FF_OP__ <- function(hdl_str, dest_dir, save_to_disk = T){
# Function handle two models/entries: FF_OP_US and FF_OP_exDiv_US
# Scheme: Equal Weight Returns -- Monthly
# Select catalog entry
c_entry <- dplyr::filter(catalog, .data$hdl == hdl_str)
# ----------------------------------------------------------------------------
# Set up dir/file names
#
# < path_df_ >, < path_d_ > and < path_f_ > are reserved prefix,
# where 'd' stand for directory and 'f' stands for file.
#
path_df_archive <- stringr::str_glue(dest_dir, '/Archives/',
fs::path_file(c_entry$url_src))
path_df_audit <- stringr::str_glue(dest_dir, '/Audit/', hdl_str, '.pdf')
path_df_series_csv <- stringr::str_glue(dest_dir, '/Uncompressed/',
hdl_str, '.csv')
path_d_uncompressed <- stringr::str_glue(dest_dir, '/Uncompressed')
path_f_uncompressed <- stringr::str_replace(string =
fs::path_file(c_entry$url_src),
pattern = '_TXT.zip',
replacement = '.txt')
path_df_uncompressed <- stringr::str_glue(dest_dir, '/Uncompressed/',
path_f_uncompressed)
# ----------------------------------------------------------------------------
# Send request
response <- httr::GET(c_entry$url_src,
httr::write_disk(path_df_archive, overwrite = T))
# Validate response
if(httr::status_code(response) != 200) {
print( httr::http_status(response) )
print( httr::headers(response) )
}
httr::stop_for_status(x = response,
task = stringr::str_glue('Fail to Download ',
c_entry$url_src))
# Uncompress file in archive (e.g. '.zip') format
zip::unzip(zipfile = path_df_archive, files = path_f_uncompressed,
exdir = path_d_uncompressed)
# Set permission to 'r-' for non-owners
fs::file_chmod(path_df_uncompressed, mode = '644')
# Process uncompressed file content
stream_lines <- readr::read_lines(path_df_uncompressed,
skip = 0,
skip_empty_rows = F)
# Process NA's
purrr::map(.x = seq_along(stream_lines), .f = function(l){
stream_lines[l] <<- stringr::str_replace_all(string = stream_lines[l],
pattern = '-999',
replacement = 'NA')
})
purrr::map(.x = seq_along(stream_lines), .f = function(l){
stream_lines[l] <<- stringr::str_replace_all(string = stream_lines[l],
pattern = '-99.99',
replacement = 'NA')
})
w_value_m <- stringr::str_which(stream_lines,
pattern = 'Value Weight Returns -- Monthly')
w_equal_m <- stringr::str_which(stream_lines,
pattern = 'Equal Weight Returns -- Monthly')
w_value_a <- stringr::str_which(stream_lines,
pattern = 'Value Weight Returns -- Annual')
w_equal_a <- stringr::str_which(stream_lines,
pattern = 'Equal Weight Returns -- Annual')
names_str <- readr::read_table(stream_lines,
skip = w_value_m,
skip_empty_rows = F, col_names = F,
n_max = 1)
names_str <- make.names(names_str)[-1]
tbl <- readr::read_table(file = stream_lines,
col_types = readr::cols(readr::col_character(),
readr::col_double(),
readr::col_double(),
readr::col_double(),
readr::col_double(),
readr::col_double(),
readr::col_double(),
readr::col_double(),
readr::col_double(),
readr::col_double(),
readr::col_double(),
readr::col_double(),
readr::col_double(),
readr::col_double(),
readr::col_double(),
readr::col_double(),
readr::col_double(),
readr::col_double(),
readr::col_double()),
skip = w_value_m + 1,
skip_empty_rows = F, col_names = F,
n_max = ((w_equal_m - 3) - (w_value_m + 2) + 1))
tbl <- magrittr::set_colnames(x = tbl, c('date', names_str))
tbl <- dplyr::mutate_if(.tbl = tbl,
.predicate = is.double,
.funs = function(.) ./100) %>%
dplyr::mutate(year = as.integer(stringr::str_sub(string = date,
start = 1, end = 4)),
month = as.integer(stringr::str_sub(string = date,
start = 5, end = 6))) %>%
dplyr::select(-.data$date) %>%
dplyr::relocate(c(.data$year, .data$month),
.before = !c(.data$year, .data$month))
if( save_to_disk ) {
readr::write_csv(x = tbl, path = path_df_series_csv)
}
# ----------------------------------------------------------------------------
# Auditing
caption_str <- as.character(stringr::str_glue(c_entry$desc, ': Paths'))
tbl_path <- tibble::tibble(
format = c('url_src', 'archive', 'uncomp\'d', 'csv'),
path_dir = c(fs::path_dir(c_entry$url_src),
fs::path_dir(path_df_archive),
fs::path_dir(path_df_uncompressed),
fs::path_dir(path_df_series_csv))
) %>% xtable::xtable(caption = caption_str)
caption_str <- as.character(stringr::str_glue(c_entry$desc, ': Attributes'))
tbl_attr <- tibble::tibble(
file = c( fs::path_file(path_df_archive),
fs::path_file(path_df_uncompressed),
fs::path_file(path_df_series_csv)),
user = c( fs::file_info(path_df_archive)$user,
fs::file_info(path_df_uncompressed)$user,
fs::file_info(path_df_series_csv)$user),
device_id =
c( as.character( fs::file_info(path_df_archive)$device_id ),
as.character( fs::file_info(path_df_uncompressed)$device_id ),
as.character( fs::file_info(path_df_series_csv)$device_id )),
permissions =
c( as.character( fs::file_info(path_df_archive)$permissions ),
as.character( fs::file_info(path_df_uncompressed)$permissions ),
as.character( fs::file_info(path_df_series_csv)$permissions )),
size = c( fs::file_info(path_df_archive)$size,
fs::file_info(path_df_uncompressed)$size,
fs::file_info(path_df_series_csv)$size),
birth =
c( as.character( fs::file_info(path_df_archive)$birth_time ),
as.character( fs::file_info(path_df_uncompressed)$birth_time ),
as.character( fs::file_info(path_df_series_csv)$birth_time) )
) %>% xtable::xtable(caption = caption_str)
caption_str <- as.character(stringr::str_glue(c_entry$desc, ': Time Stamps'))
tbl_times <- tibble::tibble(
file = c( fs::path_file(path_df_archive),
fs::path_file(path_df_uncompressed),
fs::path_file(path_df_series_csv)),
modification =
c( as.character( fs::file_info(path_df_archive)$modification_time ),
as.character( fs::file_info(path_df_uncompressed)$modification_time ),
as.character( fs::file_info(path_df_series_csv)$modification_time )),
access =
c( as.character( fs::file_info(path_df_archive)$access_time ),
as.character( fs::file_info(path_df_uncompressed)$access_time ),
as.character( fs::file_info(path_df_series_csv)$access_time) ),
change =
c( as.character( fs::file_info(path_df_archive)$change_time ),
as.character( fs::file_info(path_df_uncompressed)$change_time ),
as.character( fs::file_info(path_df_series_csv)$change_time) )
) %>% xtable::xtable(caption = caption_str)
str_catalog <- stringr::str_glue('catalog entry: ', hdl_str)
str_scheme <- stringr::str_glue('Scheme: Value Weight Returns -- Monthly')
str_NA <- stringr::str_glue('Has NA: ', any( apply(X = tbl, 2, anyNA) ) )
str_etag <- stringr::str_glue('Etag: ', response$headers$etag)
str_doc <- stringr::str_glue('Documentation: ', c_entry$doc_src)
arsenal::write2pdf(object = list(tbl_path,
tbl_attr,
tbl_times,
str_catalog,
str_scheme,
str_NA,
str_etag,
str_doc),
file = path_df_audit, quiet = T)
# ----------------------------------------------------------------------------
attr(tbl, which = 'catalog_hdl') <- hdl_str
attr(tbl, which = 'series_freq') <- 'Monthly'
attr(tbl, which = 'units') <- 'decimals'
return(tbl)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.