bindr: Tools for Binding Raw Times-Series Hosted on Remote Site Into a Synchronized Factor Data Set

# If dplyr::select is in a package, using .data also prevents R CMD check from
# giving a NOTE about undefined global variables (provided that @importFrom
# rlang .data is inserted). Using rlang::.data (without @importFrom rlang .data)
# is another option. See Wickham, Hadley, R Packages, O'Reilly, 1st Edition,
# 2015, p. 89 for details

#' @importFrom rlang .data
#' @importFrom magrittr "%>%"

#----------------------------------------------------------------------------
# Reference: Fama, E., Gibbons, M. A Comparison of Inflation Forecasts.
# Journal of Monetary Economics, 1984, vol. 13, pp 327-348
#----------------------------------------------------------------------------

# Model 'Time-series' : EITS (expected inflation from time-series)
# Model 'Treasury Bills' : EITB (expected inflation from Tresuary Bills)
# Model 'Naive' : EINTB (expected inflation from naive Tresuary Bills)

inflation_series <- function(from_ = NA, to_ = NA, stop_on_join_NA,
                             src_dir =
                               '~/Desktop/UMich/Factor Warehouse/Uncompressed/',
                             src_file = c(CPI = 'FRED_CPI_US_M.csv',
                                          TB = 'FF_3F_US_M.csv')){

  if(is.na(src_file['CPI'])) {
    stop("CPI source file argument is NA", call. = T)
  }
  if(is.na(src_file['TB'])) {
    stop("TB (Treasury Bills) source file argument is NA", call. = T)
  }

  #----------------------------------------------------------------------------
  # Date range validation and construction. Push back < from_ > by one month to
  # allow T-Bill observed at end of t-1 without generating NA
  stop_on_ym_range(.from = from_, .to = to_)

  if(src_file['TB'] == 'FF_3F_US_M.csv') {
    if(tsibble::yearmonth(from_) <= tsibble::yearmonth('1926-07')) {
      stop("Argument from_ must be later than '1926-07' to allow lagged T-Bill",
           call. = T)
    }} else {
      warning(stringr::str_glue("Unknow threshold date in file ",
                                src_file['TB']), call. = T)
    }

  if(src_file['CPI'] == 'FRED_CPI_US_M.csv') {
    if(tsibble::yearmonth(from_) <= tsibble::yearmonth('1947-02')) {
      stop(stringr::str_glue("Argument from_ must be later than '1947-02' to ",
                             "allow lagged CPI in inflation I(t) calculation.",
                             "\n",
                             "Also accouts for start date adjustment related ",
                             "to lagged T-Bill", .sep = ''),
           call. = T)
    }} else {
      warning(stringr::str_glue("Unknow threshold date in file ",
                                src_file['CPI']), call. = T)
    }

  from_ <- as.character( tsibble::yearmonth(from_) - 1 )
  date_range = expression_ym_range(.from = from_, .to = to_)

  #----------------------------------------------------------------------------
  # Time-series: Inflation (CPI)

  # Path validation of source file
  path_df_uncompressed <- stringr::str_glue(src_dir, '/', src_file['CPI'])
  if( fs::file_exists(path_df_uncompressed) == F) {
    stop(stringr::str_glue('Directory and/or file for ', src_file['CPI'],
                           ' does not exist'), call. = T)
  }

  # Sourcing & processing
  tbl.CPI <- readr::read_csv(path_df_uncompressed)
  tbl.CPI <- dplyr::mutate(.data = tbl.CPI,
                           I_t = log(.data$CPI) -
                             log(dplyr::lag(x = .data$CPI,
                                            n = 1,
                                            default = NA)) ) %>%
    dplyr::mutate( year = lubridate::year(.data$date),
                   month = lubridate::month(.data$date)) %>%
    dplyr::mutate( year_month =
                     tsibble::yearmonth(as.character(.data$date))) %>%
    tsibble::as_tsibble(index = .data$year_month) %>%
    dplyr::select(.data$year, .data$month, .data$year_month,
                  .data$CPI, .data$I_t) %>%
    tsibble::filter_index(eval(date_range))

  stop_on_gap_duplicate(tbl.CPI)
  #----------------------------------------------------------------------------


  #----------------------------------------------------------------------------
  # Time-series: Treasury Bill (TB)

  # # Path validation of source file
  path_df_uncompressed <- stringr::str_glue(src_dir, '/', src_file['TB'])
  if( fs::file_exists(path_df_uncompressed) == F) {
    stop(stringr::str_glue('Directory and/or file for ', src_file['TB'],
                           ' does not exist'), call. = T)
  }

  # Sourcing & processing
  tbl.TB <- readr::read_csv(path_df_uncompressed)
  tbl.TB <- dplyr::mutate(.data = tbl.TB,
                          year_month_str = paste0(.data$year, '-',
                                                  .data$month, '-01')) %>%
    dplyr::mutate(year_month = tsibble::yearmonth(.data$year_month_str)) %>%
    tsibble::as_tsibble(index = .data$year_month) %>%
    dplyr::select(.data$year, .data$month, .data$year_month, .data$rf)  %>%
    magrittr::set_colnames(c('year', 'month', 'year_month', 'TB_t')) %>%
    tsibble::filter_index(eval(date_range))

  stop_on_gap_duplicate(tbl.TB)
  #----------------------------------------------------------------------------


  #----------------------------------------------------------------------------
  # Merge CPI and Treasury Bills time-series.
  tbl <- dplyr::full_join(x = tbl.CPI,
                          y = dplyr::select(tbl.TB,
                                            .data$year_month, .data$TB_t),
                          by = 'year_month')

  # Add calendar date (end-of-month)
  tbl <- dplyr::mutate(.data = tbl,
                       date = last_cal_day(yyyy = .data$year,
                                           mm = .data$month),
                       .after = .data$month)

  stop_on_gap_duplicate(tbl)
  if(stop_on_join_NA == TRUE){
    if( base::anyNA(tbl) ){
      stop("Joined tsibble objects have NA's", call. = T)
    }
  } else {
    if( base::anyNA(tbl) ){
      msg <- stringr::str_glue(
        "The following records obtained from joined objects have NA's,",
        "which will be removed:", .sep = " ")
      warning(msg, call. = T, immediate. = T)
      get_incomplete_record(tbl, show = T, issue_warning = F)
      tbl <- tidyr::drop_na(tbl)
      stop_on_gap_duplicate(tbl)
    }
  }

  # T-Bill observed at end of t-1 and ex-post real return (cf Section 2.2, (8))
  tbl <- dplyr::mutate(tbl,
    TB_t_1 = dplyr::lag(x = .data$TB_t, n = 1, default = NA)) %>%
    dplyr::mutate(ex_post_real = .data$TB_t_1 - .data$I_t)

  # Clean up first NA induced by < TB_t_1 > ( ...and <ex_post_real> ) while
  # maintaining the original start date requested via argument < from_ >
  tbl <- tidyr::drop_na(data = tbl)
  stop_on_gap_duplicate(tbl)

  return(tbl)
}


inflation_metrics <- function(obs, q = 1, conf_level = 0.95) {

  # ---------------------------------------------------------------------------
  # Validate
  if( !tsibble::is_tsibble(obs) ) {
    stop('Argument obs must be tsibble object')
  }

  nm <- c('year', 'month', 'year_month',
          'CPI', 'I_t', 'TB_t', 'TB_t_1',
          'ex_post_real')
  if( !all( nm %in% names(obs) ) ){
    stop('Tsibble object (argument) has missing variables', call. = )
  }

  # ---------------------------------------------------------------------------
  augmented_stamp <- dplyr::select(obs,
                                   .data$year, .data$month,
                                   .data$date, .data$year_month,
                                   .data$I_t,
                                   .data$TB_t, .data$TB_t_1,
                                   .data$ex_post_real)

  # ---------------------------------------------------------------------------
  # Econometrics
  #
  # Model 'Time-series' (cf. Section 2.1)
  fit <- stats::arima(x = obs$I_t, order = c(0, 1, q),
                      include.mean = F, method = 'ML',
                      optim.control = list(maxit = 1000))
  arima.ITS <- broom::tidy(fit, conf.int = TRUE)

  tbl.ITS <- tibble::tibble(expected = obs$I_t - fit$residuals,
                            shock = fit$residuals,
                            model = base::as.factor('Time-series'))

  tbl.ITS <- dplyr::bind_cols(augmented_stamp, tbl.ITS)
  stop_on_gap_duplicate(tbl.ITS)
  remove(fit)


  # Model 'Treasury Bill' (cf. Section 2.2)
  fit <- stats::arima(x = obs$ex_post_real, order = c(0, 1, q),
                      include.mean = F, method = 'ML',
                      optim.control = list(maxit = 1000))
  arima.ITB <- broom::tidy(fit, conf.int = TRUE)

  tbl.ITB <- tibble::tibble(
    ER_t_1 = obs$ex_post_real - fit$residuals) %>%
    dplyr::mutate(expected = obs$TB_t_1 - .data$ER_t_1) %>%
    dplyr::mutate(shock = obs$I_t - .data$expected,
                  model = base::as.factor('Treasury Bills'))
  tbl.ITB <- dplyr::select(tbl.ITB, -.data$ER_t_1)

  tbl.ITB <- dplyr::bind_cols(augmented_stamp, tbl.ITB)
  stop_on_gap_duplicate(tbl.ITB)
  remove(fit)


  # Model 'Naive' (cf. Section 2.3)
  tbl.INTB <- tibble::tibble(
    ERN_t_1 = zoo::rollmean(x = dplyr::lag(x = obs$ex_post_real,
                                           n = 1,
                                           default = NA),
                            k = 12,
                            align = 'right',
                            fill = NA)) %>%
    dplyr::mutate(expected = obs$TB_t_1 - .data$ERN_t_1) %>%
    dplyr::mutate(shock = obs$I_t - .data$expected,
                  model = base::as.factor('Naive'))
  tbl.INTB <- dplyr::select(tbl.INTB, -.data$ERN_t_1)

  tbl.INTB <- dplyr::bind_cols(augmented_stamp, tbl.INTB)
  stop_on_gap_duplicate(tbl.INTB)

  # ---------------------------------------------------------------------------
  # Compile results
  metrics <- dplyr::bind_rows(dplyr::as_tibble(tbl.ITB),
                              dplyr::as_tibble(tbl.INTB),
                              dplyr::as_tibble(tbl.ITS))

  # ---------------------------------------------------------------------------
  # Model diagnostic
  diagnostic <- dplyr::group_by(.data = metrics, .data$model) %>%
    tidyr::nest() %>%
    dplyr::mutate(
      ols = purrr::map(.data$data,
                       function(df) stats::lm(I_t ~ expected, data = df)))

  diagnostic <- diagnostic %>%
    dplyr::mutate(
      ols.tidy = purrr::map(.data$ols,
                            function(df) broom::tidy(
                              df, conf.int = T,
                              conf.level = conf_level))) %>%
    dplyr::mutate(ols.glance = purrr::map(.data$ols,
                                          function(df) broom::glance(df)))

  diagostic_param <- diagnostic %>%
    dplyr::select(.data$model, .data$ols.tidy) %>%
    tidyr::unnest(.data$ols.tidy)

  diagostic_stats <- diagnostic %>%
    dplyr::select(.data$model, .data$ols.glance) %>%
    tidyr::unnest(.data$ols.glance) %>%
    dplyr::select(.data$model, .data$adj.r.squared, .data$sigma,
                  .data$df.residual, .data$nobs)


  return( list(metrics = metrics,
               arima.ITB = arima.ITB,
               arima.ITS = arima.ITS,
               diagostic_param = diagostic_param,
               diagostic_stats = diagostic_stats) )

}


inflation_validate_FamaGibbons <- function(metrics, show_plot = T){

  # ---------------------------------------------------------------------------
  # Integrity check and minor filter

  metrics <- with_sub_Period(metrics, interval_type = 'Fama-Gibbons') %>%
    dplyr::filter(.data$sub_period != 'Other')

  nm <- c('model', 'sub_period', 'shock')
  if( !all( nm %in% names(metrics) ) ){
    msg <- stringr::str_glue(
      'Metric object (argument) has missing variables.\n',
      "'model', 'sub_period', 'shock' must be in the tibble.")
    stop(msg, call. = )
  }

  # ---------------------------------------------------------------------------
  # Compile diagnostic measures

  diagnostic <- dplyr::group_by(.data = metrics,
                                .data$sub_period, .data$model) %>%
    tidyr::nest() %>%
    dplyr::filter(.data$sub_period != 'Other') %>%
    dplyr::mutate(mean = purrr::map(.x = .data$data,
                                    ~ base::mean(.x$shock, na.rm = T)),
                  test = purrr::map(.x = .data$data,
                                    ~ stats::t.test(.x$shock)$statistic),
                  sd = purrr::map(.x = .data$data,
                                  ~ stats::sd(.x$shock, na.rm = T)),
                  rmse = purrr::map(.x = .data$data,
                                    ~ base::sqrt(base::mean(.x$shock^2,
                                                            na.rm = T)))) %>%
    tidyr::unnest(cols = c(.data$mean, .data$test, .data$sd, .data$rmse)) %>%
    dplyr::select(-.data$data) %>%
    dplyr::select(.data$sub_period, .data$model,
                  .data$mean, .data$test, .data$sd, .data$rmse)

  # ---------------------------------------------------------------------------
  # Plot generation

  if(show_plot) {

    # Baseline plot
    g <- ggplot2::ggplot(data = diagnostic,
                         ggplot2::aes(x = .data$sub_period,
                                      y = .data$sd * 10000,
                                      col = .data$model))
    g <- g + ggplot2::geom_point(size = 3)

    # Add original data from Fama, Gibbons (1984), Table 4
    g <- g + ggplot2::geom_point(data = Fama_Gibbons_Tbl_4,
                                 mapping = ggplot2::aes(
                                   x = .data$sub_period,
                                   y = .data$sd * 10000,
                                   col = .data$model),
                                 shape = 23, size = 3)

    # y-title
    g <- g + ggplot2::ylab(label = 'sd. x 10,000')

    # x-text, x-title
    g <- g + ggplot2::theme(
      axis.text.x = ggplot2::element_text(angle = 90,vjust = 0.5, hjust=1))
    g <- g + ggplot2::theme(axis.title.x = ggplot2::element_blank())

    # Title, sub-title, caption
    main_title <- stringr::str_glue(
      'Standard Deviation of Monthly Forecast Error, ',
      'by Sub-Period, In-Sample')
    g <- g + ggplot2::labs(
      title = main_title,
      subtitle = "(Circle: Replicated Estimates | Diamond: Original Reference)",
      caption = "Ref.: Fama, Gibbons (1984), Table 4, p. 338")

    g <- g + ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5),
                            plot.subtitle = ggplot2::element_text(hjust = 0.5))

    g <- g + ggplot2::theme(
      plot.title = ggplot2::element_text(color = "black",
                                         size = 12, face = "bold"),
      plot.subtitle = ggplot2::element_text(color = "black",
                                            size = 10),
      plot.caption = ggplot2::element_text(color = "blue", face = "italic"))

    # xy-text
    g <- g + ggplot2::theme(
      axis.text.x = ggplot2::element_text(face = 'bold'),
      axis.text.y = ggplot2::element_text(face = 'bold'))

    base::print(g)
  }
  # ---------------------------------------------------------------------------

  return(diagnostic)
}


write_inflation_factor <- function(tsbl, hdl_str, dest_root_dir,
                                   model_type, func_arg){

  path_df_audit <- stringr::str_glue(dest_root_dir, '/Audit/', hdl_str, '.pdf')
  path_df_series_csv <- stringr::str_glue(dest_root_dir, '/Uncompressed/',
                                          hdl_str, '.csv')

  tsbl <- remove_year_month_from(tsbl)
  readr::write_csv(x = tsbl, path = path_df_series_csv)

  # ----------------------------------------------------------------------------
  # Auditing
  caption_str <- 'Inflation Factor: File Paths'
  tbl_path <- tibble::tibble(
    format = c('pdf', 'csv'),
    path_dir = c(fs::path_dir(path_df_audit),
                 fs::path_dir(path_df_series_csv))
  ) %>% xtable::xtable(caption = caption_str)

  caption_str <- 'Inflation Factor: File Attributes'
  tbl_attr <- tibble::tibble(
    file = c( fs::path_file(path_df_series_csv)),
    user = c( fs::file_info(path_df_series_csv)$user),
    device_id =
      c( as.character( fs::file_info(path_df_series_csv)$device_id )),
    permissions =
      c( as.character( fs::file_info(path_df_series_csv)$permissions )),
    size = c( fs::file_info(path_df_series_csv)$size),
    birth =
      c( as.character( fs::file_info(path_df_series_csv)$birth_time) )
  ) %>% xtable::xtable(caption = caption_str)

  caption_str <- 'Inflation Factor: File Time Stamps'
  tbl_times <- tibble::tibble(
    file = c( fs::path_file(path_df_series_csv)),
    modification =
      c( as.character( fs::file_info(path_df_series_csv)$modification_time )),
    access =
      c( as.character( fs::file_info(path_df_series_csv)$access_time) ),
    change =
      c( as.character( fs::file_info(path_df_series_csv)$change_time) )
  ) %>% xtable::xtable(caption = caption_str)

  parsedHdl <- parseBuiltdHdl(hdl_str)
  arsenal::write2pdf(object = list(tbl_path,
                                   tbl_attr,
                                   tbl_times,
                                   stringr::str_glue('Derived Catalog Entry: ',
                                                     parsedHdl[1], '_',
                                                     parsedHdl[2], '_',
                                                     parsedHdl[3], '\n'),
                                   stringr::str_glue(
                                     'Proprietary: TRUE\n'),
                                   stringr::str_glue(
                                     'Build Type: Econometric\n'),
                                   stringr::str_glue('Inflation Model Type: ',
                                                     model_type, '\n'),
                                   'Function Call Parameters:\n',
                                   utils::ls.str(func_arg),
                                   'Tibble Tail:\n',
                                   utils::tail(tsbl)),
                     file = path_df_audit, quiet = T)

  # ----------------------------------------------------------------------------
  # Set permission to 'r-' (read-only) for non-owners
  fs::file_chmod(path_df_audit, mode = '644')
  fs::file_chmod(path_df_series_csv, mode = '644')
}
fognyc/bindr documentation built on Dec. 4, 2020, 12:33 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
fognyc/bindr
Tools for Binding Raw Times-Series Hosted on Remote Site Into a Synchronized Factor Data Set

R/inflation_func.R
In fognyc/bindr: Tools for Binding Raw Times-Series Hosted on Remote Site Into a Synchronized Factor Data Set

Defines functions write_inflation_factor inflation_validate_FamaGibbons inflation_metrics inflation_series

R Package Documentation

Browse R Packages

We want your feedback!

fognyc/bindr Tools for Binding Raw Times-Series Hosted on Remote Site Into a Synchronized Factor Data Set

R/inflation_func.R In fognyc/bindr: Tools for Binding Raw Times-Series Hosted on Remote Site Into a Synchronized Factor Data Set

Defines functions write_inflation_factor inflation_validate_FamaGibbons inflation_metrics inflation_series

R Package Documentation

Browse R Packages

We want your feedback!

fognyc/bindr
Tools for Binding Raw Times-Series Hosted on Remote Site Into a Synchronized Factor Data Set

R/inflation_func.R
In fognyc/bindr: Tools for Binding Raw Times-Series Hosted on Remote Site Into a Synchronized Factor Data Set