R/TimeSeriesUtility.R

Defines functions check_time_series check_freq freq_to_string price_to_ret ret_to_price stat_mode guess_freq change_freq combine_time_series trunc_time_series price_fill_na .return_vec_fill_na return_fill_na tidy_ret untidy_ret freq_to_scale fPercent fNum excess_ret excess_ret_bool xts_to_dataframe cal_time

Documented in cal_time change_freq check_freq check_time_series combine_time_series excess_ret excess_ret_bool fNum fPercent freq_to_scale freq_to_string guess_freq price_fill_na price_to_ret ret_to_price return_fill_na stat_mode tidy_ret trunc_time_series untidy_ret xts_to_dataframe

#' @title Check time-series structure
#' @param ret time-series
#' @export
#' @return \code{ret} time-series or error if the time-series is not properly
#' strucutured.
#' @details This package is set up to work with time-series in a data.frame
#' structure organized by columns (matrix-like) with dates in the first column.
#' This function will check for the data.frame structure and if dates are in
#' the first column.
check_time_series <- function(ret) {


  if (!'data.frame' %in% class(ret)) {
    stop('ret needs to be a data.frame')
  }
  if (!'Date' %in% class(ret[[1]])) {
    warning('first column in time-series needs to be date observations')
    try(dt <- as.Date(ret[[1]]))
    if (class(dt) == 'try-error') {
      stop('could not convert first column of time-series to Date')
    } else {
      ret[[1]] <- dt
    }
  }
  date_missing <- is.na(ret[[1]])
  if (any(date_missing)) {
    warning('some dates are missing')
    ret <- ret[!date_missing, ]
  }
  if (colnames(ret)[1] != 'date') {
    warning('renaming time_series first column \'date\'')
    colnames(ret)[1] <- 'date'
  }
  return(ret)
}


#' @title Check frequency character
#' @param freq frequency character
#' @return Uppercase \code{freq} string or an error if not properly specified
#' @details Frequency options are D for daily, W for weekly, M for monthly,
#' Q for quarterly, and A for annual
#' @export
check_freq <- function(freq) {

  if (toupper(freq) %in% c('D', 'W', 'M', 'Q', 'A')) {
    return(toupper(freq))
  } else {
    stop('freq must be D, W, M, Q, or A')
  }
}


#' @title Convert frequency character to descriptive string
#' @param freq frequency character
#' @return frequency string, e.g., 'D' = 'days'
#' @export
freq_to_string <- function(freq) {

  switch (
    toupper(freq),
    D = 'days',
    W = 'weeks',
    M = 'months',
    Q = 'quarters',
    A = 'years'
  )
}

#' @title Convert price (level) time-series to returns (delta)
#' @param price data.frame time-series containing prices
#' @return data.frame return time-series
#' @export
price_to_ret <- function(price) {

  price <- check_time_series(price)
  row_lead <- rep(NA, ncol(price))
  lag_price <- rbind(row_lead, price[1:(nrow(price) - 1), , drop = FALSE])
  diff_mat <- price[, 2:ncol(price)] / lag_price[, 2:ncol(lag_price)] - 1
  ret <- cbind(price[, 1, drop = FALSE], diff_mat)
  ret_out <- ret[2:nrow(ret), ]
  colnames(ret_out) <- colnames(price)
  return(ret_out)
}


#' @title Convert returns to prices
#' @param ret data.frame time-series with returns
#' @param init_val number to represent the initial value of the prices time-series
#' @return data.frame with prices time-series
#' @export
ret_to_price <- function(ret, init_val = 1) {

  ret <- check_time_series(ret)
  ret[is.na(ret)] <- 0
  init_row <- data.frame(date = ret[1, 1] - 1,
                         matrix(init_val, ncol = ncol(ret) - 1))
  colnames(init_row) <- colnames(ret)
  ret_aug <- rbind(init_row, ret)
  ret_aug[2:nrow(ret_aug), 2:ncol(ret_aug)] <- 1 + ret_aug[2:nrow(ret_aug),
                                                           2:ncol(ret_aug)]
  price <- apply(ret_aug[, 2:ncol(ret), drop = FALSE], 2, cumprod)
  price_out <- cbind(ret_aug[, 1, drop = FALSE], price)
  return(price_out)
}


#' @title Calculate mode
#' @param x numeric vector
#' @export
stat_mode <- function(x) {

  ux <- unique(x)
  ux[which.max(tabulate(match(x, ux)))]
}


#' @title Guess time-series frequency
#' @param time_series data.frame containing time-seires
#' @return character representing frequency
#' @importFrom lubridate int_diff
#' @export
guess_freq <- function(time_series) {

  ret <- check_time_series(time_series)
  date_diff <- lubridate::int_diff(ret$date)
  date_mode <- stat_mode(date_diff)
  p <- as.numeric(date_mode)
  if (p == 86400) {
    return('D')
  } else if(p == 604800) {
    return('W')
  } else if(p == 2678400) {
    return('M')
  } else if (p == 7948800) {
    return('Q')
  } else if (p > 7948800) {
    return('A')
  } else {
    return('could not guess frequency')
  }
}

#' @title Change time-series frequency
#' @param time_series data.frame containing time-series to adjust
#' @param freq character to represent desired frequency
#' @param data_type string ret for return or price for prices
#' @return data.frame with new frequency
#' @import lubridate
#' @export
change_freq <- function(time_series, freq, data_type = c('ret', 'price')) {

  guess <- guess_freq(time_series)
  if (guess == toupper(freq)) {
    return(time_series)
  }
  time_series <- check_time_series(time_series)
  freq <- check_freq(freq)
  data_type <- toupper(data_type[1])
  if (!data_type %in% c('RET', 'PRICE')) {
    stop('data_type must be ret or price')
  }
  if (data_type == 'RET') {
    time_series <- ret_to_price(time_series)
  }
  date <- time_series$date
  group_date <- switch(
    freq,
    D = paste0(lubridate::day(date), '-', lubridate::year(date)),
    W = paste0(lubridate::week(date), '-', lubridate::year(date)),
    M = paste0(lubridate::month(date), '-', lubridate::year(date)),
    Q = paste0(lubridate::quarter(date), '-', lubridate::year(date)),
    A = paste0(lubridate::year(date), '-', lubridate::year(date))
  )
  time_series$group_date <- group_date
  grouped <- aggregate(time_series, list(group_date), max)
  sorted <- grouped[order(grouped$date), 2:(ncol(grouped) - 1)]
  if (data_type == 'RET') {
    sorted <- price_to_ret(sorted)
  }
  if (freq %in% c('M', 'Q', 'A')) {
    sorted$date <- lubridate::ceiling_date(sorted$date, unit = 'months') - 1
  }
  return(sorted)
}


#' @title Combine multiple time-series into one data.frame
#' @param ... time-series to combine
#' @param freq character to represent desired frequency
#' @param trunc_start boolean to truncate at latest inception
#' @param trunc_end boolean to truncate at earliest ending date
#' @param data_type string to specify time-series being combined as returns or prices
#' @param fill_NA boolean to fill missing values, FALSE leaves NAs
#' @param input_list boolean to specify if returns are being entered in list form
#' @details This workflow is set up to combine returns or prices. If you have
#' mixed data you'll need to convert to returns or prices before combining. If
#' you are combining return or price data.frames leave the \code{input_list}
#' parameter set to \code{FALSE}. Otherwise if your return data.frames have
#' already been combined into a list you can pass the list through \code{...}, and
#' set \code{input_list} to \code{TRUE}. The \code{freq} parameter allows you
#' to change the frequency of all the returns to the specified \code{freq}.
#' @export
#' @examples
#' data(ETF)
#' ret_a <- ret[, 1:2]
#' ret_b <- ret[, c(1, 3)]
#' ret_c <- ret[, c(1, 4)]
#' combo_ret <- combine_time_series(ret_a, ret_b, ret_c, freq = 'd')
combine_time_series <- function(..., freq, trunc_start = TRUE, trunc_end = TRUE,
                                data_type = c('ret', 'price'), fill_NA = TRUE,
                                  input_list = FALSE) {
  ret_list <- list(...)
  if (input_list) {
    ret_list <- ret_list[[1]]
  }
  data_type <- data_type[1]
  ret_list <- lapply(ret_list, check_time_series)
  ret_list <- lapply(ret_list, change_freq, freq = freq, data_type = data_type)
  combo <- Reduce(function(...) merge(..., by = 'date', all = TRUE), ret_list)
  date_list <- lapply(ret_list, '[[', 'date')
  if (trunc_start) {
    date_start_vec <- sapply(date_list, min)
    date_start <- as.Date(max(date_start_vec, na.rm = TRUE), origin = '1970-01-01')
  } else {
    date_start <- NULL
  }
  if (trunc_end) {
    date_end_vec <- sapply(date_list, max)
    date_end <- as.Date(min(date_end_vec, na.rm = TRUE), origin = '1970-01-01')
  } else {
    date_end <- NULL
  }
  combo_trunc <- trunc_time_series(combo, date_start, date_end)
  if (fill_NA) {
    if (toupper(data_type) == 'RET') {
      combo_trunc[, 2:ncol(combo_trunc)] <- apply(
        combo_trunc[, 2:ncol(combo_trunc), drop = FALSE], 2, .return_vec_fill_na)
    } else if (toupper(data_type) == 'PRICE') {
      combo_trunc[, 2:ncol(combo_trunc)] <- apply(
        combo_trunc[, 2:ncol(combo_trunc), drop = FALSE], 2, price_fill_na)
    } else {
      stop('data_type must be either ret or price')
    }
  }
  bad_date <- is.na(combo_trunc$date)
  res <- combo_trunc[!bad_date, ]
  return(res)
}


#' @title Truncate time-series
#' @param ret data.frame time-series
#' @param date_start truncate returns before this date
#' @param date_end truncate returns after this date
#' @export
trunc_time_series <- function(ret, date_start = NULL, date_end = NULL) {

  ret <- check_time_series(ret)
  if (!is.null(date_start)) {
    ind <- ret$date >= as.Date(date_start)
    ret <- ret[ind, ]
  }
  if (!is.null(date_end)) {
    ind <- ret$date <= as.Date(date_end)
    ret <- ret[ind, ]
  }
  return(ret)
}


#' @title Fill missing price values with previous non-missing value
#' @param x vector of price data
#' @return vector with NAs filled with last good (non-NA) value
#' @export
#' @note https://stackoverflow.com/questions/7735647/replacing-nas-with-latest-non-na-value
price_fill_na <- function(x) {

  ind <- which(!is.na(x))
  if (is.na(x[1])) {
    ind <- c(1, ind)
  }
  rep(x[ind], times = diff(c(ind, length(x) + 1)))
}


.return_vec_fill_na <- function(x) {

  if (!is.null(nrow(x))) {
    x <- x[[1]]
  }
  first_ret <- min(which(!is.na(x)))
  x[is.na(x)] <- 0
  if (first_ret > 1) {
    x[1:first_ret] <- NA
  }
  return(x)
}

#' @title Fill missing return values with zero
#' @param x vector of return data
#' @export
return_fill_na <- function(ret) {
  ret[, 2:ncol(ret)] <- apply(ret[, 2:ncol(ret), drop = FALSE],
                              2,
                              .return_vec_fill_na)
  return(ret)
}

#' @title Pivot returns to a tidy structure
#' @param ret data.frame containing returns organized by column
#' @return data.frame containing returns organized by date, series, and values
#' @importFrom tidyr pivot_longer
#' @export
tidy_ret <- function(ret) {

  ret <- check_time_series(ret)
  tidyret <- tidyr::pivot_longer(ret, -date, values_to = 'values',
                                 names_to = 'series')
  return(as.data.frame(tidyret))
}


#' @title Pivot returns to a wider (untidy) structure
#' @param tidyret data.frame containing tidy returns (see tidy_ret)
#' @importFrom tidyr pivot_wider
#' @export
untidy_ret <- function(tidyret) {

  ret <- tidyr::pivot_wider(tidyret, names_from = 'series', values_from = 'values')
  as.data.frame(ret)
}


#' @title Utility function to convert frequency character to numeric annual scaler
#' @param freq character to represent frequency
#' @return numeric annual scaler
#' @export
freq_to_scale <- function(freq) {

  freq <- check_freq(freq)
  switch (
    freq,
    D = 252,
    W = 52,
    M = 12,
    Q = 4,
    A = 1
  )
}


#' @title Utility function to format numeric data as 0.00%
#' @param x numeric data to convert into percent
#' @param digits number of digits for rounding
#' @export
#' @return character representation in %
fPercent <- function(x, digits = 2) {
  x_fmt <- formatC(x * 100, digits = digits, format = 'f')
  x_fmt_abs <- formatC(abs(x) * 100, digits = digits, format = 'f')
  x_per <- paste0(x_fmt, '%')
  less_0 <- x < 0
  less_0[is.na(less_0)] <- FALSE
  x_per[less_0] <- paste0('(', x_fmt_abs[less_0], '%)')
  x_per[x_per == ' NA%'] <- '-'
  return(x_per)
}


#' @title Utility function to format numeric data as 1,000.00
#' @param x numeric data to format
#' @param digits number of digits for rounding
#' @export
#' @return character representation of \code{x}
fNum <- function(x, digits = 2) {
  x <- formatC(x, digits = 2, format = 'f', big.mark = ',')
  x[x == ' NA'] <- '-'
  return(x)
}


#' @title Net y returns from all x returns
#' @param x data.frame of univarate or multivariate returns
#' @param y data.frame of univariate return
#' @param freq character to specify common or desired frequency
#' @details \code{x} and \code{y} will be converted to \code{freq}. Each
#' time-series or column in \code{x} will have \code{y} subtracted from it.
#' \code{y} is a one time-series (e.g., a common benchmark or risk-free rate).
#' @return data.frame of \code{x} returns that are net of \code{y}
#' @export
excess_ret <- function(x, y, freq) {

  if (ncol(y) != 2) {
    stop('y is mispecified, need two columns: 1 dates, 2 time-series')
  }
  x <- change_freq(x, freq)
  y <- change_freq(y, freq)
  tidy_x <- tidy_ret(x)
  tidy_x$series <- factor(tidy_x$series, unique(tidy_x$series))
  x_split <- split(tidy_x, tidy_x$series)
  x_split_df <- lapply(x_split, untidy_ret)
  xy_list <- lapply(x_split_df, merge, y = y, by = 'date', all.x = TRUE, all.y = FALSE)
  xy_fill <- lapply(xy_list, return_fill_na)
  x_net <- lapply(xy_list, function(df) {df[, 2] - df[, 3]
                                          return(df[, 1:2])})
  combine_time_series(x_net, freq = freq, input_list = TRUE)
}


#' @title Net y returns from x returns based on boolean index
#' @param x data.frame of multivariate returns
#' @param y data.frame of univariate returns
#' @param freq character to specify common or desired frequency
#' @param net_rf boolean index to specify which \code{x} returns to net \code{y} from
#' @details Suppose \code{x} is a data.frame with 3 asset time-series and we only
#' want to net \code{y} from the 2nd asset. \code{net_rf} should be set to
#' \code{c(FALSE, TRUE, FALSE)}. Note a single \code{net_rf} of \code{TRUE} or
#' \code{FALSE} will respectively net all or none of \code{x}. See \code{excess_ret}
#' for more info.
#' @export
excess_ret_bool <- function(x, y, freq, net_rf = TRUE) {

  x <- change_freq(x, freq)
  y <- change_freq(y, freq)
  if (length(net_rf) == 1) {
    net_rf <- rep(net_rf, ncol(x) - 1)
  }
  if (all(net_rf == TRUE)) {
    return(excess_ret(x, y, freq))
  }
  if (all(net_rf == FALSE)) {
    return(x)
  }
  x_to_net <- x[, c(TRUE, net_rf)]
  x_gross <- x[, c(TRUE, !net_rf)]
  x_net <- excess_ret(x_to_net, y, freq)
  x[, c(TRUE, net_rf)] <- x_net
  return(x)
}


#' @title Convert xts to data.frame
#' @param x xts
#' @return data.frame
#' @importFrom zoo index
#' @export
xts_to_dataframe <- function(x) {

  date_vec <- zoo::index(x)
  df <- data.frame(date = as.Date(date_vec), x, row.names = NULL)
  if (!is.null(colnames(x))) {
    colnames(df)[2:ncol(df)] <- colnames(x)
  }
  return(df)
}


#' @title Calendar time helper
#' @param xwin string to represent calendar window
#' @param asof ending date of window
#' @return beginning date of window
#' @export
cal_time <- function(xwin = c('dtd', 'wtd', 'mtd', 'qtd', 'ytd', 'ttm',
                              '3 yr', '5 yr', '10 yr', '20 yr', '30 yr'),
                     asof = Sys.Date()) {

  xwin <- tolower(xwin[1])
  xyear <- lubridate::year(asof)
  xmon <- lubridate::month(asof)
  xday <- lubridate::day(asof)
  switch (xwin,
          dtd = asof,
          wtd = lubridate::floor_date(asof, 'week') + 1,
          mtd = lubridate::floor_date(asof, 'month'),
          qtd = lubridate::floor_date(asof, 'quarter'),
          ytd = lubridate::floor_date(asof, 'year'),
          ttm = as.Date(paste0(xyear - 1, '-', xmon, '-', xday)),
          '3 yr' = as.Date(paste0(xyear - 3, '-', xmon, '-', xday)),
          '5 yr' = as.Date(paste0(xyear - 5, '-', xmon, '-', xday)),
          '10 yr' = as.Date(paste0(xyear - 10, '-', xmon, '-', xday)),
          '20 yr' = as.Date(paste0(xyear - 20, '-', xmon, '-', xday)),
          '30 yr' = as.Date(paste0(xyear - 30, '-', xmon, '-', xday))
  )
}
alejandro-sotolongo/InvestmentSuite documentation built on Jan. 19, 2020, 5:20 p.m.