#' @title Wrapper for running linear regression
#' @param y data.frame of time-series of dependent variables
#' @param x data.frame of time-series of independent variables
#' @param rf data.frame containing risk-free time-series to net from returns
#' @param freq model frequency
#' @param net_y boolean vector to net risk-free from each \code{y} variable
#' @param net_x boolean vector to net risk-free from each \code{x} variable
#' @return list with regression output
#' @export
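#' @examples
#' # a sketch assuming the package's ETF and FF example data sets; the exact
#' # structure of the risk-free data.frame (ff) is an assumption here
#' \dontrun{
#' data(ETF)
#' data(FF)
#' run_reg(y = ret[, 1:2], x = ret[, c(1, 3:ncol(ret))], rf = ff, freq = 'd')
#' }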
run_reg <- function(y, x, rf, freq, net_y = TRUE, net_x = FALSE) {
y <- excess_ret_bool(y, rf, freq, net_y)
x <- excess_ret_bool(x, rf, freq, net_x)
fit <- mv_reg(y, x, freq)
res <- reg_fit_table(fit, freq)
res$fit <- fit
return(res)
}
#' @title Multi-variate regression
#' @param y data.frame time-series of one or more dependent variables
#' @param x data.frame time-series of one or more independent variables
#' @param freq character to represent frequency, both time-series will be
#' converted to the specified frequency
#' @return list of fits from lm function
#' @export
#' @details \code{y} variables are split and combined with \code{x} variables to
#' fit an \code{lm} model on each dataset.
mv_reg <- function(y, x, freq) {
# lm formula relies on y values being named '.yval', need to rename any
# potential x columns named '.yval'
if ('.yval' %in% colnames(x)) {
warning('.yval found in x column names, attempting to rename to ..yval')
colnames(x)[which(colnames(x) == '.yval')] <- '..yval'
}
y_freq <- change_freq(y, freq)
x_freq <- change_freq(x, freq)
y_fill <- return_fill_na(y_freq)
x_fill <- return_fill_na(x_freq)
y_omit <- na.omit(y_fill)
x_omit <- na.omit(x_fill)
date_start <- max(c(min(y_omit$date), min(x_omit$date)))
date_end <- min(c(max(y_omit$date), max(x_omit$date)))
y_trunc <- trunc_time_series(y_omit, date_start, date_end)
x_trunc <- trunc_time_series(x_omit, date_start, date_end)
y_tidy <- tidy_ret(y_trunc)
y_tidy$series <- factor(y_tidy$series, unique(y_tidy$series))
y_split <- split(y_tidy, y_tidy$series)
y_split_df <- lapply(y_split, untidy_ret)
models <- lapply(y_split_df, merge, y = x_trunc, by = 'date', all = TRUE)
named_models <- lapply(models,
function(x) {
colnames(x)[2] <- '.yval'
x})
fit <- lapply(named_models, function(x) {lm(.yval ~ ., data = x[, -1])})
return(fit)
}
#' @title Format regression summary table from list of fits
#' @param fit list of fits from \code{lm}, see output from \code{mv_reg}
#' @param freq character to represent model frequency
#' @return list of data.frames with numeric and formatted (string) regression output
#' @export
reg_fit_table <- function(fit, freq) {
asset <- names(fit)
coeff <- sapply(fit, coefficients)
coeff_df <- data.frame(asset = asset, t(coeff))
# the intercept (column 2) is annualized and labeled Ann.Resid
colnames(coeff_df)[2] <- 'Ann.Resid'
coeff_df$Ann.Resid <- coeff_df$Ann.Resid * freq_to_scale(freq)
coeff_arranged <- coeff_df[, c(1, 3:ncol(coeff_df), 2)]
summ <- lapply(fit, summary)
adj_r2 <- sapply(summ, '[[', 'adj.r.squared')
coeff_arranged$adj.r2 <- adj_r2
fmt_df <- coeff_arranged
fmt_df[, 2:(ncol(fmt_df) - 2)] <- apply(fmt_df[, 2:(ncol(fmt_df) - 2), drop = FALSE],
2, fNum)
fmt_df[, (ncol(fmt_df) - 1):ncol(fmt_df)] <- apply(fmt_df[, (ncol(fmt_df) - 1):ncol(fmt_df), drop = FALSE],
2, fPercent)
t_val <- sapply(summ, function(x) x$coefficients[, 't value'])
t_val_df <- data.frame(asset = asset, t(t_val))
colnames(t_val_df)[2] <- 'Ann.Resid'
t_val_arranged <- t_val_df[, c(1, 3:ncol(t_val_df), 2)]
res <- list()
res$num <- coeff_arranged
res$fmt <- fmt_df
res$t_val <- t_val_arranged
return(res)
}
#' @title Rolling absorption ratio
#' @param ret data.frame containing return time-series
#' @param n_pc number of principal components for absorption ratio calculation, see details
#' @param roll_win window for rolling calculation
#' @param cov_win list of windows for covariance calculation, see details
#' @param cov_wgt list of weights for covariance calculation, see details
#' @param delta_win list of weights for standardized change calc, see details
#' @return data.frame with dates, absorption ratio, and standardized change in
#' the absorption ratio.
#' @export
#' @importFrom slide slide
#' @details See the 2010 Kritzman, et al paper Principal Components as a Measure of
#' Systemic Risk, https://papers.ssrn.com/sol3/papers.cfm?abstract_id=1582687.
#' The ratio is equal to the proportion of risk explained by the first \code{n_pc}
#' eigenvalues. A higher ratio indicates higher systemic risk in the universe of
#' the \code{ret} parameter. The paper uses a weighted covariance calculation with
#' a half-life decay; this function differs slightly by using a simple weighted
#' average covariance calculation, see \code{cov_wgt_avg}. There are three
#' covariance weights: long (default 252 observations and 0.5 weight), med (default
#' 63 observations and 0.25 weight), and short (default 21 observations and 0.25 weight).
#' This allows for flexibility, e.g., if you wanted a standard \code{cov} calculation you
#' would set \code{cov_win$long} to the same number as \code{roll_win},
#' set \code{cov_wgt$long} to 1, and set the med and short weights to 0. Note
#' the \code{cov_win} and \code{cov_wgt} arguments are lists with the names
#' long, med, and short. The \code{delta_win} calculates a standardized change (z-score)
#' of the rolling absorption ratio defined as (mean of short window - mean of long window) /
#' standard deviation of the long window.
#' @examples
#' data(ETF)
#' # use standard parameters
#' roll_absorp_ratio(ret)
#'
#' # use regular cov() covariance calculation
#' roll_absorp_ratio(ret, cov_wgt = list(long = 1, med = 0, short = 0))
#'
#' # use 500 day rolling window instead
#' roll_absorp_ratio(ret, roll_win = 500, cov_win = list(long = 500, med = 63, short = 21))
roll_absorp_ratio <- function(ret, n_pc = 2, roll_win = 252,
cov_win = list(long = 252, med = 63, short = 21),
cov_wgt = list(long = 0.5, med = 0.25, short = 0.25),
delta_win = list(long = 252, short = 15)) {
roll_cov <- slide::slide(ret,
~cov_wgt_avg(.x, cov_win$long, cov_win$med,
cov_win$short, cov_wgt$long,
cov_wgt$med, cov_wgt$short),
.before = roll_win, .complete = TRUE)
roll_ar <- slide::slide(roll_cov,
~absorp_ratio(.x[[1]], n_pc = n_pc),
.before = 1)
roll_ar_vec <- unlist(roll_ar)
roll_ar_delta <- slide::slide(roll_ar_vec,
~zscore_win(.x, delta_win$short, delta_win$long),
.before = delta_win$long, .complete = TRUE)
roll_ar_delta_vec <- unlist(roll_ar_delta)
res <- data.frame(date = ret$date,
absorp.ratio = c(rep(NA, 1 + roll_win), roll_ar_vec),
absorp.ratio.delta = c(rep(NA, 1 + roll_win + delta_win$long),
roll_ar_delta_vec))
return(res)
}
#' @title Standardized change with short and long windows
#' @param x numeric vector
#' @param short_win short window
#' @param long_win long window
#' @return numeric value of standardized change, i.e., the short window mean
#' less the long window mean, standardized by the long window standard deviation
#' @export
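#' @examples
#' # a minimal sketch: standardized change of a simulated series; the window
#' # lengths below are illustrative choices, not defaults
#' set.seed(1)
#' x <- cumsum(rnorm(300, sd = 0.01))
#' zscore_win(x, short_win = 15, long_win = 252)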
zscore_win <- function(x, short_win, long_win) {
# use the most recent short_win and long_win observations, assuming x is
# ordered oldest to newest
x_short <- utils::tail(x, short_win)
x_long <- utils::tail(x, long_win)
(mean(x_short) - mean(x_long)) / sd(x_long)
}
.zscore <- function(x) {
(x - mean(x, na.rm = TRUE)) / sd(x, na.rm = TRUE)
}
#' @title Calculate absorption ratio
#' @param cov_mat estimated covariance matrix
#' @param n_pc number of eigenvalues to use for calculation
#' @return numeric absorption ratio
#' @details See \code{roll_absorp_ratio} for more details. Also 2010 Kritzman,
#' et al paper Principal Components as a Measure of Systemic Risk,
#' https://papers.ssrn.com/sol3/papers.cfm?abstract_id=1582687.
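#' @examples
#' # a minimal sketch: two highly correlated assets concentrate nearly all
#' # variance in the first principal component
#' cov_mat <- matrix(c(1, 0.9, 0.9, 1), nrow = 2)
#' absorp_ratio(cov_mat, n_pc = 1)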
#' @export
absorp_ratio <- function(cov_mat, n_pc = 2) {
if (is.null(cov_mat)) {
return(NULL)
}
# for a symmetric covariance matrix the singular values equal the eigenvalues
eig <- svd(cov_mat)
sum(eig$d[1:n_pc]) / sum(eig$d)
}
#' @title Weighted average covariance matrix
#' @param ret data.frame containing returns
#' @param long_win long window length
#' @param med_win medium window length
#' @param short_win short window length
#' @param long_wgt weight of long window covariance
#' @param med_wgt weight of medium window covariance
#' @param short_wgt weight of short window covariance
#' @note The short window and weight place a higher emphasis on recent returns,
#' while the longer window and weight place a more neutral weight on recent returns
#' and returns further in the past. This covariance calculation is similar
#' to an exponentially weighted moving average but is less computationally
#' intensive.
#' @export
cov_wgt_avg <- function(ret, long_win = 252, med_win = 63, short_win = 21,
long_wgt = 0.25, med_wgt = 0.25, short_wgt = 0.5) {
ret <- check_time_series(ret)
# order the most recent observations first so that 1:short_win selects the
# most recent returns
ret_ordered <- ret[order(ret$date, decreasing = TRUE), 2:ncol(ret)]
cov_short <- cov(ret_ordered[1:short_win, ]) * short_wgt
cov_med <- cov(ret_ordered[1:med_win, ]) * med_wgt
cov_long <- cov(ret_ordered[1:long_win, ]) * long_wgt
cov_short + cov_med + cov_long
}
#' @title Principal Component Analysis from the covariance matrix
#' @param cov_mat covariance matrix
#' @return list with latents and coefficients
#' @export
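#' @examples
#' # a minimal sketch with a simple 2x2 covariance matrix
#' cov_mat <- matrix(c(1, 0.5, 0.5, 1), nrow = 2)
#' p <- pca_cov(cov_mat)
#' p$latent # eigenvalues
#' p$coeff  # sign-adjusted loadings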
pca_cov <- function(cov_mat) {
s <- svd(cov_mat)
latent <- s$d
raw_coeff <- s$v
p <- dim(raw_coeff)[1]
d <- dim(raw_coeff)[2]
# flip signs so the largest-magnitude loading in each column is positive
max_index <- max.col(t(abs(raw_coeff)))
col_sign <- sign(raw_coeff[max_index + seq(from = 0, to = (d - 1) * p, by = p)])
coeff <- matrix(col_sign, nrow = p, ncol = d, byrow = TRUE) * raw_coeff
res <- list()
res$latent <- latent
res$coeff <- coeff
return(res)
}
#' @title Hierarchical clustering around latents
#' @param ret data.frame containing return time-series
#' @return hclust result from the distance calculated from PCA
#' @export
#' @examples
#' data(ETF)
#' hclust_res <- pca_hclust(ret)
#'
#' # plot dendrogram
#' plot(hclust_res)
pca_hclust <- function(ret) {
ret <- check_time_series(ret)
cor_mat <- cor(ret[, 2:ncol(ret)], use = 'pairwise.complete.obs')
p <- pca_cov(cor_mat)
meas <- diag(sqrt(p$latent)) %*% t(p$coeff)
dist_res <- dist(t(meas), method = 'euclidean')
hclust(dist_res)
}
#' @title Geometric return
#' @param ret data.frame containing return time-series
#' @param freq character representing frequency
#' @export
geo_ret <- function(ret, freq) {
ret <- change_freq(ret, freq)
apply(1 + ret[, 2:ncol(ret), drop = FALSE], 2, prod, na.rm = TRUE)^(freq_to_scale(freq) / nrow(ret)) - 1
}
#' @title Annualized volatility
#' @param ret data.frame containing return time-series
#' @param freq character representing frequency
#' @export
ann_vol <- function(ret, freq) {
ret <- change_freq(ret, freq)
apply(ret[, 2:ncol(ret), drop = FALSE], 2, sd) * sqrt(freq_to_scale(freq))
}
#' @title Check specification of y parameter
#' @param y data.frame containing univariate time-series
#' @return error if y is misspecified, otherwise nothing
#' @export
check_y_param <- function(y) {
if (ncol(y) != 2) {
stop('y is misspecified, y should have two columns: dates and returns')
}
}
#' @title Excess mean
#' @param x data.frame containing returns
#' @param y data.frame containing univariate return to net from each \code{x}
#' @param freq character representing common frequency
#' @return data.frame of \code{x} net of \code{y}
#' @export
#' @note the geometric mean of \code{y} is subtracted from the geometric mean
#' of \code{x}
excess_mean <- function(x, y, freq) {
check_y_param(y)
x <- change_freq(x, freq)
y <- change_freq(y, freq)
date_start <- max(c(min(x$date), min(y$date)))
date_end <- min(c(max(x$date), max(y$date)))
x_trunc <- trunc_time_series(x, date_start, date_end)
y_trunc <- trunc_time_series(y, date_start, date_end)
x_mean <- geo_ret(x_trunc, freq)
y_mean <- geo_ret(y_trunc, freq)
x_mean - y_mean
}
#' @title Excess return time-series
#' @param x data.frame containing returns
#' @param y data.frame containing univariate return to net from each \code{x}
#' @param freq character representing common frequency
#' @export
#' @return data.frame of \code{x} that is net of \code{y}
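#' @examples
#' # a sketch assuming the package's ETF and FF example data sets; ff is
#' # assumed to be a data.frame with a date column and risk-free returns
#' \dontrun{
#' data(ETF)
#' data(FF)
#' excess_ret(ret, ff, 'd')
#' }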
excess_ret <- function(x, y, freq) {
check_y_param(y)
combo <- combine_time_series(x, y, freq = freq)
combo[, 2:(ncol(combo) - 1)] <- combo[, 2:(ncol(combo) - 1)] - combo[, ncol(combo)]
return(combo[, 1:(ncol(combo) - 1)])
}
#' @title Sharpe Ratio
#' @param ret data.frame containing returns
#' @param rf either numeric value representing annual risk-free rate or data.frame
#' containing risk-free time-series
#' @param freq character representing frequency
#' @note Sharpe ratio = (annualized return less annual risk-free rate) / annualized volatility
#' @export
#' @examples
#' data(ETF)
#' data(FF)
#' sharpe_ratio(ret, ff$ff_ret, 'd')
#' sharpe_ratio(ret, 0.02, 'd')
sharpe_ratio <- function(ret, rf, freq) {
if (is.numeric(rf) && length(rf) == 1) {
rmrf <- geo_ret(ret, freq) - rf
} else {
rmrf <- excess_mean(ret, rf, freq)
}
rmrf / ann_vol(ret, freq)
}
#' @title Downside volatility
#' @param ret data.frame containing returns
#' @param freq character representing frequency
#' @param mar minimal acceptable return to define downside, default is zero
#' @export
#' @details The \code{ret} data.frame is filtered to only include returns
#' below the \code{mar}. The resulting downside volatility is calculated from this
#' data.frame.
down_vol <- function(ret, freq, mar = 0) {
ret <- check_time_series(ret)
dvol <- apply(ret[, 2:ncol(ret)], 2, function(x) {sd(x[x < mar], na.rm = TRUE)})
dvol * sqrt(freq_to_scale(freq))
}
#' @title Sortino ratio
#' @param ret data.frame containing returns
#' @param rf either numeric value representing annual risk-free rate or data.frame
#' containing risk-free time-series
#' @param freq character representing frequency
#' @param mar minimal acceptable return to define downside
#' @details Sortino ratio = (annualized return less annual risk-free rate) / annualized
#' downside volatility.
#' @export
sortino_ratio <- function(ret, rf, freq, mar = 0) {
ret <- change_freq(ret, freq)
if (is.numeric(rf) && length(rf) == 1) {
rmrf <- geo_ret(ret, freq) - rf
} else {
rf <- change_freq(rf, freq)
rmrf <- excess_mean(ret, rf, freq)
}
rmrf / down_vol(ret, freq, mar)
}
#' @title Omega ratio
#' @param ret data.frame containing returns
#' @param mar minimal acceptable return to define downside
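#' @examples
#' # a sketch assuming the ETF example data used elsewhere in this package
#' data(ETF)
#' omega_ratio(ret, mar = 0)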
#' @export
omega_ratio <- function(ret, mar = 0) {
# TO-DO add black-fisher approximation
ret <- check_time_series(ret)
x <- ret[, 2:ncol(ret)]
avg_hit <- apply(x, 2, function(x, mar) {mean(pmax(x - mar, 0), na.rm = TRUE)}, mar = mar)
avg_miss <- apply(x, 2, function(x, mar) {mean(pmax(mar - x, 0), na.rm = TRUE)}, mar = mar)
avg_hit / avg_miss
}
#' @title Value at Risk
#' @param ret data.frame containing return time-series
#' @param p numeric percentile for VaR tail
#' @param type string to specify calculation type: \code{historical},
#' \code{modified}, \code{normal}, or \code{stable}, see details
#' @details \code{type historical} calculates an empirical VaR from the time-series
#' observations. The other methods are parametric. \code{modified} uses a
#' Cornish-Fisher expansion to consider skew and kurtosis. \code{normal} approximates
#' using a Gaussian distribution. \code{stable} approximates using a stable
#' distribution. See \code{fBasics::stableFit} for more info.
#' @export
#' @importFrom fBasics stableFit
#' @importFrom stabledist qstable
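#' @examples
#' # a sketch assuming the ETF example data: historical and normal 5% VaR
#' data(ETF)
#' val_at_risk(ret, p = 0.05, type = 'historical')
#' val_at_risk(ret, p = 0.05, type = 'normal')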
val_at_risk <- function(ret, p = 0.05,
type = c('historical', 'modified', 'normal', 'stable')) {
x <- return_fill_na(ret)[, 2:ncol(ret)]
type <- toupper(type[1])
if (type == 'HISTORICAL') {
return(apply(x, 2, function(x, p) {sort(x)[ceiling(p * length(x))]}, p = p))
} else if (type == 'MODIFIED') {
warning('modified still under construction')
return(NA)
} else if (type == 'NORMAL') {
return(apply(x, 2, function(x, p) {qnorm(p, mean(x, na.rm = TRUE), sd(x, na.rm = TRUE))}, p = p))
} else if (type == 'STABLE') {
# fits stable parameters with fBasics::stableFit using the quantile method
return(apply(x, 2, .quantile_stable, p = p))
} else {
warning('type must be one of historical, modified, normal, or stable')
return(NA)
}
}
# utility function to estimate stable quantile
.quantile_stable <- function(x, p = 0.05) {
fit_object <- fBasics::stableFit(x, doplot = FALSE)
fit <- fit_object@fit$estimate
stabledist::qstable(p, fit[1], fit[2], fit[3], fit[4])
}
# utility function to fit a stable distribution and return pdf
.stable_fit <- function(x, ...) {
fit_object <- fBasics::stableFit(x, ...)
fit <- fit_object@fit$estimate
d <- stabledist::dstable(x, fit[1], fit[2], fit[3], fit[4])
res <- list()
res$fit_obj <- fit_object
res$fit <- fit
res$dstable <- d
return(res)
}
#' @title Calculate time-series of loss from prior peak
#' @param ret data.frame containing return time-series
#' @export
drawdown <- function(ret) {
ret <- check_time_series(ret)
x <- return_fill_na(ret)
dd_calc <- function(x) {
wi <- cumprod(x + 1)
peaks <- cummax(wi)
wi / peaks - 1
}
x[, 2:ncol(x)] <- apply(x[, 2:ncol(x), drop = FALSE], 2, dd_calc)
return(x)
}
#' @title Calculate worst peak to trough loss
#' @param ret data.frame containing returns
#' @export
worst_drawdown <- function(ret) {
dd <- drawdown(ret)
apply(dd[, 2:ncol(dd)], 2, min)
}
#' @title Find the worst n drawdowns
#' @param fund data.frame containing time-series of one asset
#' @param n number of drawdowns to find
#' @return data.frame with unique drawdowns start, end, and trough dates, and trough value
#' @export
#' @examples
#' data(ETF)
#' worst_n_drawdowns(ret[, 1:2], n = 5)
worst_n_drawdowns <- function(fund, n = 5) {
dd <- drawdown(fund)
discrete_dd <- find_drawdowns(dd)
ordered_dd <- discrete_dd[order(discrete_dd$Drawdown), ]
ordered_dd[1:n, ]
}
#' @title Find unique drawdowns from drawdown time-series
#' @param dd data.frame containing drawdown time-series of one asset, see \code{drawdown}
#' @return data.frame with unique drawdowns start, end, and trough dates, and trough value
#' @export
#' @examples
#' data(ETF)
#' dd <- drawdown(ret[, 1:2])
#' find_drawdowns(dd)
find_drawdowns <- function(dd) {
# This function finds the start and end dates of each unique drawdown
# by lagging the drawdown time-series and checking if the series is transitioning
# from 0 to negative or negative back to 0.
# Next the unique drawdowns are placed into a list using mapply with a function
# that truncates the time-series based on the vectors of starting and ending dates.
# Finally, the minimum (trough) of each drawdown is found and the results
# are returned in a data.frame.
check_y_param(dd) # make sure dd time-series is univariate
colnames(dd) <- c('date', 'Drawdown')
dd$isDown <- dd[, 2] < 0
dd$isDownLag <- c(NA, dd[1:(nrow(dd) - 1), 'isDown'])
dd$start <- dd$isDown & !dd$isDownLag # change from 0 to negative signals drawdown start
dd$end <- !dd$isDown & dd$isDownLag # change from negative back to 0 signals recovery
start_date <- dd[dd$start, 1]
end_date <- dd[dd$end, 1]
# if lengths of start and end dates are the same the time-series ends on
# a drawdown and we need to adjust last end date to last start date
if (length(end_date) == length(start_date)) {
dd_date <- data.frame(StartDate = start_date,
EndDate = c(end_date[2:length(end_date)],
start_date[length(start_date)]))
} else {
dd_date <- data.frame(StartDate = start_date,
EndDate = end_date[2:length(end_date)])
}
# create unique list of drawdowns
dd_list <- mapply(trunc_time_series,
ret = list(dd[, 1:2]),
date_start = dd_date$StartDate,
date_end = dd_date$EndDate,
SIMPLIFY = FALSE)
.get_min_dd <- function(x) {
# find trough by finding which observation equals the minimum
indx <- which(x[, 2] == min(x[, 2]))
# if more than one observation equals the minimum, take the first one
if (length(indx) > 1) {
indx <- indx[1]
}
x[indx, ]
}
trough_list <- lapply(dd_list, .get_min_dd)
trough_df <- do.call(rbind, trough_list)
res <- data.frame(StartDate = dd_date$StartDate,
TroughDate = trough_df$date,
EndDate = dd_date$EndDate,
Trough = trough_df$Drawdown,
DaysToTrough = trough_df$date - dd_date$StartDate,
DaysToRecover = dd_date$EndDate - trough_df$date,
TotalDays = dd_date$EndDate - dd_date$StartDate)
colnames(res)[4] <- 'Drawdown'
return(res)
}
#' @title Tracking error
#' @param fund data.frame containing one or multiple fund returns
#' @param bench data.frame containing returns of one benchmark
#' @param freq character to specify common frequency
#' @return tracking error of each fund relative to benchmark
#' @export
#' @note Tracking error is the standard deviation of the fund minus the benchmark,
#' the tracking error will be annualized by the specified frequency.
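#' @examples
#' # a sketch assuming the ETF example data: first return column as the fund
#' # and second as the benchmark
#' data(ETF)
#' tracking_error(ret[, 1:2], ret[, c(1, 3)], 'd')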
tracking_error <- function(fund, bench, freq) {
er <- excess_ret(fund, bench, freq)
apply(er[, 2:ncol(er)], 2, sd, na.rm = TRUE) * sqrt(freq_to_scale(freq))
}
#' @title Beta of fund(s) relative to benchmark
#' @param fund data.frame containing one or multiple fund returns
#' @param bench data.frame containing returns of one benchmark
#' @param rf data.frame containing risk-free rate
#' @param freq character to specify common frequency
#' @return beta
#' @export
fund_beta <- function(fund, bench, rf, freq) {
combo <- combine_time_series(fund, bench, freq = freq)
er <- excess_ret(combo, rf, freq = freq)
cov_mat <- cov(er[, 2:ncol(er)], use = 'pairwise.complete.obs')
cov_mat[, ncol(cov_mat)] / cov_mat[nrow(cov_mat), ncol(cov_mat)]
}
#' @title Correlation of fund(s) relative to benchmark
#' @param fund data.frame containing one or multiple fund returns
#' @param bench data.frame containing returns of one benchmark
#' @param rf data.frame containing risk-free rate
#' @param freq character to specify common frequency
#' @return vector of correlation of each fund to benchmark
#' @export
fund_corr <- function(fund, bench, rf, freq) {
combo <- combine_time_series(fund, bench, freq = freq)
er <- excess_ret(combo, rf, freq = freq)
cor_mat <- cor(er[, 2:ncol(er)], use = 'pairwise.complete.obs')
cor_mat[, ncol(cor_mat)]
}
#' @title Information ratio
#' @param fund data.frame containing one or multiple fund returns
#' @param bench data.frame containing returns of one benchmark
#' @param freq character to specify common frequency
#' @note Information ratio = excess return over the benchmark / tracking error.
#' @export
info_ratio <- function(fund, bench, freq) {
er <- excess_mean(fund, bench, freq)
er / tracking_error(fund, bench, freq)
}
.subset_direction <- function(fund, bench, freq, direct = c('up', 'down')) {
direct <- match.arg(direct)
combo <- combine_time_series(fund, bench, freq = freq)
if (direct == 'up') {
keep <- combo[, ncol(combo)] >= 0
} else if (direct == 'down') {
keep <- combo[, ncol(combo)] < 0
} else {
stop('direct must be up or down')
}
combo[keep, ]
}
#' @title Up capture ratio
#' @param fund data.frame containing one or multiple fund returns
#' @param bench data.frame containing returns of one benchmark
#' @param freq character to specify common frequency
#' @export
up_capture <- function(fund, bench, freq) {
up <- .subset_direction(fund, bench, freq, 'up')
geo_ret(up[, 1:(ncol(up) - 1)], freq) / geo_ret(up[, c(1, ncol(up))], freq)
}
#' @title Down capture ratio
#' @param fund data.frame containing one or multiple fund returns
#' @param bench data.frame containing returns of one benchmark
#' @param freq character to specify common frequency
#' @export
down_capture <- function(fund, bench, freq) {
down <- .subset_direction(fund, bench, freq, 'down')
geo_ret(down[, 1:(ncol(down) - 1)], freq) / geo_ret(down[, c(1, ncol(down))], freq)
}
#' @title Beta of fund(s) relative to benchmark in up markets
#' @param fund data.frame containing one or multiple fund returns
#' @param bench data.frame containing returns of one benchmark
#' @param freq character to specify common frequency
#' @export
up_beta <- function(fund, bench, freq) {
up <- .subset_direction(fund, bench, freq, 'up')
cov_mat <- cov(up[, 2:ncol(up)], use = 'pairwise.complete.obs')
cov_mat[, ncol(cov_mat)] / cov_mat[nrow(cov_mat), ncol(cov_mat)]
}
#' @title Beta of fund(s) relative to benchmark in down markets
#' @param fund data.frame containing one or multiple fund returns
#' @param bench data.frame containing returns of one benchmark
#' @param freq character to specify common frequency
#' @export
down_beta <- function(fund, bench, freq) {
down <- .subset_direction(fund, bench, freq, 'down')
cov_mat <- cov(down[, 2:ncol(down)], use = 'pairwise.complete.obs')
cov_mat[, ncol(cov_mat)] / cov_mat[nrow(cov_mat), ncol(cov_mat)]
}
#' @title Rolling mean return
#' @param ret data.frame containing return time-series
#' @param xwin rolling window length
#' @export
roll_mean <- function(ret, xwin) {
rl <- slide::slide(ret[, 2:ncol(ret), drop = FALSE],
~colMeans(.x),
.before = xwin,
.step = 1,
.complete = TRUE)
rdf <- do.call(rbind, rl)
res <- data.frame(date = ret$date[(xwin + 1):nrow(ret)],
rdf)
return(res)
}
#' @title Rolling excess mean return
#' @param x data.frame containing returns
#' @param y data.frame containing univariate return to net from each \code{x}
#' @param freq character representing common frequency
#' @param xwin rolling window length
#' @export
roll_excess_mean <- function(x, y, freq, xwin) {
.er <- function(combo) {
col_vec <- 2:(ncol(combo) - 1)
combo[, col_vec] <- combo[, col_vec] - combo[, ncol(combo)]
return(colMeans(combo[, col_vec, drop = FALSE]))
}
combo <- combine_time_series(x, y, freq = freq)
rl <- slide::slide(combo,
~.er(.x),
.before = xwin,
.step = 1,
.complete = TRUE)
rdf <- do.call(rbind, rl)
res <- data.frame(date = combo$date[(xwin + 1):nrow(combo)],
rdf)
return(res)
}
#' @title Rolling volatility
#' @param ret data.frame containing return time-series
#' @param xwin rolling window length
#' @param freq optional character frequency; if supplied the rolling volatility
#' is annualized
#' @export
roll_vol <- function(ret, xwin, freq = NULL) {
.col_sds <- function(x) {
apply(x, 2, sd, na.rm = TRUE)
}
rl <- slide::slide(ret[, 2:ncol(ret), drop = FALSE],
~.col_sds(.x),
.before = xwin,
.complete = TRUE)
rdf <- do.call(rbind, rl)
if (!is.null(freq)) {
rdf <- rdf * sqrt(freq_to_scale(freq))
}
res <- data.frame(date = ret$date[(xwin + 1):nrow(ret)],
rdf)
return(res)
}
#' @title Rolling beta of fund(s) relative to benchmark
#' @param fund data.frame containing one or multiple fund returns
#' @param bench data.frame containing returns of one benchmark
#' @param freq character to specify common frequency
#' @param xwin rolling window length
#' @export
roll_beta <- function(fund, bench, freq, xwin) {
.beta <- function(x) {
cov_mat <- cov(x, use = 'pairwise.complete.obs')
cov_mat[1:(nrow(cov_mat) - 1), ncol(cov_mat)] / cov_mat[nrow(cov_mat), ncol(cov_mat)]
}
x <- combine_time_series(fund, bench, freq = freq)
rl <- slide::slide(x[, 2:ncol(x)],
~.beta(.x),
.before = xwin,
.step = 1,
.complete = TRUE)
rdf <- do.call(rbind, rl)
res <- data.frame(date = x$date[(xwin + 1):nrow(x)],
rdf)
return(res)
}
#' @title Portfolio rebalance
#' @param ret data.frame containing return time-series
#' @param ret_freq character to represent frequency of \code{ret}
#' @param wgt rebalance weights, see details
#' @param reb_freq rebalance frequency, see details
#' @param date_start starting date to truncate portfolio returns
#' @param date_end ending date to truncate portfolio returns
#' @param t_cost optional numeric vector to specify transaction costs, see details
#' @return list containing portfolio returns, asset returns, and asset weights
#' @export
#' @details The rebalance weight parameter \code{wgt} can be entered as a
#' numeric vector of weights that correspond to each asset in \code{ret} or
#' a data.frame containing a time-series of weights with dates in the first
#' column similar to \code{ret}. If a numeric vector is entered \code{reb_freq}
#' will specify how often the portfolio is rebalanced back to the target weights.
#' If \code{reb_freq} is left as \code{NA} then a buy-and-hold strategy is assumed
#' with no rebalances. If \code{t_cost} is a single value then it's assumed all
#' assets have the same transaction cost. Otherwise a numeric vector that
#' corresponds to each asset can be specified. Finally, note that if \code{wgt}
#' is left as \code{NA} an equal weight portfolio will be assumed.
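#' @examples
#' data(ETF)
#' # equal weight buy-and-hold portfolio (wgt and reb_freq left as NA)
#' p <- rebal(ret, 'd')
#' # a sketch of fixed target weights rebalanced monthly; assumes 'm' is a
#' # supported frequency string
#' \dontrun{
#' p2 <- rebal(ret[, 1:3], 'd', wgt = c(0.6, 0.4), reb_freq = 'm')
#' }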
rebal <- function(ret, ret_freq, wgt = NA, reb_freq = NA, date_start = NA,
date_end = Sys.Date(), t_cost = 0) {
n_assets <- ncol(ret) - 1
if (all(is.na(wgt))) {
wgt <- rep(1 / n_assets, n_assets)
}
if (is.null(nrow(wgt))) {
wgt <- auto_reb_wgt(wgt, reb_freq, date_start, date_end)
}
if (length(t_cost) == 1) {
t_cost <- rep(t_cost, n_assets)
}
ret <- change_freq(ret, ret_freq)
ret <- return_fill_na(ret)
n_obs <- nrow(ret)
wgt <- check_time_series(wgt)
wgt <- return_fill_na(wgt)
ret_mat <- as.matrix(ret[, 2:ncol(ret)])
wgt_mat <- as.matrix(wgt[, 2:ncol(wgt)])
init_cap <- 100
asset_index <- matrix(0, nrow = n_obs + 1, ncol = n_assets)
asset_index[1, ] <- wgt_mat[1, ] * init_cap
port_index <- matrix(0, nrow = n_obs + 1, ncol = 1)
port_index[1, 1] <- init_cap
j_reb <- 1 # rebalance date counter
for(i_obs in 1:n_obs) {
# are we at a rebalance date?
is_rebal_date <- ret$date[i_obs] >= wgt$date[j_reb]
if (is.na(is_rebal_date)) {
is_rebal_date <- FALSE
}
# if we are at a rebalance date, rebalance at beginning of day i_obs
if (is_rebal_date) {
adj_w <- wgt_mat[j_reb, ] - t_cost
asset_index[i_obs, ] <- adj_w * port_index[i_obs, ]
j_reb <- j_reb + 1
}
asset_index[i_obs + 1, ] <- asset_index[i_obs, ] * (1 + ret_mat[i_obs, ])
pnl <- sum(asset_index[i_obs + 1, ]) - sum(asset_index[i_obs, ])
port_index[i_obs + 1, 1] <- port_index[i_obs, 1] + pnl
}
hist_wgt_df <- data.frame(date = c(ret$date[1] - 1, ret$date),
asset_index / apply(asset_index, 1, sum))
port_index_df <- data.frame(date = c(ret$date[1] - 1, ret$date),
port_index)
asset_index_df <- data.frame(date = c(ret$date[1] - 1, ret$date),
asset_index)
colnames(asset_index_df) <- c('date', colnames(ret)[2:ncol(ret)])
port_ret <- price_to_ret(port_index_df)
last_wgt <- hist_wgt_df[nrow(hist_wgt_df), 2:ncol(hist_wgt_df)]
curr_df <- data.frame(asset = colnames(ret)[2:ncol(ret)],
wgt = as.numeric(last_wgt))
res <- list()
res$port_ret <- port_ret
res$port_index <- port_index_df
res$curr_df <- curr_df
res$asset_ret <- price_to_ret(asset_index_df)
res$asset_index <- asset_index_df
res$hist_wgt <- hist_wgt_df
return(res)
}
#' @title Populate rebalance weight time-series from a vector of weights
#' @param reb_wgt numeric vector of rebalance weights
#' @param freq rebalance frequency
#' @param date_start starting date of rebalance weight time-series
#' @param date_end ending date of rebalance weight time-series
#' @return data.frame containing time-series of rebalance weights
#' @seealso \code{rebal}
#' @export
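#' @examples
#' # a minimal sketch: monthly 60/40 rebalance weights, assuming 'm' is a
#' # supported frequency string for change_freq()
#' \dontrun{
#' auto_reb_wgt(c(0.6, 0.4), freq = 'm', date_start = as.Date('2020-01-01'))
#' }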
auto_reb_wgt <- function(reb_wgt, freq = NA, date_start = NA,
date_end = Sys.Date()) {
if (is.na(date_start)) {
message('date_start passed as NA, setting to 1970-01-01')
date_start <- as.Date('1970-01-01')
}
if (is.na(freq)) {
message('freq passed as NA, returning reb_wgt as data.frame')
return(data.frame(date = date_start, reb_wgt))
}
if (!is.numeric(reb_wgt)) {
warning('reb_wgt is not numeric, attempting to force with as.numeric')
reb_wgt <- as.numeric(reb_wgt)
}
day_vec <- seq.Date(from = date_start, to = date_end, by = 'days')
reb_wgt_mat <- matrix(reb_wgt, nrow = length(day_vec), ncol = length(reb_wgt),
byrow = TRUE)
reb_wgt_df <- data.frame(date = day_vec, reb_wgt_mat)
change_freq(reb_wgt_df, freq, 'price')
}
#' @title Asset contribution to portfolio return
#' @param p result of function \code{rebal()}, see \code{?rebal} for more details
#' @param date_start starting date for return contribution, default \code{NULL}
#' will use entire portfolio time-series
#' @param date_end ending date for return contribution
#' @return data.frame with asset contribution to return
#' @seealso rebal
#' @details \code{p} is the output of a rebal function which returns a list of
#' portfolio returns, asset returns, and historical weights. These time-series
#' are used to calculate each asset's contribution to the total portfolio
#' return over a given time-period. The resulting data.frame with the asset's
#' contribution to return will have a residual value as well which is the result
#' of compounding effects and transaction costs.
#' @export
#' @examples
#' data(ETF)
#' p <- rebal(ret, 'd')
#' contr_to_ret(p)
contr_to_ret <- function(p, date_start = NULL, date_end = Sys.Date()) {
if (is.null(p$asset_index)) {
stop('p must be the result of the rebal() function, see ?rebal')
}
if (is.null(date_start)) {
date_start <- p$asset_index$date[1]
}
freq <- guess_freq(p$port_ret)
if (freq == 'could not guess frequency') {
stop('port_ret does not have regular frequency')
}
index_start <- seq.Date(date_start, length.out = 2, by = paste0('-1 ', freq_to_string(freq)))[2]
on_or_after_start <- p$asset_index$date >= date_start
if (!any(on_or_after_start)) {
warning('asset last date is before date_start')
na_df <- data.frame(asset = c(colnames(p$asset_index)[2:ncol(p$asset_index)], 'residual', 'total'),
contr.to.ret = rep(NA, nrow(p$curr_df) + 2))
return(na_df)
}
if (min(p$asset_index$date) > date_start) {
warning('asset inception is after date_start')
na_df <- data.frame(asset = c(colnames(p$asset_index)[2:ncol(p$asset_index)], 'residual', 'total'),
contr.to.ret = rep(NA, nrow(p$curr_df) + 2))
return(na_df)
}
asset_index <- trunc_time_series(p$asset_index, index_start, date_end)
asset_ret <- trunc_time_series(p$asset_ret, date_start, date_end)
port_index <- trunc_time_series(p$port_index, date_start, date_end)
contr_mat <- asset_index[1:(nrow(asset_index) - 1), 2:ncol(asset_index)] *
asset_ret[, 2:ncol(asset_ret)]
contr <- apply(contr_mat, 2, sum, na.rm = TRUE) / port_index$port_index[1]
total_ret <- port_index$port_index[nrow(port_index)] / port_index$port_index[1] - 1
resid <- total_ret - sum(contr)
res <- data.frame(asset = c(colnames(p$asset_index)[2:ncol(p$asset_index)], 'residual', 'total'),
contr.to.ret = c(as.numeric(contr), resid, total_ret))
return(res)
}
init_cla <- function(mu_vec, up_bound = 1, low_bound = 0) {
n_assets <- nrow(mu_vec)
up_bound_vec <- matrix(up_bound, ncol = 1, nrow = n_assets)
low_bound_vec <- matrix(low_bound, ncol = 1, nrow = n_assets)
if (sum(low_bound_vec) > 1) {
stop('sum of lower bound weights is greater than 1')
}
# rank 1 corresponds to the highest expected return; ties broken by order
mu_vec_rank <- rank(-mu_vec, ties.method = 'first')
wgt_vec <- low_bound_vec
remain_wgt <- 1 - sum(wgt_vec)
for (i in 1:length(mu_vec)) {
max_mu_idx <- which(mu_vec_rank == i)
if (remain_wgt == 0) {
break
}
up_bound_i <- up_bound_vec[max_mu_idx, 1]
add_wgt <- up_bound_i - wgt_vec[max_mu_idx]
if (add_wgt <= remain_wgt) {
remain_wgt <- remain_wgt - (up_bound_i - wgt_vec[max_mu_idx])
wgt_vec[max_mu_idx] <- up_bound_i
} else {
wgt_vec[max_mu_idx] <- wgt_vec[max_mu_idx] + remain_wgt
remain_wgt <- 0
}
}
res <- list()
res$wgt_vec <- wgt_vec
res$i_free <- max_mu_idx
return(res)
}
sub_mat <- function(mu_vec, cov_mat, f) {
res <- list()
res$cov_f <- cov_mat[f, f]
res$mu_f <- mu_vec[f, 1]
res$one_f <- matrix(1, nrow = length(f), ncol = 1)
return(res)
}
crit_line_algo <- function(mu_vec, cov_mat, up_bound = 1, low_bound = 0) {
mu_vec <- matrix(mu_vec, ncol = 1)
n_assets <- nrow(mu_vec)
init_dat <- init_cla(mu_vec, up_bound, low_bound)
is_free <- matrix(FALSE, nrow = n_assets, ncol = 1)
is_free[init_dat$i_free] <- TRUE
which_is_free <- which(is_free)
is_bound <- !is_free
which_is_bound <- which(is_bound)
lambda_current <- Inf
i_inside <- 0
i_outside <- 0
t <- 1
wgt_mat <- list()
wgt_vec <- init_dat$wgt_vec
wgt_mat[[1]] <- wgt_vec
for (t in 1:1000) {
# Case a) Free asset moves to its bound
if (length(which_is_free) > 1) {
lambda_vec_in <- rep(NA, length(which_is_free))
s <- sub_mat(mu_vec, cov_mat, which_is_free)
cov_f_inv <- MASS::ginv(s$cov_f)
for (i in 1:length(which_is_free)) {
c_i <- -(t(s$one_f) %*% cov_f_inv %*% s$one_f)[1] * (cov_f_inv %*% s$mu_f)[i] +
(t(s$one_f) %*% cov_f_inv %*% s$mu_f)[1] * (cov_f_inv %*% s$one_f)[i]
lambda_i <- (cov_f_inv %*% s$one_f)[i] / c_i
lambda_vec_in[i] <- lambda_i
}
i_inside <- which(lambda_vec_in == max(lambda_vec_in[lambda_vec_in < lambda_current]))
}
# Case b) Asset on its bound becomes free
if (sum(is_free) < n_assets) {
which_is_bound <- which(!is_free)
lambda_vec_out <- rep(NA, length(which_is_bound))
for (i in 1:length(which_is_bound)) {
# evaluate the current free set plus the i-th bound asset as a candidate
free_i <- c(which_is_free, which_is_bound[i])
s <- sub_mat(mu_vec, cov_mat, free_i)
cov_f_inv <- MASS::ginv(s$cov_f)
j <- length(free_i) # position of the candidate asset in the sub matrices
c_i <- -(t(s$one_f) %*% cov_f_inv %*% s$one_f)[1] * (cov_f_inv %*% s$mu_f)[j] +
(t(s$one_f) %*% cov_f_inv %*% s$mu_f)[1] * (cov_f_inv %*% s$one_f)[j]
lambda_i <- (cov_f_inv %*% s$one_f)[j] / c_i
lambda_vec_out[i] <- lambda_i
}
i_outside <- which(lambda_vec_out == max(lambda_vec_out[lambda_vec_out < lambda_current]))
}
# Find turning point by comparing cases
if (length(i_inside) == 0) {
i_inside <- 0
}
if (length(i_outside) == 0) {
i_outside <- 0
}
if (i_inside != 0 | i_outside != 0) {
t <- t + 1
if (i_inside != 0) {
lambda_inside <- lambda_vec_in[i_inside]
} else {
lambda_inside <- NA
}
if (i_outside != 0) {
lambda_outside <- lambda_vec_out[i_outside]
} else {
lambda_outside <- NA
}
lambda_current <- max(lambda_inside, lambda_outside, na.rm = TRUE)
is_lambda_in_max <- lambda_inside == lambda_current
if (is.na(is_lambda_in_max)) {
is_lambda_in_max <- FALSE
}
if (is_lambda_in_max) {
is_free[which_is_free[i_inside]] <- FALSE
} else {
is_free[which_is_bound[i_outside]] <- TRUE
}
which_is_free <- which(is_free)
s <- sub_mat(mu_vec, cov_mat, which_is_free)
cov_f_inv <- MASS::ginv(s$cov_f)
gam <- 1 / (t(s$one_f) %*% cov_f_inv %*% s$one_f) -
((t(s$one_f) %*% cov_f_inv %*% s$mu_f) / (t(s$one_f) %*% cov_f_inv %*% s$one_f)) *
lambda_current
wgt_f <- lambda_current * cov_f_inv %*% s$mu_f + gam[1] * cov_f_inv %*% s$one_f
wgt_vec[which_is_free] <- wgt_f
wgt_mat[[t]] <- wgt_vec
}
if (i_inside == 0 & i_outside == 0) {
break
}
}
return(wgt_mat)
}
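# Illustrative usage sketch, kept as a comment since crit_line_algo is internal
# and not exported; the inputs below are hypothetical:
# mu <- c(0.08, 0.05, 0.03)
# cov_mat <- diag(c(0.04, 0.02, 0.01))
# wgt_list <- crit_line_algo(mu, cov_mat) # list of corner portfolio weights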