#' @title Wrapper for running linear regression
#' @param y data.frame of time-series of dependent variables
#' @param x data.frame of time-series of independent variables
#' @param rf data.frame containing risk-free time-series to net from returns
#' @param freq model frequency
#' @param net_y boolean vector to net risk-free from each \code{y} variable
#' @param net_x boolean vector to net risk-free from each \code{x} variable
#' @return list with regression output
#' @export
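#' @examples
#' # a sketch assuming the package's ETF and FF example data sets; the exact
#' # structure of the risk-free data.frame (ff) is an assumption here
#' \dontrun{
#' data(ETF)
#' data(FF)
#' run_reg(y = ret[, 1:2], x = ret[, c(1, 3:ncol(ret))], rf = ff, freq = 'd')
#' }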
run_reg <- function(y, x, rf, freq, net_y = TRUE, net_x = FALSE) {
y <- excess_ret_bool(y, rf, freq, net_y)
x <- excess_ret_bool(x, rf, freq, net_x)
fit <- mv_reg(y, x, freq)
res <- reg_fit_table(fit, freq)
res$fit <- fit
return(res)
}
#' @title Multi-variate regression
#' @param y data.frame time-series of one or more dependent variables
#' @param x data.frame time-series of one or more independent variables
#' @param freq character to represent frequency, both time-series will be
#' converted to the specified frequency
#' @return list of fits from lm function
#' @export
#' @details \code{y} variables are split and combined with \code{x} variables to
#' fit an \code{lm} model on each dataset.
mv_reg <- function(y, x, freq) {
# lm formula relies on y values being named '.yval', need to rename any
# potential x columns named '.yval'
if ('.yval' %in% colnames(x)) {
warning('.yval found in x column names, attempting to rename to ..yval')
colnames(x)[which(colnames(x) == '.yval')] <- '..yval'
}
y_freq <- change_freq(y, freq)
x_freq <- change_freq(x, freq)
y_fill <- return_fill_na(y_freq)
x_fill <- return_fill_na(x_freq)
y_omit <- na.omit(y_fill)
x_omit <- na.omit(x_fill)
date_start <- max(c(min(y_omit$date), min(x_omit$date)))
date_end <- min(c(max(y_omit$date), max(x_omit$date)))
y_trunc <- trunc_time_series(y_omit, date_start, date_end)
x_trunc <- trunc_time_series(x_omit, date_start, date_end)
y_tidy <- tidy_ret(y_trunc)
y_tidy$series <- factor(y_tidy$series, unique(y_tidy$series))
y_split <- split(y_tidy, y_tidy$series)
y_split_df <- lapply(y_split, untidy_ret)
models <- lapply(y_split_df, merge, y = x_trunc, by = 'date', all = TRUE)
named_models <- lapply(models,
function(x) {
colnames(x)[2] <- '.yval'
x})
fit <- lapply(named_models, function(x) {lm(.yval ~ ., data = x[, -1])})
return(fit)
}
#' @title Format regression summary table from list of fits
#' @param fit list of fits from \code{lm}, see output from \code{mv_reg}
#' @param freq character to represent model frequency
#' @return list of data.frames with numeric and formatted (string) regression output
#' @export
reg_fit_table <- function(fit, freq) {
asset <- names(fit)
coeff <- sapply(fit, coefficients)
coeff_df <- data.frame(asset = asset, t(coeff))
# the intercept (column 2) is annualized and labeled Ann.Resid
colnames(coeff_df)[2] <- 'Ann.Resid'
coeff_df$Ann.Resid <- coeff_df$Ann.Resid * freq_to_scale(freq)
coeff_arranged <- coeff_df[, c(1, 3:ncol(coeff_df), 2)]
summ <- lapply(fit, summary)
adj_r2 <- sapply(summ, '[[', 'adj.r.squared')
coeff_arranged$adj.r2 <- adj_r2
fmt_df <- coeff_arranged
fmt_df[, 2:(ncol(fmt_df) - 2)] <- apply(fmt_df[, 2:(ncol(fmt_df) - 2), drop = FALSE],
2, fNum)
fmt_df[, (ncol(fmt_df) - 1):ncol(fmt_df)] <- apply(fmt_df[, (ncol(fmt_df) - 1):ncol(fmt_df), drop = FALSE],
2, fPercent)
t_val <- sapply(summ, function(x) x$coefficients[, 't value'])
t_val_df <- data.frame(asset = asset, t(t_val))
colnames(t_val_df)[2] <- 'Ann.Resid'
t_val_arranged <- t_val_df[, c(1, 3:ncol(t_val_df), 2)]
res <- list()
res$num <- coeff_arranged
res$fmt <- fmt_df
res$t_val <- t_val_arranged
return(res)
}
#' @title Rolling absorption ratio
#' @param ret data.frame containing return time-series
#' @param n_pc number of principal components for absorption ratio calculation, see details
#' @param roll_win window for rolling calculation
#' @param cov_win list of windows for covariance calculation, see details
#' @param cov_wgt list of weights for covariance calculation, see details
#' @param delta_win list of weights for standardized change calc, see details
#' @return data.frame with dates, absorption ratio, and standardized change in
#' the absorption ratio.
#' @export
#' @importFrom slide slide
#' @details See the 2010 Kritzman, et al paper Principal Components as a Measure of
#' Systemic Risk, https://papers.ssrn.com/sol3/papers.cfm?abstract_id=1582687.
#' The ratio is equal to the proportion of risk explained by the first \code{n_pc}
#' eigenvalues. A higher ratio indicates higher systemic risk in the universe of
#' the \code{ret} parameter. The paper uses a weighted covariance calculation with
#' a half-life decay; this function differs slightly by using a simple weighted
#' average covariance calculation, see \code{cov_wgt_avg}. There are three
#' covariance weights: long (default 252 observations and 0.5 weight), med (default
#' 63 observations and 0.25 weight), and short (default 21 observations and 0.25 weight).
#' This allows for flexibility, e.g., if you wanted a standard \code{cov} calculation you
#' would set \code{cov_win$long} to the same number as \code{roll_win},
#' set \code{cov_wgt$long} to 1, and set the med and short weights to 0. Note
#' the \code{cov_win} and \code{cov_wgt} arguments are lists with the names
#' long, med, and short. The \code{delta_win} calculates a standardized change (z-score)
#' of the rolling absorption ratio defined as (mean of short window - mean of long window) /
#' standard deviation of the long window.
#' @examples
#' data(ETF)
#' # use standard parameters
#' roll_absorp_ratio(ret)
#'
#' # use regular cov() covariance calculation
#' roll_absorp_ratio(ret, cov_wgt = list(long = 1, med = 0, short = 0))
#'
#' # use 500 day rolling window instead
#' roll_absorp_ratio(ret, roll_win = 500, cov_win = list(long = 500, med = 63, short = 21))
roll_absorp_ratio <- function(ret, n_pc = 2, roll_win = 252,
cov_win = list(long = 252, med = 63, short = 21),
cov_wgt = list(long = 0.5, med = 0.25, short = 0.25),
delta_win = list(long = 252, short = 15)) {
roll_cov <- slide::slide(ret,
~cov_wgt_avg(.x, cov_win$long, cov_win$med,
cov_win$short, cov_wgt$long,
cov_wgt$med, cov_wgt$short),
.before = roll_win, .complete = TRUE)
roll_ar <- slide::slide(roll_cov,
~absorp_ratio(.x[[1]], n_pc = n_pc),
.before = 1)
roll_ar_vec <- unlist(roll_ar)
roll_ar_delta <- slide::slide(roll_ar_vec,
~zscore_win(.x, delta_win$short, delta_win$long),
.before = delta_win$long, .complete = TRUE)
roll_ar_delta_vec <- unlist(roll_ar_delta)
res <- data.frame(date = ret$date,
absorp.ratio = c(rep(NA, 1 + roll_win), roll_ar_vec),
absorp.ratio.delta = c(rep(NA, 1 + roll_win + delta_win$long),
roll_ar_delta_vec))
return(res)
}
#' @title Standardized change with short and long windows
#' @param x numeric vector
#' @param short_win short window
#' @param long_win long window
#' @return numeric value of standardized change, i.e., the short window mean
#' less the long window mean, standardized by the long window standard deviation
#' @export
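#' @examples
#' # a minimal sketch: standardized change of a simulated series; the window
#' # lengths below are illustrative choices, not defaults
#' set.seed(1)
#' x <- cumsum(rnorm(300, sd = 0.01))
#' zscore_win(x, short_win = 15, long_win = 252)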
zscore_win <- function(x, short_win, long_win) {
# use the most recent short_win and long_win observations, assuming x is
# ordered oldest to newest
x_short <- utils::tail(x, short_win)
x_long <- utils::tail(x, long_win)
(mean(x_short) - mean(x_long)) / sd(x_long)
}
.zscore <- function(x) {
(x - mean(x, na.rm = TRUE)) / sd(x, na.rm = TRUE)
}
#' @title Calculate absorption ratio
#' @param cov_mat estimated covariance matrix
#' @param n_pc number of eigenvalues to use for calculation
#' @return numeric absorption ratio
#' @details See \code{roll_absorp_ratio} for more details. Also 2010 Kritzman,
#' et al paper Principal Components as a Measure of Systemic Risk,
#' https://papers.ssrn.com/sol3/papers.cfm?abstract_id=1582687.
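#' @examples
#' # a minimal sketch: two highly correlated assets concentrate nearly all
#' # variance in the first principal component
#' cov_mat <- matrix(c(1, 0.9, 0.9, 1), nrow = 2)
#' absorp_ratio(cov_mat, n_pc = 1)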
#' @export
absorp_ratio <- function(cov_mat, n_pc = 2) {
if (is.null(cov_mat)) {
return(NULL)
}
# for a symmetric covariance matrix the singular values equal the eigenvalues
eig <- svd(cov_mat)
sum(eig$d[1:n_pc]) / sum(eig$d)
}
#' @title Weighted average covariance matrix
#' @param ret data.frame containing returns
#' @param long_win long window length
#' @param med_win medium window length
#' @param short_win short window length
#' @param long_wgt weight of long window covariance
#' @param med_wgt weight of medium window covariance
#' @param short_wgt weight of short window covariance
#' @note The short window and weight place a higher emphasis on recent returns,
#' while the longer window and weight place a more neutral weight on recent returns
#' and returns further in the past. This covariance calculation is similar
#' to an exponentially weighted moving average but is less computationally
#' intensive.
#' @export
cov_wgt_avg <- function(ret, long_win = 252, med_win = 63, short_win = 21,
long_wgt = 0.25, med_wgt = 0.25, short_wgt = 0.5) {
ret <- check_time_series(ret)
# order the most recent observations first so that 1:short_win selects the
# most recent returns
ret_ordered <- ret[order(ret$date, decreasing = TRUE), 2:ncol(ret)]
cov_short <- cov(ret_ordered[1:short_win, ]) * short_wgt
cov_med <- cov(ret_ordered[1:med_win, ]) * med_wgt
cov_long <- cov(ret_ordered[1:long_win, ]) * long_wgt
cov_short + cov_med + cov_long
}
#' @title Principal Component Analysis from the covariance matrix
#' @param cov_mat covariance matrix
#' @return list with latents and coefficients
#' @export
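#' @examples
#' # a minimal sketch with a simple 2x2 covariance matrix
#' cov_mat <- matrix(c(1, 0.5, 0.5, 1), nrow = 2)
#' p <- pca_cov(cov_mat)
#' p$latent # eigenvalues
#' p$coeff  # sign-adjusted loadings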
pca_cov <- function(cov_mat) {
s <- svd(cov_mat)
latent <- s$d
raw_coeff <- s$v
p <- dim(raw_coeff)[1]
d <- dim(raw_coeff)[2]
# flip signs so the largest-magnitude loading in each column is positive
max_index <- max.col(t(abs(raw_coeff)))
col_sign <- sign(raw_coeff[max_index + seq(from = 0, to = (d - 1) * p, by = p)])
coeff <- matrix(col_sign, nrow = p, ncol = d, byrow = TRUE) * raw_coeff
res <- list()
res$latent <- latent
res$coeff <- coeff
return(res)
}
#' @title Hierarchical clustering around latents
#' @param ret data.frame containing return time-series
#' @return hclust result from the distance calculated from PCA
#' @export
#' @examples
#' data(ETF)
#' hclust_res <- pca_hclust(ret)
#'
#' # plot dendrogram
#' plot(hclust_res)
pca_hclust <- function(ret) {
ret <- check_time_series(ret)
cor_mat <- cor(ret[, 2:ncol(ret)], use = 'pairwise.complete.obs')
p <- pca_cov(cor_mat)
meas <- diag(sqrt(p$latent)) %*% t(p$coeff)
dist_res <- dist(t(meas), method = 'euclidean')
hclust(dist_res)
}
#' @title Geometric return
#' @param ret data.frame containing return time-series
#' @param freq character representing frequency
#' @export
geo_ret <- function(ret, freq) {
ret <- change_freq(ret, freq)
apply(1 + ret[, 2:ncol(ret), drop = FALSE], 2, prod, na.rm = TRUE)^(freq_to_scale(freq) / nrow(ret)) - 1
}
#' @title Annualized volatility
#' @param ret data.frame containing return time-series
#' @param freq character representing frequency
#' @export
ann_vol <- function(ret, freq) {
ret <- change_freq(ret, freq)
apply(ret[, 2:ncol(ret), drop = FALSE], 2, sd) * sqrt(freq_to_scale(freq))
}
#' @title Check specification of y parameter
#' @param y data.frame containing univariate time-series
#' @return error if y is misspecified, otherwise nothing
#' @export
check_y_param <- function(y) {
if (ncol(y) != 2) {
stop('y is misspecified, y should have two columns: dates and returns')
}
}
#' @title Excess mean
#' @param x data.frame containing returns
#' @param y data.frame containing univariate return to net from each \code{x}
#' @param freq character representing common frequency
#' @return data.frame of \code{x} net of \code{y}
#' @export
#' @note the geometric mean of \code{y} is subtracted from the geometric mean
#' of \code{x}
excess_mean <- function(x, y, freq) {
check_y_param(y)
x <- change_freq(x, freq)
y <- change_freq(y, freq)
date_start <- max(c(min(x$date), min(y$date)))
date_end <- min(c(max(x$date), max(y$date)))
x_trunc <- trunc_time_series(x, date_start, date_end)
y_trunc <- trunc_time_series(y, date_start, date_end)
x_mean <- geo_ret(x_trunc, freq)
y_mean <- geo_ret(y_trunc, freq)
x_mean - y_mean
}
#' @title Excess return time-series
#' @param x data.frame containing returns
#' @param y data.frame containing univariate return to net from each \code{x}
#' @param freq character representing common frequency
#' @export
#' @return data.frame of \code{x} that is net of \code{y}
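#' @examples
#' # a sketch assuming the package's ETF and FF example data sets; ff is
#' # assumed to be a data.frame with a date column and risk-free returns
#' \dontrun{
#' data(ETF)
#' data(FF)
#' excess_ret(ret, ff, 'd')
#' }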
excess_ret <- function(x, y, freq) {
check_y_param(y)
combo <- combine_time_series(x, y, freq = freq)
combo[, 2:(ncol(combo) - 1)] <- combo[, 2:(ncol(combo) - 1)] - combo[, ncol(combo)]
return(combo[, 1:(ncol(combo) - 1)])
}
#' @title Sharpe Ratio
#' @param ret data.frame containing returns
#' @param rf either numeric value representing annual risk-free rate or data.frame
#' containing risk-free time-series
#' @param freq character representing frequency
#' @note Sharpe ratio = (annualized return less annual risk-free rate) / annualized volatility
#' @export
#' @examples
#' data(ETF)
#' data(FF)
#' sharpe_ratio(ret, ff$ff_ret, 'd')
#' sharpe_ratio(ret, 0.02, 'd')
sharpe_ratio <- function(ret, rf, freq) {
if (is.numeric(rf) && length(rf) == 1) {
rmrf <- geo_ret(ret, freq) - rf
} else {
rmrf <- excess_mean(ret, rf, freq)
}
rmrf / ann_vol(ret, freq)
}
#' @title Downside volatility
#' @param ret data.frame containing returns
#' @param freq character representing frequency
#' @param mar minimal acceptable return to define downside, default is zero
#' @export
#' @details The \code{ret} data.frame is filtered to only include returns
#' below the \code{mar}. The resulting downside volatility is calculated from this
#' data.frame.
down_vol <- function(ret, freq, mar = 0) {
ret <- check_time_series(ret)
dvol <- apply(ret[, 2:ncol(ret)], 2, function(x) {sd(x[x < mar], na.rm = TRUE)})
dvol * sqrt(freq_to_scale(freq))
}
#' @title Sortino ratio
#' @param ret data.frame containing returns
#' @param rf either numeric value representing annual risk-free rate or data.frame
#' containing risk-free time-series
#' @param freq character representing frequency
#' @param mar minimal acceptable return to define downside
#' @details Sortino ratio = (annualized return less annual risk-free rate) / annualized
#' downside volatility.
#' @export
sortino_ratio <- function(ret, rf, freq, mar = 0) {
ret <- change_freq(ret, freq)
if (is.numeric(rf) && length(rf) == 1) {
rmrf <- geo_ret(ret, freq) - rf
} else {
rf <- change_freq(rf, freq)
rmrf <- excess_mean(ret, rf, freq)
}
rmrf / down_vol(ret, freq, mar)
}
#' @title Omega ratio
#' @param ret data.frame containing returns
#' @param mar minimal acceptable return to define downside
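#' @examples
#' # a sketch assuming the ETF example data used elsewhere in this package
#' data(ETF)
#' omega_ratio(ret, mar = 0)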
#' @export
omega_ratio <- function(ret, mar = 0) {
# TO-DO add black-fisher approximation
ret <- check_time_series(ret)
x <- ret[, 2:ncol(ret)]
avg_hit <- apply(x, 2, function(x, mar) {mean(pmax(x - mar, 0), na.rm = TRUE)}, mar = mar)
avg_miss <- apply(x, 2, function(x, mar) {mean(pmax(mar - x, 0), na.rm = TRUE)}, mar = mar)
avg_hit / avg_miss
}
#' @title Value at Risk
#' @param ret data.frame containing return time-series
#' @param p numeric percentile for VaR tail
#' @param type string to specify calculation type: \code{historical},
#' \code{modified}, \code{normal}, or \code{stable}, see details
#' @details \code{type historical} calculates an empirical VaR from the time-series
#' observations. The other methods are parametric. \code{modified} uses a
#' Cornish-Fisher expansion to consider skew and kurtosis. \code{normal} approximates
#' using a Gaussian distribution. \code{stable} approximates using a stable
#' distribution. See \code{fBasics::stableFit} for more info.
#' @export
#' @importFrom fBasics stableFit
#' @importFrom stabledist qstable
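#' @examples
#' # a sketch assuming the ETF example data: historical and normal 5% VaR
#' data(ETF)
#' val_at_risk(ret, p = 0.05, type = 'historical')
#' val_at_risk(ret, p = 0.05, type = 'normal')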
val_at_risk <- function(ret, p = 0.05,
type = c('historical', 'modified', 'normal', 'stable')) {
x <- return_fill_na(ret)[, 2:ncol(ret)]
type <- toupper(type[1])
if (type == 'HISTORICAL') {
return(apply(x, 2, function(x, p) {sort(x)[ceiling(p * length(x))]}, p = p))
} else if (type == 'MODIFIED') {
warning('modified still under construction')
return(NA)
} else if (type == 'NORMAL') {
return(apply(x, 2, function(x, p) {qnorm(p, mean(x, na.rm = TRUE), sd(x, na.rm = TRUE))}, p = p))
} else if (type == 'STABLE') {
# fits stable parameters with fBasics::stableFit using the quantile method
return(apply(x, 2, .quantile_stable, p = p))
} else {
warning('type must be one of historical, modified, normal, or stable')
return(NA)
}
}
# utility function to estimate stable quantile
.quantile_stable <- function(x, p = 0.05) {
fit_object <- fBasics::stableFit(x, doplot = FALSE)
fit <- fit_object@fit$estimate
stabledist::qstable(p, fit[1], fit[2], fit[3], fit[4])
}
# utility function to fit a stable distribution and return pdf
.stable_fit <- function(x, ...) {
fit_object <- fBasics::stableFit(x, ...)
fit <- fit_object@fit$estimate
d <- stabledist::dstable(x, fit[1], fit[2], fit[3], fit[4])
res <- list()
res$fit_obj <- fit_object
res$fit <- fit
res$dstable <- d
return(res)
}
#' @title Calculate time-series of loss from prior peak
#' @param ret data.frame containing return time-series
#' @export
drawdown <- function(ret) {
ret <- check_time_series(ret)
x <- return_fill_na(ret)
dd_calc <- function(x) {
wi <- cumprod(x + 1)
peaks <- cummax(wi)
wi / peaks - 1
}
x[, 2:ncol(x)] <- apply(x[, 2:ncol(x), drop = FALSE], 2, dd_calc)
return(x)
}
#' @title Calculate worst peak to trough loss
#' @param ret data.frame containing returns
#' @export
worst_drawdown <- function(ret) {
dd <- drawdown(ret)
apply(dd[, 2:ncol(dd)], 2, min)
}
#' @title Find the worst n drawdowns
#' @param fund data.frame containing time-series of one asset
#' @param n number of drawdowns to find
#' @return data.frame with unique drawdowns start, end, and trough dates, and trough value
#' @export
#' @examples
#' data(ETF)
#' worst_n_drawdowns(ret[, 1:2], n = 5)
worst_n_drawdowns <- function(fund, n = 5) {
dd <- drawdown(fund)
discrete_dd <- find_drawdowns(dd)
ordered_dd <- discrete_dd[order(discrete_dd$Drawdown), ]
ordered_dd[1:n, ]
}
#' @title Find unique drawdowns from drawdown time-series
#' @param dd data.frame containing drawdown time-series of one asset, see \code{drawdown}
#' @return data.frame with unique drawdowns start, end, and trough dates, and trough value
#' @export
#' @examples
#' data(ETF)
#' dd <- drawdown(ret[, 1:2])
#' find_drawdowns(dd)
find_drawdowns <- function(dd) {
# This function finds the start and end dates of each unique drawdown
# by lagging the drawdown time-series and checking if the series is transitioning
# from 0 to negative or negative back to 0.
# Next the unique drawdowns are placed into a list using mapply with a function
# that truncates the time-series based on the vectors of starting and ending dates.
# Finally, the minimum (trough) of each drawdown is found and the results
# are returned in a data.frame.
check_y_param(dd) # make sure dd time-series is univariate
colnames(dd) <- c('date', 'Drawdown')
dd$isDown <- dd[, 2] < 0
dd$isDownLag <- c(NA, dd[1:(nrow(dd) - 1), 'isDown'])
dd$start <- dd$isDown & !dd$isDownLag # change from 0 to negative signals drawdown start
dd$end <- !dd$isDown & dd$isDownLag # change from negative back to 0 signals recovery
start_date <- dd[dd$start, 1]
end_date <- dd[dd$end, 1]
# if lengths of start and end dates are the same the time-series ends on
# a drawdown and we need to adjust last end date to last start date
if (length(end_date) == length(start_date)) {
dd_date <- data.frame(StartDate = start_date,
EndDate = c(end_date[2:length(end_date)],
start_date[length(start_date)]))
} else {
dd_date <- data.frame(StartDate = start_date,
EndDate = end_date[2:length(end_date)])
}
# create unique list of drawdowns
dd_list <- mapply(trunc_time_series,
ret = list(dd[, 1:2]),
date_start = dd_date$StartDate,
date_end = dd_date$EndDate,
SIMPLIFY = FALSE)
.get_min_dd <- function(x) {
# find trough by finding which observation equals the minimum
indx <- which(x[, 2] == min(x[, 2]))
# if more than one observation equals the minimum, take the first one
if (length(indx) > 1) {
indx <- indx[1]
}
x[indx, ]
}
trough_list <- lapply(dd_list, .get_min_dd)
trough_df <- do.call(rbind, trough_list)
res <- data.frame(StartDate = dd_date$StartDate,
TroughDate = trough_df$date,
EndDate = dd_date$EndDate,
Trough = trough_df$Drawdown,
DaysToTrough = trough_df$date - dd_date$StartDate,
DaysToRecover = dd_date$EndDate - trough_df$date,
TotalDays = dd_date$EndDate - dd_date$StartDate)
colnames(res)[4] <- 'Drawdown'
return(res)
}
#' @title Tracking error
#' @param fund data.frame containing one or multiple fund returns
#' @param bench data.frame containing returns of one benchmark
#' @param freq character to specify common frequency
#' @return tracking error of each fund relative to benchmark
#' @export
#' @note Tracking error is the standard deviation of the fund minus the benchmark,
#' the tracking error will be annualized by the specified frequency.
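#' @examples
#' # a sketch assuming the ETF example data: first return column as the fund
#' # and second as the benchmark
#' data(ETF)
#' tracking_error(ret[, 1:2], ret[, c(1, 3)], 'd')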
tracking_error <- function(fund, bench, freq) {
er <- excess_ret(fund, bench, freq)
apply(er[, 2:ncol(er)], 2, sd, na.rm = TRUE) * sqrt(freq_to_scale(freq))
}
#' @title Beta of fund(s) relative to benchmark
#' @param fund data.frame containing one or multiple fund returns
#' @param bench data.frame containing returns of one benchmark
#' @param rf data.frame containing risk-free rate
#' @param freq character to specify common frequency
#' @return beta
#' @export
fund_beta <- function(fund, bench, rf, freq) {
combo <- combine_time_series(fund, bench, freq = freq)
er <- excess_ret(combo, rf, freq = freq)
cov_mat <- cov(er[, 2:ncol(er)], use = 'pairwise.complete.obs')
cov_mat[, ncol(cov_mat)] / cov_mat[nrow(cov_mat), ncol(cov_mat)]
}
#' @title Correlation of fund(s) relative to benchmark
#' @param fund data.frame containing one or multiple fund returns
#' @param bench data.frame containing returns of one benchmark
#' @param rf data.frame containing risk-free rate
#' @param freq character to specify common frequency
#' @return vector of correlation of each fund to benchmark
#' @export
fund_corr <- function(fund, bench, rf, freq) {
combo <- combine_time_series(fund, bench, freq = freq)
er <- excess_ret(combo, rf, freq = freq)
cor_mat <- cor(er[, 2:ncol(er)], use = 'pairwise.complete.obs')
cor_mat[, ncol(cor_mat)]
}
#' @title Information ratio
#' @param fund data.frame containing one or multiple fund returns
#' @param bench data.frame containing returns of one benchmark
#' @param freq character to specify common frequency
#' @note Information ratio = excess return over the benchmark / tracking error.
#' @export
info_ratio <- function(fund, bench, freq) {
er <- excess_mean(fund, bench, freq)
er / tracking_error(fund, bench, freq)
}
.subset_direction <- function(fund, bench, freq, direct = c('up', 'down')) {
direct <- match.arg(direct)
combo <- combine_time_series(fund, bench, freq = freq)
if (direct == 'up') {
keep <- combo[, ncol(combo)] >= 0
} else if (direct == 'down') {
keep <- combo[, ncol(combo)] < 0
} else {
stop('direct must be up or down')
}
combo[keep, ]
}
#' @title Up capture ratio
#' @param fund data.frame containing one or multiple fund returns
#' @param bench data.frame containing returns of one benchmark
#' @param freq character to specify common frequency
#' @export
up_capture <- function(fund, bench, freq) {
up <- .subset_direction(fund, bench, freq, 'up')
geo_ret(up[, 1:(ncol(up) - 1)], freq) / geo_ret(up[, c(1, ncol(up))], freq)
}
#' @title Down capture ratio
#' @param fund data.frame containing one or multiple fund returns
#' @param bench data.frame containing returns of one benchmark
#' @param freq character to specify common frequency
#' @export
down_capture <- function(fund, bench, freq) {
down <- .subset_direction(fund, bench, freq, 'down')
geo_ret(down[, 1:(ncol(down) - 1)], freq) / geo_ret(down[, c(1, ncol(down))], freq)
}
#' @title Beta of fund(s) relative to benchmark in up markets
#' @param fund data.frame containing one or multiple fund returns
#' @param bench data.frame containing returns of one benchmark
#' @param freq character to specify common frequency
#' @export
up_beta <- function(fund, bench, freq) {
up <- .subset_direction(fund, bench, freq, 'up')
cov_mat <- cov(up[, 2:ncol(up)], use = 'pairwise.complete.obs')
cov_mat[, ncol(cov_mat)] / cov_mat[nrow(cov_mat), ncol(cov_mat)]
}
#' @title Beta of fund(s) relative to benchmark in down markets
#' @param fund data.frame containing one or multiple fund returns
#' @param bench data.frame containing returns of one benchmark
#' @param freq character to specify common frequency
#' @export
down_beta <- function(fund, bench, freq) {
down <- .subset_direction(fund, bench, freq, 'down')
cov_mat <- cov(down[, 2:ncol(down)], use = 'pairwise.complete.obs')
cov_mat[, ncol(cov_mat)] / cov_mat[nrow(cov_mat), ncol(cov_mat)]
}
#' @title Rolling mean return
#' @param ret data.frame containing return time-series
#' @param xwin rolling window length
#' @export
roll_mean <- function(ret, xwin) {
rl <- slide::slide(ret[, 2:ncol(ret), drop = FALSE],
~colMeans(.x),
.before = xwin,
.step = 1,
.complete = TRUE)
rdf <- do.call(rbind, rl)
res <- data.frame(date = ret$date[(xwin + 1):nrow(ret)],
rdf)
return(res)
}
#' @title Rolling excess mean return
#' @param x data.frame containing returns
#' @param y data.frame containing univariate return to net from each \code{x}
#' @param freq character representing common frequency
#' @param xwin rolling window length
#' @export
roll_excess_mean <- function(x, y, freq, xwin) {
.er <- function(combo) {
col_vec <- 2:(ncol(combo) - 1)
combo[, col_vec] <- combo[, col_vec] - combo[, ncol(combo)]
return(colMeans(combo[, col_vec, drop = FALSE]))
}
combo <- combine_time_series(x, y, freq = freq)
rl <- slide::slide(combo,
~.er(.x),
.before = xwin,
.step = 1,
.complete = TRUE)
rdf <- do.call(rbind, rl)
res <- data.frame(date = combo$date[(xwin + 1):nrow(combo)],
rdf)
return(res)
}
#' @title Rolling volatility
#' @param ret data.frame containing return time-series
#' @param xwin rolling window length
#' @param freq optional character frequency; if supplied the rolling volatility
#' is annualized
#' @export
roll_vol <- function(ret, xwin, freq = NULL) {
.col_sds <- function(x) {
apply(x, 2, sd, na.rm = TRUE)
}
rl <- slide::slide(ret[, 2:ncol(ret), drop = FALSE],
~.col_sds(.x),
.before = xwin,
.complete = TRUE)
rdf <- do.call(rbind, rl)
if (!is.null(freq)) {
rdf <- rdf * sqrt(freq_to_scale(freq))
}
res <- data.frame(date = ret$date[(xwin + 1):nrow(ret)],
rdf)
return(res)
}
#' @title Rolling beta of fund(s) relative to benchmark
#' @param fund data.frame containing one or multiple fund returns
#' @param bench data.frame containing returns of one benchmark
#' @param freq character to specify common frequency
#' @param xwin rolling window length
#' @export
roll_beta <- function(fund, bench, freq, xwin) {
.beta <- function(x) {
cov_mat <- cov(x, use = 'pairwise.complete.obs')
cov_mat[1:(nrow(cov_mat) - 1), ncol(cov_mat)] / cov_mat[nrow(cov_mat), ncol(cov_mat)]
}
x <- combine_time_series(fund, bench, freq = freq)
rl <- slide::slide(x[, 2:ncol(x)],
~.beta(.x),
.before = xwin,
.step = 1,
.complete = TRUE)
rdf <- do.call(rbind, rl)
res <- data.frame(date = x$date[(xwin + 1):nrow(x)],
rdf)
return(res)
}
#' @title Portfolio rebalance
#' @param ret data.frame containing return time-series
#' @param ret_freq character to represent frequency of \code{ret}
#' @param wgt rebalance weights, see details
#' @param reb_freq rebalance frequency, see details
#' @param date_start starting date to truncate portfolio returns
#' @param date_end ending date to truncate portfolio returns
#' @param t_cost optional numeric vector to specify transaction costs, see details
#' @return list containing portfolio returns, asset returns, and asset weights
#' @export
#' @details The rebalance weight parameter \code{wgt} can be entered as a
#' numeric vector of weights that correspond to each asset in \code{ret} or
#' a data.frame containing a time-series of weights with dates in the first
#' column similar to \code{ret}. If a numeric vector is entered \code{reb_freq}
#' will specify how often the portfolio is rebalanced back to the target weights.
#' If \code{reb_freq} is left as \code{NA} then a buy-and-hold strategy is assumed
#' with no rebalances. If \code{t_cost} is a single value then it's assumed all
#' assets have the same transaction cost. Otherwise a numeric vector that
#' corresponds to each asset can be specified. Finally, note that if \code{wgt}
#' is left as \code{NA} an equal weight portfolio will be assumed.
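#' @examples
#' data(ETF)
#' # equal weight buy-and-hold portfolio (wgt and reb_freq left as NA)
#' p <- rebal(ret, 'd')
#' # a sketch of fixed target weights rebalanced monthly; assumes 'm' is a
#' # supported frequency string
#' \dontrun{
#' p2 <- rebal(ret[, 1:3], 'd', wgt = c(0.6, 0.4), reb_freq = 'm')
#' }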
rebal <- function(ret, ret_freq, wgt = NA, reb_freq = NA, date_start = NA,
date_end = Sys.Date(), t_cost = 0) {
n_assets <- ncol(ret) - 1
if (all(is.na(wgt))) {
wgt <- rep(1 / n_assets, n_assets)
}
if (is.null(nrow(wgt))) {
wgt <- auto_reb_wgt(wgt, reb_freq, date_start, date_end)
}
if (length(t_cost) == 1) {
t_cost <- rep(t_cost, n_assets)
}
ret <- change_freq(ret, ret_freq)
ret <- return_fill_na(ret)
n_obs <- nrow(ret)
wgt <- check_time_series(wgt)
wgt <- return_fill_na(wgt)
ret_mat <- as.matrix(ret[, 2:ncol(ret)])
wgt_mat <- as.matrix(wgt[, 2:ncol(wgt)])
init_cap <- 100
asset_index <- matrix(0, nrow = n_obs + 1, ncol = n_assets)
asset_index[1, ] <- wgt_mat[1, ] * init_cap
port_index <- matrix(0, nrow = n_obs + 1, ncol = 1)
port_index[1, 1] <- init_cap
j_reb <- 1 # rebalance date counter
for(i_obs in 1:n_obs) {
# are we at a rebalance date?
is_rebal_date <- ret$date[i_obs] >= wgt$date[j_reb]
if (is.na(is_rebal_date)) {
is_rebal_date <- FALSE
}
# if we are at a rebalance date, rebalance at beginning of day i_obs
if (is_rebal_date) {
adj_w <- wgt_mat[j_reb, ] - t_cost
asset_index[i_obs, ] <- adj_w * port_index[i_obs, ]
j_reb <- j_reb + 1
}
asset_index[i_obs + 1, ] <- asset_index[i_obs, ] * (1 + ret_mat[i_obs, ])
pnl <- sum(asset_index[i_obs + 1, ]) - sum(asset_index[i_obs, ])
port_index[i_obs + 1, 1] <- port_index[i_obs, 1] + pnl
}
hist_wgt_df <- data.frame(date = c(ret$date[1] - 1, ret$date),
asset_index / apply(asset_index, 1, sum))
port_index_df <- data.frame(date = c(ret$date[1] - 1, ret$date),
port_index)
asset_index_df <- data.frame(date = c(ret$date[1] - 1, ret$date),
asset_index)
colnames(asset_index_df) <- c('date', colnames(ret)[2:ncol(ret)])
port_ret <- price_to_ret(port_index_df)
last_wgt <- hist_wgt_df[nrow(hist_wgt_df), 2:ncol(hist_wgt_df)]
curr_df <- data.frame(asset = colnames(ret)[2:ncol(ret)],
wgt = as.numeric(last_wgt))
res <- list()
res$port_ret <- port_ret
res$port_index <- port_index_df
res$curr_df <- curr_df
res$asset_ret <- price_to_ret(asset_index_df)
res$asset_index <- asset_index_df
res$hist_wgt <- hist_wgt_df
return(res)
}
#' @title Populate rebalance weight time-series from a vector of weights
#' @param reb_wgt numeric vector of rebalance weights
#' @param freq rebalance frequency
#' @param date_start starting date of rebalance weight time-series
#' @param date_end ending date of rebalance weight time-series
#' @return data.frame containing time-series of rebalance weights
#' @seealso \code{rebal}
#' @export
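#' @examples
#' # a minimal sketch: monthly 60/40 rebalance weights, assuming 'm' is a
#' # supported frequency string for change_freq()
#' \dontrun{
#' auto_reb_wgt(c(0.6, 0.4), freq = 'm', date_start = as.Date('2020-01-01'))
#' }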
auto_reb_wgt <- function(reb_wgt, freq = NA, date_start = NA,
date_end = Sys.Date()) {
if (is.na(date_start)) {
message('date_start passed as NA, setting to 1970-01-01')
date_start <- as.Date('1970-01-01')
}
if (is.na(freq)) {
message('freq passed as NA, returning reb_wgt as data.frame')
return(data.frame(date = date_start, reb_wgt))
}
if (!is.numeric(reb_wgt)) {
warning('reb_wgt is not numeric, attempting to force with as.numeric')
reb_wgt <- as.numeric(reb_wgt)
}
day_vec <- seq.Date(from = date_start, to = date_end, by = 'days')
reb_wgt_mat <- matrix(reb_wgt, nrow = length(day_vec), ncol = length(reb_wgt),
byrow = TRUE)
reb_wgt_df <- data.frame(date = day_vec, reb_wgt_mat)
change_freq(reb_wgt_df, freq, 'price')
}
#' @title Asset contribution to portfolio return
#' @param p result of function \code{rebal()}, see \code{?rebal} for more details
#' @param date_start starting date for return contribution, default \code{NULL}
#' will use entire portfolio time-series
#' @param date_end ending date for return contribution
#' @return data.frame with asset contribution to return
#' @seealso rebal
#' @details \code{p} is the output of a rebal function which returns a list of
#' portfolio returns, asset returns, and historical weights. These time-series
#' are used to calculate each asset's contribution to the total portfolio
#' return over a given time-period. The resulting data.frame with the asset's
#' contribution to return will have a residual value as well which is the result
#' of compounding effects and transaction costs.
#' @export
#' @examples
#' data(ETF)
#' p <- rebal(ret, 'd')
#' contr_to_ret(p)
contr_to_ret <- function(p, date_start = NULL, date_end = Sys.Date()) {
if (is.null(p$asset_index)) {
stop('p must be the result of the rebal() function, see ?rebal')
}
if (is.null(date_start)) {
date_start <- p$asset_index$date[1]
}
freq <- guess_freq(p$port_ret)
if (freq == 'could not guess frequency') {
stop('port_ret does not have regular frequency')
}
index_start <- seq.Date(date_start, length.out = 2, by = paste0('-1 ', freq_to_string(freq)))[2]
on_or_after_start <- p$asset_index$date >= date_start
if (!any(on_or_after_start)) {
warning('asset last date is before date_start')
na_df <- data.frame(asset = c(colnames(p$asset_index)[2:ncol(p$asset_index)], 'residual', 'total'),
contr.to.ret = rep(NA, nrow(p$curr_df) + 2))
return(na_df)
}
if (min(p$asset_index$date) > date_start) {
warning('asset inception is after date_start')
na_df <- data.frame(asset = c(colnames(p$asset_index)[2:ncol(p$asset_index)], 'residual', 'total'),
contr.to.ret = rep(NA, nrow(p$curr_df) + 2))
return(na_df)
}
asset_index <- trunc_time_series(p$asset_index, index_start, date_end)
asset_ret <- trunc_time_series(p$asset_ret, date_start, date_end)
port_index <- trunc_time_series(p$port_index, date_start, date_end)
contr_mat <- asset_index[1:(nrow(asset_index) - 1), 2:ncol(asset_index)] *
asset_ret[, 2:ncol(asset_ret)]
contr <- apply(contr_mat, 2, sum, na.rm = TRUE) / port_index$port_index[1]
total_ret <- port_index$port_index[nrow(port_index)] / port_index$port_index[1] - 1
resid <- total_ret - sum(contr)
res <- data.frame(asset = c(colnames(p$asset_index)[2:ncol(p$asset_index)], 'residual', 'total'),
contr.to.ret = c(as.numeric(contr), resid, total_ret))
return(res)
}
init_cla <- function(mu_vec, up_bound = 1, low_bound = 0) {
n_assets <- nrow(mu_vec)
up_bound_vec <- matrix(up_bound, ncol = 1, nrow = n_assets)
low_bound_vec <- matrix(low_bound, ncol = 1, nrow = n_assets)
if (sum(low_bound_vec) > 1) {
stop('sum of lower bound weights is greater than 1')
}
# rank 1 corresponds to the highest expected return; ties broken by order
mu_vec_rank <- rank(-mu_vec, ties.method = 'first')
wgt_vec <- low_bound_vec
remain_wgt <- 1 - sum(wgt_vec)
for (i in 1:length(mu_vec)) {
max_mu_idx <- which(mu_vec_rank == i)
if (remain_wgt == 0) {
break
}
up_bound_i <- up_bound_vec[max_mu_idx, 1]
add_wgt <- up_bound_i - wgt_vec[max_mu_idx]
if (add_wgt <= remain_wgt) {
remain_wgt <- remain_wgt - (up_bound_i - wgt_vec[max_mu_idx])
wgt_vec[max_mu_idx] <- up_bound_i
} else {
wgt_vec[max_mu_idx] <- wgt_vec[max_mu_idx] + remain_wgt
remain_wgt <- 0
}
}
res <- list()
res$wgt_vec <- wgt_vec
res$i_free <- max_mu_idx
return(res)
}
sub_mat <- function(mu_vec, cov_mat, f) {
res <- list()
res$cov_f <- cov_mat[f, f]
res$mu_f <- mu_vec[f, 1]
res$one_f <- matrix(1, nrow = length(f), ncol = 1)
return(res)
}
crit_line_algo <- function(mu_vec, cov_mat, up_bound = 1, low_bound = 0) {
mu_vec <- matrix(mu_vec, ncol = 1)
n_assets <- nrow(mu_vec)
init_dat <- init_cla(mu_vec, up_bound, low_bound)
is_free <- matrix(FALSE, nrow = n_assets, ncol = 1)
is_free[init_dat$i_free] <- TRUE
which_is_free <- which(is_free)
is_bound <- !is_free
which_is_bound <- which(is_bound)
lambda_current <- Inf
i_inside <- 0
i_outside <- 0
t <- 1
wgt_mat <- list()
wgt_vec <- init_dat$wgt_vec
wgt_mat[[1]] <- wgt_vec
for (t in 1:1000) {
# Case a) Free asset moves to its bound
if (length(which_is_free) > 1) {
lambda_vec_in <- rep(NA, length(which_is_free))
s <- sub_mat(mu_vec, cov_mat, which_is_free)
cov_f_inv <- MASS::ginv(s$cov_f)
for (i in 1:length(which_is_free)) {
c_i <- -(t(s$one_f) %*% cov_f_inv %*% s$one_f)[1] * (cov_f_inv %*% s$mu_f)[i] +
(t(s$one_f) %*% cov_f_inv %*% s$mu_f)[1] * (cov_f_inv %*% s$one_f)[i]
lambda_i <- (cov_f_inv %*% s$one_f)[i] / c_i
lambda_vec_in[i] <- lambda_i
}
i_inside <- which(lambda_vec_in == max(lambda_vec_in[lambda_vec_in < lambda_current]))
}
# Case b) Asset on its bound becomes free
if (sum(is_free) < n_assets) {
which_is_bound <- which(!is_free)
lambda_vec_out <- rep(NA, length(which_is_bound))
for (i in 1:length(which_is_bound)) {
# evaluate the current free set plus the i-th bound asset as a candidate
free_i <- c(which_is_free, which_is_bound[i])
s <- sub_mat(mu_vec, cov_mat, free_i)
cov_f_inv <- MASS::ginv(s$cov_f)
j <- length(free_i) # position of the candidate asset in the sub matrices
c_i <- -(t(s$one_f) %*% cov_f_inv %*% s$one_f)[1] * (cov_f_inv %*% s$mu_f)[j] +
(t(s$one_f) %*% cov_f_inv %*% s$mu_f)[1] * (cov_f_inv %*% s$one_f)[j]
lambda_i <- (cov_f_inv %*% s$one_f)[j] / c_i
lambda_vec_out[i] <- lambda_i
}
i_outside <- which(lambda_vec_out == max(lambda_vec_out[lambda_vec_out < lambda_current]))
}
# Find turning point by comparing cases
if (length(i_inside) == 0) {
i_inside <- 0
}
if (length(i_outside) == 0) {
i_outside <- 0
}
if (i_inside != 0 | i_outside != 0) {
t <- t + 1
if (i_inside != 0) {
lambda_inside <- lambda_vec_in[i_inside]
} else {
lambda_inside <- NA
}
if (i_outside != 0) {
lambda_outside <- lambda_vec_out[i_outside]
} else {
lambda_outside <- NA
}
lambda_current <- max(lambda_inside, lambda_outside, na.rm = TRUE)
is_lambda_in_max <- lambda_inside == lambda_current
if (is.na(is_lambda_in_max)) {
is_lambda_in_max <- FALSE
}
if (is_lambda_in_max) {
is_free[which_is_free[i_inside]] <- FALSE
} else {
is_free[which_is_bound[i_outside]] <- TRUE
}
which_is_free <- which(is_free)
s <- sub_mat(mu_vec, cov_mat, which_is_free)
cov_f_inv <- MASS::ginv(s$cov_f)
gam <- 1 / (t(s$one_f) %*% cov_f_inv %*% s$one_f) -
((t(s$one_f) %*% cov_f_inv %*% s$mu_f) / (t(s$one_f) %*% cov_f_inv %*% s$one_f)) *
lambda_current
wgt_f <- lambda_current * cov_f_inv %*% s$mu_f + gam[1] * cov_f_inv %*% s$one_f
wgt_vec[which_is_free] <- wgt_f
wgt_mat[[t]] <- wgt_vec
}
if (i_inside == 0 & i_outside == 0) {
break
}
}
return(wgt_mat)
}
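# Illustrative usage sketch, kept as a comment since crit_line_algo is internal
# and not exported; the inputs below are hypothetical:
# mu <- c(0.08, 0.05, 0.03)
# cov_mat <- diag(c(0.04, 0.02, 0.01))
# wgt_list <- crit_line_algo(mu, cov_mat) # list of corner portfolio weights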