# fundamental
library(data.table)
library(purrr)
# data
library(fmpcloudr)
# wrangling help packages
library(runner)
library(matrixStats)
library(roll)
library(ggplot2)
library(tidyr)
library(PerformanceAnalytics)
library(TTR)
library(mlr3verse)
library(fst)
library(TSrepr)
# backtest
library(esback)
# performance
library(parallel)
library(foreach)
library(future.apply)
# volatility modeling
library(rugarch)
source('C:/Users/Mislav/Documents/GitHub/alphar/R/import_data.R')
# IMPORT DATA -------------------------------------------------------------
# import data
sp500_symbols <- import_sp500()
sp500_stocks <- import_locally("D:/market_data/equity/usa/hour/trades_adjusted", "csv")
spy <- import_blob(
  symbols = 'SPY',
  upsample = 1,
  trading_hours = TRUE,
  use_cache = TRUE,
  combine_data = FALSE,
  save_path = 'D:/market_data/equity/usa/hour/trades',
  container = "equity-usa-hour-trades"
)[[1]]
# calculate returns
spy <- na.omit(as.data.table(spy)[, returns := (close / shift(close)) - 1][])
sp500_stocks[, returns := (close / shift(close)) - 1, by = .(symbol)]
# OLD APPROACH ------------------------------------------------------------
# calculate the sum of returns in excess of the extreme negative and positive rolling quantiles
spy <- na.omit(as.data.table(spy)[, returns := (close / shift(close)) - 1][])
sp500_stocks[, returns := (close / shift(close)) - 1, by = .(symbol)]
sp500_stocks[, p_999 := roll_quantile(returns, 255*8*2, p = 0.999), by = .(symbol)]
sp500_stocks[, p_001 := roll_quantile(returns, 255*8*2, p = 0.001), by = .(symbol)]
sp500_stocks[, above := ifelse(returns > p_999, returns - p_999, 0), by = .(symbol)]
sp500_stocks[, below := ifelse(returns < p_001, abs(returns - p_001), 0), by = .(symbol)]
sp500_stocks[, above_dummy := ifelse(returns > p_999, 1, 0), by = .(symbol)]
sp500_stocks[, below_dummy := ifelse(returns < p_001, 1, 0), by = .(symbol)]
# get tail risk measures
sp500_indicators <- sp500_stocks[, .(below_sum = sum(below, na.rm = TRUE),
                                     above_sum = sum(above, na.rm = TRUE)),
                                 by = .(datetime)]
sp500_indicators <- sp500_indicators[order(datetime)]
sp500_indicators[, `:=`(excess = above_sum - below_sum)]
sp500_indicators[, excess_sma := SMA(excess, 8 * 9)]
sp500_indicators[, excess_sma_short := SMA(excess, 8 * 1)]
sp500_indicators[, excess_sma_long := SMA(excess, 8 * 10)]  # used in the comparison plots below
sp500_indicators <- as.data.table(spy)[, .(index, close, returns, volume)][sp500_indicators, on = c(index = 'datetime')]
sp500_indicators <- na.omit(sp500_indicators)
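# Small self-contained sanity check of the tail-excess construction above
# (a sketch; the simulated series and the 50-bar window are purely illustrative).
set.seed(42)
toy_ret <- rnorm(500, sd = 0.01)
toy_p999 <- roll_quantile(toy_ret, 50, p = 0.999)
toy_p001 <- roll_quantile(toy_ret, 50, p = 0.001)
toy_above <- ifelse(toy_ret > toy_p999, toy_ret - toy_p999, 0)
toy_below <- ifelse(toy_ret < toy_p001, abs(toy_ret - toy_p001), 0)
summary(na.omit(toy_above - toy_below))  # per-series analogue of `excess`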
# analyse results
vertical_lines_sell <- sp500_indicators[below_sum > 0.7, index]
vertical_lines_buy <- sp500_indicators[above_sum > 0.7, index]
ggplot(sp500_indicators, aes(x = index, y = close)) +
  geom_line() +
  geom_vline(xintercept = vertical_lines_sell, color = 'red') +
  geom_vline(xintercept = vertical_lines_buy, color = 'green')
ggplot(sp500_indicators[index %between% c('2020-01-01', '2020-06-01')], aes(x = index, y = close)) +
  geom_line() +
  geom_vline(xintercept = vertical_lines_sell, color = 'red') +
  geom_vline(xintercept = vertical_lines_buy, color = 'green')
ggplot(sp500_indicators, aes(x = index, y = excess_sma)) +
geom_line()
ggplot(sp500_indicators[index %between% c('2020-01-01', '2020-06-01')], aes(x = index, y = excess_sma)) +
geom_line()
# plot excess sma tail returns
ggplot(sp500_indicators, aes(x = index)) +
geom_line(aes(y = excess_sma), color = 'blue') +
geom_line(aes(y = excess_sma_long), color = 'red') +
geom_line(aes(y = excess_sma_short), color = 'green')
ggplot(sp500_indicators[index %between% c('2020-01-01', '2020-06-01')], aes(x = index)) +
geom_line(aes(y = excess_sma), color = 'blue') +
geom_line(aes(y = excess_sma_long), color = 'red') +
geom_line(aes(y = excess_sma_short), color = 'green')
ggplot(sp500_indicators[index %between% c('2015-08-20', '2015-09-15')], aes(x = index)) +
geom_line(aes(y = excess_sma), color = 'blue') +
geom_line(aes(y = excess_sma_long), color = 'red') +
geom_line(aes(y = excess_sma_short), color = 'green')
ggplot(sp500_indicators[index %between% c('2007-01-01', '2009-01-01')], aes(x = index)) +
geom_line(aes(y = excess_sma), color = 'blue') +
geom_line(aes(y = excess_sma_long), color = 'red') +
geom_line(aes(y = excess_sma_short), color = 'green')
# trading rule
# side <- rep(NA, nrow(sp500_indicators))
# side[which(sp500_indicators$below > 0.6) + 1] <- 0
# side[which(sp500_indicators$above > 0.5) + 1] <- 1
# side[1] <- 1
# side <- na.locf(side, na.rm = FALSE)
# side <- ifelse(shift(sp500_indicators$excess_sma) < -0.01, 0, side)
# zero_to_one <- frollsum(side, 8*30)
# side[which(zero_to_one == 0)] <- 1
# short
# side <- ifelse(side == 0, -1, side)
# simple trading rule: when the excess diff drops below the threshold, sell
side <- ifelse(shift(sp500_indicators$excess_sma_short) < -0.005, 0, 1)
# alternative rule: excess diff crosses below its long moving average
# side <- ifelse(shift(sp500_indicators$excess_sma_short) < -0.015 &
# shift(sp500_indicators$excess_sma_short) < shift(sp500_indicators$excess_sma_long),
# 0, 1)
# backtest
returns_strategy <- side * sp500_indicators$returns
beckteset_data <- cbind(sp500_indicators[, .(index, returns)], returns_strategy)
beckteset_data <- na.omit(beckteset_data)
mlr3measures::acc(as.factor(ifelse(beckteset_data$returns[shift(sp500_indicators$excess_sma) < -0.001] > 0, 1, 0))[-1],
                  as.factor(ifelse(beckteset_data$returns_strategy[shift(sp500_indicators$excess_sma) < -0.001] > 0, 1, 0))[-1])
charts.PerformanceSummary(beckteset_data)
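# A few summary statistics for the backtest above (a sketch; the annualization
# factor assumes roughly 252 trading days * 8 hourly bars, which may not match
# this dataset exactly).
bt_xts <- as.xts.data.table(beckteset_data)   # index, returns, returns_strategy
PerformanceAnalytics::table.AnnualizedReturns(bt_xts, scale = 252 * 8)
PerformanceAnalytics::maxDrawdown(bt_xts)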
# OPTIMIZE STRATEGY -------------------------------------------------------
# params
sma_width <- 1:60
threshold <- seq(-0.00001, -0.01, by = -0.00001)
paramset <- expand.grid(sma_width, threshold)
colnames(paramset) <- c('sma_width', 'threshold')
# backtest
optimize_strategy <- function(sma_width, threshold) {
  excess_sma <- SMA(sp500_indicators$excess, sma_width)
  side <- ifelse(shift(excess_sma) < threshold, 0, 1)
  returns_strategy <- side * sp500_indicators$returns
  beckteset_data <- cbind(sp500_indicators[, .(returns)], returns_strategy)
  beckteset_data <- na.omit(beckteset_data)
  PerformanceAnalytics::Return.cumulative(beckteset_data)[, 2]
}
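# future_vapply() only runs in parallel once a future plan is set; a minimal
# setup (the multisession backend and worker count are assumptions, adjust to
# your machine):
plan(multisession, workers = max(1, availableCores() - 1))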
cum_returns <- future_vapply(1:nrow(paramset),
                             function(x) optimize_strategy(paramset[x, 1], paramset[x, 2]),
                             numeric(1))
results <- as.data.table(cbind(paramset, cum_returns))
# n best
head(results[order(cum_returns, decreasing = TRUE), ], 50)
tail(results[order(cum_returns, decreasing = TRUE), ], 50)
# plot
ggplot(results, aes(x = sma_width, y = threshold, fill = cum_returns)) +
geom_tile()
ggplot(results[sma_width %in% 10:25 & threshold %between% c(-0.001, -0.00001)],
aes(x = sma_width, y = threshold, fill = cum_returns)) +
geom_tile()
ggplot(results[sma_width %in% 1:10 & threshold %between% c(-0.01, -0.0001)],
aes(x = sma_width, y = threshold, fill = cum_returns)) +
geom_tile()
ggplot(results, aes(cum_returns)) +
geom_histogram()
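# Benchmark for the grid above: cumulative buy-and-hold return over the same
# sample (computed in base R to avoid any class-handling assumptions).
prod(1 + na.omit(sp500_indicators$returns)) - 1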
# try one backtest
sp500_indicators[, excess_sma := SMA(excess, 35)]
# sp500_indicators[, excess_sma_long := SMA(excess, 8 * 10)]
# side <- ifelse(shift(sp500_indicators$excess_sma) < -0.007 | shift(sp500_indicators$excess_sma_long < -0.005), 0, 1)
side <- ifelse(shift(sp500_indicators$excess_sma) < -0.0002, 0, 1)
# side <- ifelse(shift(sp500_indicators$excess_sma < -0.007), 0, 1)
returns_strategy <- side * sp500_indicators$returns
beckteset_data <- cbind(sp500_indicators[, .(index, returns)], returns_strategy)
beckteset_data <- na.omit(beckteset_data)
charts.PerformanceSummary(beckteset_data, plot.engine = 'ggplot2')
# OPTIMIZE STRATEGY WITH SHORT AND LONG -----------------------------------
# params
threshold_short <- seq(-0.001, -0.01, by = -0.001)
threshold_long <- seq(-0.001, -0.01, by = -0.001)
paramset <- expand.grid(threshold_short, threshold_long)
colnames(paramset) <- c('threshold_short', 'threshold_long')
# backtest
optimize_strategy <- function(threshold_short, threshold_long) {
  side <- ifelse(shift(sp500_indicators$excess_sma_short) < threshold_short |
                   shift(sp500_indicators$excess_sma) < threshold_long, 0, 1)
  returns_strategy <- side * sp500_indicators$returns
  beckteset_data <- cbind(sp500_indicators[, .(returns)], returns_strategy)
  beckteset_data <- na.omit(beckteset_data)
  PerformanceAnalytics::Return.cumulative(beckteset_data)[, 2]
}
plan(multisession, workers = availableCores() - 16)
cum_returns <- future_vapply(1:nrow(paramset), function(x) optimize_strategy(paramset[x, 1], paramset[x, 2]), numeric(1))
results <- as.data.table(cbind(paramset, cum_returns))
# n best
head(results[order(cum_returns, decreasing = TRUE), ], 50)
# plot
ggplot(results, aes(x = threshold_short, y = threshold_long, fill = cum_returns)) +
geom_tile()
ggplot(results[threshold_short %between% c(-0.005, -0.001) & threshold_long %between% c(-0.005, -0.001)],
       aes(x = threshold_short, y = threshold_long, fill = cum_returns)) +
  geom_tile()
# PREDICT EXCESS ----------------------------------------------------------
library(forecast)
# train test split
sample <- unlist(sp500_indicators[((255*8*2)+481):nrow(sp500_indicators), 'excess_sma'], use.names = FALSE)
split_ratio <- 0.6
train <- sample[1:floor(length(sample) * split_ratio)]
test <- sample[(floor(length(sample) * split_ratio)+1):length(sample)]
# Multi-step forecasts with re-estimation
h <- 5 # forecasting horizon
n <- length(test) - h + 1
fit <- auto.arima(train)
order <- arimaorder(fit)
fcmat <- matrix(0, nrow=n, ncol=h)
for (i in 1:n) {
  x <- c(train, test[seq_len(i - 1)])       # expanding window through the test set
  refit <- Arima(x, order = order[1:3])     # re-estimate with the order selected on the training set
  fcmat[i, ] <- forecast(refit, h = h)$mean
}
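# Evaluate the re-estimated forecasts against the held-out observations
# (a sketch; fcmat[i, ] holds the h-step forecasts made with information up to
# test position i - 1, so its first column targets test[1:n]).
realized_1step <- test[1:n]
mae_1step <- mean(abs(fcmat[, 1] - realized_1step), na.rm = TRUE)
mae_1step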
arima_forecasts <- runner(
  x = sample[1:1020],
  f = function(x) {
    fit <- auto.arima(x)
    forecast(fit, h = 5)$mean
  },
  k = 1000,
  na_pad = FALSE
)
arima_forecasts[1, ]
# compare forecasts and real
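# One way to line the rolling ARIMA forecasts up with the realized series
# (a sketch; it assumes runner() simplified the 5-step forecasts into a matrix
# whose columns correspond to window end positions, as the indexing above implies).
h1 <- as.numeric(arima_forecasts[1, ])     # 1-step-ahead forecast made at each window end
realized <- sample[seq_along(h1) + 1]      # observation one step after each window end
ok <- !is.na(h1) & !is.na(realized)
TSrepr::mae(realized[ok], h1[ok])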
# CRYPTO ------------------------------------------------------------------
# load data
crypto <- as.data.table(read_fst('data/crypto.fst'))
setkey(crypto, crypto_id)
crypto_ids <- unique(crypto$crypto_id)
sample <- crypto[.(crypto_ids[2:200])]
sample[, date_time := as.POSIXct(date_time, "%Y-%m-%d %H:%M:%S")]
# calculate the sum of returns in excess of the extreme negative and positive rolling quantiles
bitcoin <- crypto[.(crypto_ids[1])]
bitcoin[, date_time := as.POSIXct(date_time, "%Y-%m-%d %H:%M:%S")]
bitcoin <- na.omit(bitcoin[, returns := (price / shift(price)) - 1][])
sample[, returns := (price / shift(price)) - 1, by = .(crypto_id)]
sample[, p_999 := roll_quantile(returns, 255*8*2, p = 0.999), by = .(crypto_id)]
sample[, p_001 := roll_quantile(returns, 255*8*2, p = 0.001), by = .(crypto_id)]
sample[, above := ifelse(returns > p_999, returns - p_999, 0), by = .(crypto_id)]
sample[, below := ifelse(returns < p_001, abs(returns - p_001), 0), by = .(crypto_id)]
sample[, above_dummy := ifelse(returns > p_999, 1, 0), by = .(crypto_id)]
sample[, below_dummy := ifelse(returns < p_001, 1, 0), by = .(crypto_id)]
# get tail risk measures
indicators <- sample[, .(below = sum(below, na.rm = TRUE),
                         above = sum(above, na.rm = TRUE)), by = .(date_time)]
indicators[, excess := above - below]
indicators[, excess_sma := SMA(excess, 8 * 9)]
indicators <- bitcoin[, .(date_time, price, returns)][indicators, on = 'date_time']
indicators <- indicators[order(date_time)]
indicators <- na.omit(indicators)
# analyse results
vertical_lines_sell <- indicators[below > 0.4, date_time]
vertical_lines_buy <- indicators[above > 0.4, date_time]
ggplot(indicators, aes(x = date_time, y = price)) +
  geom_line() +
  geom_vline(xintercept = vertical_lines_sell, color = 'red') +
  geom_vline(xintercept = vertical_lines_buy, color = 'green')
# optimize params
sma_width <- 1:40
threshold <- seq(-0.001, -1, by = -0.01)
paramset <- expand.grid(sma_width, threshold)
colnames(paramset) <- c('sma_width', 'threshold')
# optimize
optimize_strategy <- function(sma_width, threshold) {
  excess_sma <- SMA(indicators$excess, 8 * sma_width)
  side <- ifelse(shift(excess_sma) < threshold, 0, 1)
  returns_strategy <- side * indicators$returns
  beckteset_data <- cbind(indicators[, .(returns)], returns_strategy)
  beckteset_data <- na.omit(beckteset_data)
  PerformanceAnalytics::Return.cumulative(beckteset_data)[, 2]
}
cum_returns <- future_vapply(1:nrow(paramset),
                             function(x) optimize_strategy(paramset[x, 1], paramset[x, 2]),
                             numeric(1))
results <- as.data.table(cbind(paramset, cum_returns))
# n best
head(results[order(cum_returns, decreasing = TRUE), ], 50)
tail(results[order(cum_returns, decreasing = TRUE), ], 50)
# plot
ggplot(results, aes(x = sma_width, y = threshold, fill = cum_returns)) +
geom_tile()
ggplot(results[sma_width %in% 1:10 & threshold %between% c(-0.02, -0.0001)],
aes(x = sma_width, y = threshold, fill = cum_returns)) +
geom_tile()
ggplot(results[sma_width %in% 1:10 & threshold %between% c(-0.01, -0.0001)],
aes(x = sma_width, y = threshold, fill = cum_returns)) +
geom_tile()
ggplot(results, aes(cum_returns)) +
geom_histogram()
# try one backtest
indicators[, excess_sma_long := SMA(excess, 8 * 2)]
# side <- ifelse(shift(sp500_indicators$excess_sma) < -0.007 | shift(sp500_indicators$excess_sma_long < -0.005), 0, 1)
side <- ifelse(shift(indicators$excess_sma) < -0.005, 0, 1)
table(side)
# side <- ifelse(shift(sp500_indicators$excess_sma < -0.007), 0, 1)
returns_strategy <- side * indicators$returns
beckteset_data <- cbind(indicators[, .(date_time, returns)], returns_strategy)
beckteset_data <- na.omit(beckteset_data)
charts.PerformanceSummary(beckteset_data)
# ES AND VAR ESTIMATION --------------------------------------------------------------
# import data and merge
gpd_risks_left_tail <- fread(file.path(risk_path, 'gpd_risks_left_tail.csv'), sep = ';')
gpd_risks_right_tail <- fread(file.path(risk_path, 'gpd_risks_right_tail.csv'))
cols <- colnames(gpd_risks_right_tail)[grep('q_|e_', colnames(gpd_risks_right_tail))]
gpd_risks_right_tail[, (cols) := lapply(.SD, as.numeric), .SDcols = cols]
gpd_risks_left_tail[, (cols) := lapply(.SD, as.numeric), .SDcols = cols]
colnames(gpd_risks_left_tail) <- paste0("left_", colnames(gpd_risks_left_tail))
colnames(gpd_risks_right_tail) <- paste0("right_", colnames(gpd_risks_right_tail))
# merge spy stocks and risk measures
sp500_stocks <- sp500_stocks[gpd_risks_left_tail[left_symbol != 'SPY'], on = c(.id = 'left_symbol', index = 'left_date')]
sp500_stocks <- sp500_stocks[gpd_risks_right_tail[right_symbol != 'SPY'], on = c(.id = 'right_symbol', index = 'right_date')]
# calculate indicators
left_q_cols <- colnames(sp500_stocks)[grep('left_q', colnames(sp500_stocks))]
right_q_cols <- colnames(sp500_stocks)[grep('right_q', colnames(sp500_stocks))]
excess_cols <- gsub("right", "excess", colnames(sp500_stocks)[grep('right_q', colnames(sp500_stocks))])
excess_cols_dummy <- paste0('dummy_', excess_cols)
sp500_stocks[, (excess_cols) := sp500_stocks[, ..left_q_cols] - sp500_stocks[, ..right_q_cols]]
sp500_stocks[, (excess_cols_dummy) := lapply(.SD, function(x) ifelse(x > 0, 1, 0)), .SDcols = excess_cols]
sp500_stocks[, paste0(excess_cols, '_median') := lapply(.SD, function(x) roll_median(x, 300, min_obs = 100)),
             by = .(.id),
             .SDcols = excess_cols]
# alternative indicators
sample <- sp500_stocks[, .(.id, index)]
sample[, indicator := rowSums(sp500_stocks[, ..excess_cols_dummy], na.rm = TRUE)]
sample <- sample[order(indicator, decreasing = TRUE)]
sample <- sample[, date := lubridate::date(index)]
test <- sample[, sum(indicator, na.rm = TRUE), by = c('.id', 'date')]
test <- test[order(V1, decreasing = TRUE)]
test[1:99]
#
sample <- sp500_stocks[, ..excess_cols] - sp500_stocks[, paste0(excess_cols, '_median'), with = FALSE]
sample <- data.table(excess_mean = rowMeans(as.matrix(sample), na.rm = TRUE))
sample <- cbind(.id = sp500_stocks$.id, index = sp500_stocks$index, sample)
sample <- sample[order(excess_mean, decreasing = TRUE)]
sample <- sample[index %between% c('2015-01-01', '2020-12-01')]
sample <- na.omit(sample)
sample[, date := lubridate::date(index)]
# sample <- sample[excess_mean %between% c(2, 10)]
sample[(nrow(sample)-199):(nrow(sample) - 100)]
test <- sample[, median(excess_mean, na.rm = TRUE), by = c('.id', 'date')]
test <- test[order(V1, decreasing = TRUE)]
test <- test[V1 %between% c(-20, 20)]
x <- test[.id == 'KSS']
x <- x[order(date)]
plot(x$date, x$V1)
# strategy
symbol <- 'TSLA'
sample <- sp500_stocks[.id == symbol, .(index, close, returns, volume, right_q_9990_500_15, left_q_9990_500_15)]
sample[, premium := right_q_9990_500_15 - left_q_9990_500_15]
sample[, premium_median := roll_quantile(premium, 1000, p = c(0.90), min_obs = 100)]
sample <- na.omit(sample)
# ggplot(sample, aes(index)) + geom_line(aes(y = premium_median * 100)) +
# geom_line(aes(y = premium * 100)) + geom_line(aes(y = close))
# backtest
sample[, side := ifelse(shift(premium) > shift(premium_median), 1, 0)]
sample[, returns_strategy := side * returns]
PerformanceAnalytics::charts.PerformanceSummary(sample[, .(index, returns, returns_strategy)], plot.engine = 'ggplot2')
charts.PerformanceSummary(sample[index %between% c('2017-01-01', '2018-01-01'), .(index, returns, returns_strategy)],
plot.engine = 'ggplot2')
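# Quick diagnostics for the premium rule (a sketch using only columns created
# above): time spent long vs flat, and the share of positive bars while long.
sample[, .N, by = side]
sample[side == 1, mean(returns > 0, na.rm = TRUE)]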
# graph analysis
alpha <- cbind(sample,
               es_left = gpd_risks_left_tail$left_e_9990_500_10,
               es_right = gpd_risks_right_tail$right_e_9990_500_10,
               q_left = gpd_risks_left_tail$left_q_9990_500_10,
               q_right = gpd_risks_right_tail$right_q_9990_500_10)
alpha[, date := lubridate::date(index)]
alpha <- alpha[order(date)]
alpha <- alpha[, .(es_right_mean = mean(es_right, na.rm = TRUE),
                   es_left_mean = mean(es_left, na.rm = TRUE),
                   es_left_median = median(es_left, na.rm = TRUE),
                   es_right_median = median(es_right, na.rm = TRUE),
                   es_right_sum = sum(es_right, na.rm = TRUE),
                   es_left_sum = sum(es_left, na.rm = TRUE),
                   q_right_mean = mean(q_right, na.rm = TRUE),
                   q_left_mean = mean(q_left, na.rm = TRUE),
                   q_left_median = median(q_left, na.rm = TRUE),
                   q_right_median = median(q_right, na.rm = TRUE),
                   q_right_sum = sum(q_right, na.rm = TRUE),
                   q_left_sum = sum(q_left, na.rm = TRUE)
                   ), by = .(date)]
alpha <- alpha[spy, on = c('date' = 'index')]
alpha[, colnames(alpha)[2:12] := lapply(.SD, na.locf, na.rm = FALSE), .SDcols = (colnames(alpha)[2:12])]
alpha[, `:=`(es_excess_mean = es_right_mean - es_left_mean,
es_excess_median = es_right_median - es_left_median,
es_excess_sum = es_right_sum - es_left_sum,
q_excess_mean = q_right_mean - q_left_mean,
q_excess_median = q_right_median - q_left_median,
q_excess_sum = q_right_sum - q_left_sum
)]
alpha <- alpha[es_excess_mean < 100 & es_excess_sum < 100 & es_excess_mean > -100 & es_excess_sum > -100]
# basic series
ggplot(alpha, aes(x = date)) +
# geom_line(aes(y = es_excess_mean * 1000), color = 'blue') +
# geom_line(aes(y = es_excess_median * 1000), color = 'red') +
# geom_line(aes(y = es_excess_sum * 10), color = 'orange') +
geom_line(aes(y = q_excess_mean * 10000), color = 'purple') +
geom_line(aes(y = q_excess_median * 10000), color = 'brown') +
# geom_line(aes(y = q_excess_sum * 10), color = 'green') +
geom_line(aes(y = close))
ggplot(alpha[date %between% c('2007-01-01', '2010-01-01')], aes(x = date)) +
geom_line(aes(y = es_excess_mean * 100), color = 'blue') +
geom_line(aes(y = es_excess_median * 1000), color = 'red') +
geom_line(aes(y = es_excess_sum * 10), color = 'orange') +
geom_line(aes(y = q_excess_mean * 1000), color = 'purple') +
geom_line(aes(y = q_excess_median * 1000), color = 'brown') +
geom_line(aes(y = q_excess_sum * 100), color = 'green') +
geom_line(aes(y = close))
ggplot(alpha[date %between% c('2020-01-01', '2021-06-01')], aes(x = date)) +
geom_line(aes(y = es_excess_mean * 1000), color = 'blue') +
geom_line(aes(y = es_excess_median * 1000), color = 'red') +
geom_line(aes(y = es_excess_sum * 100), color = 'orange') +
geom_line(aes(y = q_excess_mean * 10000), color = 'purple') +
geom_line(aes(y = q_excess_median * 10000), color = 'brown') +
geom_line(aes(y = q_excess_sum * 100), color = 'green') +
geom_line(aes(y = close))
# diff
ggplot(alpha, aes(x = date)) +
geom_line(aes(y = es_excess_mean * 1000), color = 'blue') +
geom_line(aes(y = close))
ggplot(alpha[date %between% c('2007-06-01', '2010-01-01')], aes(x = date)) +
geom_line(aes(y = (es_right_mean - es_left_mean) * 1000), color = 'blue') +
geom_line(aes(y = close))
ggplot(alpha[date %between% c('2020-01-01', '2021-01-01')], aes(x = date)) +
geom_line(aes(y = es_excess_mean * 1000), color = 'blue') +
geom_line(aes(y = close))
ggplot(alpha[date %between% c('2015-01-01', '2016-01-01')], aes(x = date)) +
geom_line(aes(y = es_excess_mean * 1000), color = 'blue') +
geom_line(aes(y = close))
# sma
ggplot(alpha, aes(x = date)) +
geom_line(aes(y = SMA(es_left_mean, 10) * 1000), color = 'blue') +
geom_line(aes(y = SMA(es_left_mean, 200) * 1000), color = 'red') +
geom_line(aes(y = close))
ggplot(alpha[date %between% c('2020-01-01', '2021-01-01')], aes(x = date)) +
geom_line(aes(y = SMA(es_left_mean, 10) * 1000), color = 'blue') +
geom_line(aes(y = SMA(es_left_mean, 200) * 1000), color = 'red') +
geom_line(aes(y = close))
ggplot(alpha[date %between% c('2007-01-01', '2010-01-01')], aes(x = date)) +
geom_line(aes(y = SMA(es_left_mean, 10) * 1000), color = 'blue') +
geom_line(aes(y = SMA(es_left_mean, 200) * 1000), color = 'red') +
geom_line(aes(y = close))
# backtest
side <- ifelse(shift(alpha$es_excess_mean * 1000) < -25, 0, 1)
table(side)
returns_strategy <- side * alpha$returns
beckteset_data <- cbind(alpha[, .(date, returns)], returns_strategy)
beckteset_data <- na.omit(beckteset_data)
charts.PerformanceSummary(beckteset_data)
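# Rough count of position changes implied by the rule (a sketch; each switch
# between 0 and 1 is treated as one trade, ignoring transaction costs).
sum(abs(diff(na.omit(side))))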
# PERFORMANCE ANALYTICS ---------------------------------------------------
# conditional value at risk (expected shortfall) of the strategy returns from the backtest above
PerformanceAnalytics::CVaR(beckteset_data$returns_strategy, p = 0.95)
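# Compare CVaR estimators on the same series (a sketch; the 0.95 level and the
# choice of estimators are illustrative, not taken from the original analysis).
sapply(c('historical', 'gaussian', 'modified'), function(m) {
  as.numeric(PerformanceAnalytics::CVaR(beckteset_data$returns_strategy, p = 0.95, method = m))
})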
# GARCH MODEL -------------------------------------------------------------
# generate forecasts using rugarch package
.all_ok <- function(fit, spec) {
  tryCatch({
    valid_messages <- c( # nlminb
      'X-convergence (3)',
      'relative convergence (4)',
      'both X-convergence and relative convergence (5)',
      'singular convergence (7)',
      'false convergence (8)'
    )
    all_ok <- all(is.null(fit) == FALSE, inherits(fit, 'try-error') == FALSE)
    if (all_ok) {
      message <- fit@fit$message
      if (is.null(message)) {
        all_ok <- TRUE
      } else {
        all_ok <- (message %in% valid_messages)
      }
    }
    if (all_ok) {
      spec_fix <- spec
      setfixed(spec_fix) <- as.list(fit@fit$solver$sol$par)
      filter <- ugarchfilter(spec = spec_fix, data = fit@model$modeldata$data)
      forc <- ugarchforecast(spec_fix, fit@model$modeldata$data, n.ahead = 1)
      all_ok <- all(c(is.finite(c(fitted(forc), sigma(forc), quantile(forc, 0.01))), abs(quantile(forc, 0.01)) < 1))
    }
    if (all_ok) {
      z <- (filter@model$modeldata[[2]] - fitted(filter)) / sigma(filter)
      all_ok <- all(all(is.finite(z)), all(abs(z) < 1e6))
    }
    all_ok
  }, error = function(cond) {FALSE})
}
get_forecasts <- function(r, spec, alpha, win, refit) {
  n <- nrow(r)
  end_values <- seq(win + refit, n, by = refit)
  nr_windows <- length(end_values)
  out.sample <- rep(refit, nr_windows)
  if (end_values[nr_windows] < n) {
    end_values <- c(end_values, n)
    nr_windows <- nr_windows + 1
    out.sample <- c(out.sample, end_values[nr_windows] - end_values[nr_windows - 1])
  }
  forc <- foreach(i = 1:nr_windows, .combine = 'rbind') %do% {
    # Subset data
    r_ <- r[(end_values[i] - win - refit + 1):end_values[i], ]
    # Fit the model
    fit <- try(ugarchfit(spec = spec, data = r_, out.sample = out.sample[i], solver = 'nlminb'), silent = TRUE)
    if (!.all_ok(fit, spec)) {
      fit <- try(ugarchfit(spec = spec, data = r_, out.sample = out.sample[i], solver = 'solnp'), silent = TRUE)
    }
    if (.all_ok(fit, spec)) {
      last_working_fit <- fit
    } else {
      print(paste0('restored model at window = ', i))
      fit <- last_working_fit
    }
    # Forecasts of mean and variance
    spec_fix <- spec
    setfixed(spec_fix) <- as.list(fit@fit$solver$sol$par)
    forc <- ugarchforecast(spec_fix, r_, n.ahead = 1, n.roll = out.sample[i] - 1, out.sample = out.sample[i])
    m <- fitted(forc)
    s <- sigma(forc)
    m <- xts(as.numeric(m), order.by = tail(index(r_), out.sample[i]))
    s <- xts(as.numeric(s), order.by = tail(index(r_), out.sample[i]))
    # Quantile function of the innovations
    qf <- function(x, spec) qdist(distribution = spec@model$modeldesc$distribution, p = x, mu = 0, sigma = 1,
                                  shape = spec@model$fixed.pars$shape, skew = spec@model$fixed.pars$skew,
                                  lambda = spec@model$fixed.pars$ghlambda)
    # VaR and ES of the innovations
    vq <- qf(alpha, spec_fix)
    ve <- integrate(qf, 0, alpha, spec = spec_fix)$value / alpha
    # VaR and ES of the returns
    q <- m + s * vq
    e <- m + s * ve
    df <- cbind(tail(r_, out.sample[i]), m, s, q, e)
    colnames(df) <- c('r', 'm', 's', 'q', 'e')
    df
  }
  forc
}
# RISK FACTORS ------------------------------------------------------------
# prepare data
spy <- na.omit(as.data.table(spy)[, returns := (close / shift(close)) - 1][])
sp500_stocks[, returns := (close / shift(close)) - 1, by = .(`.id`)]
returns <- na.omit(sp500_stocks[, .(.id, index, returns)])
setnames(returns, colnames(returns), c('symbol', 'date', 'return'))
returns <- returns[returns[, .N, by = .(symbol)][N > 3000], on = "symbol"] # keep stocks with at least 3000 hourly observations (over a year of data)
sorted_tickers <- unique(sp500_stocks$.id)[1:2]
# garch ES and VaR estimates
setup <- expand.grid(
  variance.model = c('gjrGARCH'),    # c('gjrGARCH', 'csGARCH')
  distribution.model = c('std'),     # c('std', 'ged', 'jsu')
  last_x = 2500,
  win = c(1000, 2000),
  refit = c(5),
  alpha = 0.025,
  symbol = sorted_tickers,
  stringsAsFactors = FALSE
)
# register a parallel backend for %dopar% (doParallel assumed installed; adjust the worker count)
library(doParallel)
cl <- makeCluster(4L)
registerDoParallel(cl)
tmp <- foreach(row = seq_len(nrow(setup)), .errorhandling = 'pass',
               .packages = c('data.table', 'xts', 'rugarch', 'foreach'),
               .export = c('get_forecasts', '.all_ok')) %dopar% {
  st <- c(setup[row, ])
  file <- paste0('backtests/results/', paste(st, collapse = '-'), '.rds')
  if (!file.exists(file)) {
    file.create(file)
    print(paste0('Processing ', st$symbol, ', win = ', st$win, ', refit = ', st$refit))
    r <- returns[symbol == st$symbol, .(date, return)]
    r <- as.xts.data.table(r)
    spec <- ugarchspec(
      mean.model = list(armaOrder = c(0, 0), include.mean = FALSE),
      variance.model = list(model = st$variance.model),
      distribution.model = st$distribution.model
    )
    forc <- get_forecasts(r = r, spec = spec, alpha = st$alpha, win = st$win, refit = st$refit)
    saveRDS(list(forc = forc, settings = st), file)
    print(paste0('Saved results to ', file))
  }
}
stopCluster(cl)
plan(multisession, workers = 4)
tmp <- future_lapply(1:nrow(setup), function(x) {
  st <- c(setup[x, ])
  r <- returns[symbol == st$symbol, .(date, return)]
  r <- as.xts.data.table(r)
  spec <- ugarchspec(
    mean.model = list(armaOrder = c(0, 0), include.mean = FALSE),
    variance.model = list(model = st$variance.model),
    distribution.model = st$distribution.model
  )
  forc <- get_forecasts(r = r, spec = spec, alpha = st$alpha, win = st$win, refit = st$refit)
  return(forc)
})
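# The esback package loaded above can backtest these ES forecasts directly
# (a minimal sketch; it assumes `forc` is one element of `tmp`, i.e. an xts with
# the r/q/e columns built by get_forecasts(), and uses the ESR test of
# Bayer & Dimitriadis with the same alpha as in `setup`).
forc <- tmp[[1]]
esback::esr_backtest(r = as.numeric(forc$r), q = as.numeric(forc$q), e = as.numeric(forc$e),
                     alpha = 0.025, version = 1)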