library(data.table)
library(timechange)
library(roll)
library(tiledb)
library(lubridate)
library(rtsplot)
library(TTR)
library(patchwork)
library(ggplot2)
library(AzureStor)
library(PerformanceAnalytics)
library(DescTools)
# UTILS -------------------------------------------------------------------
# date segments
GFC <- c("2007-01-01", "2010-01-01")
COVID <- c("2020-01-01", "2021-06-01")
AFTER_COVID <- c("2021-06-01", "2022-01-01")
NEW <- c("2022-01-01", as.character(Sys.Date()))
# IMPORT DATA -------------------------------------------------------------
# import market data
# arr <- tiledb_array("/home/matej/Desktop/stock_data/equity-usa-hour-fmpcloud-adjusted", as.data.frame = TRUE)
# system.time(hour_data <- arr[])
# tiledb_array_close(arr)
# hour_data_dt <- as.data.table(hour_data)
# hour_data_dt[, time := as.POSIXct(time, tz = "UTC")]
#
# # keep only trading hours
# hour_data_dt <- hour_data_dt[as.integer(time_clock_at_tz(time,
# tz = "America/New_York",
# units = "hours")) %in% 10:16]
#
# # clean data
# hour_data_dt[, returns := close / shift(close) - 1, by = "symbol"]
# hour_data_dt <- unique(hour_data_dt, by = c("symbol", "time"))
# hour_data_dt <- na.omit(hour_data_dt)
#
# # spy data
# spy <- hour_data_dt[symbol == "SPY", .(time, close, returns)]
# spy <- unique(spy, by = "time")
#
# # visualize
# rtsplot(as.xts.data.table(spy[, .(time, close)]))
# rtsplot(as.xts.data.table(hour_data_dt[symbol == "AAPL", .(time, close)]))
#
# MINMAX INDICATORS -------------------------------------------------------
# read old data
# market_data <- fread("/home/matej/Desktop/minmax_data_20221207.csv")
market_data <- fread("D:/risks/minmax/minmax_data_20221207.csv")
market_data <- as.data.table(market_data)
market_data[, time := as.POSIXct(time, tz = "UTC")]
# keep only trading hours
market_data <- market_data[as.integer(time_clock_at_tz(time,
tz = "America/New_York",
units = "hours")) %in% 10:16]
# clean data
market_data[, returns := close / shift(close) - 1, by = "symbol"]
market_data <- unique(market_data, by = c("symbol", "time"))
market_data <- na.omit(market_data)
# spy data
spy <- market_data[symbol == "SPY", .(time, close, returns)]
spy <- unique(spy, by = "time")
rm(arr)
rm(config)
rm(context_with_config)
rm(hour_data)
rm(hour_data_dt)
gc()
# exrtreme returns
cols <- colnames(market_data)[grep("^p_9", colnames(market_data))]
cols_new <- paste0("above_", cols)
market_data[, (cols_new) := lapply(.SD, function(x) ifelse(returns > shift(x), returns - shift(x), 0)),
by = .(symbol), .SDcols = cols]
cols <- colnames(market_data)[grep("^p_0", colnames(market_data))]
cols_new <- paste0("below_", cols)
market_data[, (cols_new) := lapply(.SD, function(x) ifelse(returns < shift(x), abs(returns - shift(x)), 0)),
by = .(symbol), .SDcols = cols]
# crate dummy variables
cols <- colnames(market_data)[grep("^p_9", colnames(market_data))]
cols_new <- paste0("above_dummy_", cols)
market_data[, (cols_new) := lapply(.SD, function(x) ifelse(returns > shift(x), 1, 0)), by = .(symbol), .SDcols = cols]
cols <- colnames(market_data)[grep("^p_0", colnames(market_data))]
cols_new <- paste0("below_dummy_", cols)
market_data[, (cols_new) := lapply(.SD, function(x) ifelse(returns < shift(x), 1, 0)), by = .(symbol), .SDcols = cols]
# get tail risk mesures with sum
cols <- colnames(market_data)[grep("below_p|above_p|dummy", colnames(market_data))]
indicators <- market_data[, lapply(.SD, function(x) sum(x, na.rm = TRUE)), by = .(time), .SDcols = cols]
colnames(indicators) <- c("time", paste0("sum_", cols))
setorder(indicators, time)
above_sum_cols <- colnames(indicators)[grep("above", colnames(indicators))]
below_sum_cols <- colnames(indicators)[grep("below", colnames(indicators))]
excess_sum_cols <- gsub("above", "excess", above_sum_cols)
indicators[, (excess_sum_cols) := indicators[, ..above_sum_cols] - indicators[, ..below_sum_cols]]
# get tail risk mesures with sd
cols <- colnames(market_data)[grep("below_p|above_p|dummy", colnames(market_data))]
indicators_sd <- market_data[, lapply(.SD, function(x) sd(x, na.rm = TRUE)), by = .(time), .SDcols = cols]
colnames(indicators_sd) <- c("time", paste0("sd_", cols))
setorder(indicators_sd, time)
above_sum_cols <- colnames(indicators_sd)[grep("above", colnames(indicators_sd))]
below_sum_cols <- colnames(indicators_sd)[grep("below", colnames(indicators_sd))]
excess_sum_cols <- gsub("above", "excess", above_sum_cols)
indicators_sd[, (excess_sum_cols) := indicators_sd[, ..above_sum_cols] - indicators_sd[, ..below_sum_cols]]
# merge indicators and spy
indicators <- merge(indicators, indicators_sd, by = c("time"), all.x = TRUE, all.y = FALSE)
indicators <- merge(indicators, spy, by = "time")
rm(market_data)
gc()
# OPTIMIZE STRATEGY -------------------------------------------------------
# params for returns
sma_width <- 1:60
threshold <- seq(-0.1, 0, by = 0.002)
vars <- colnames(indicators)[grep("sum_excess_p", colnames(indicators))]
paramset <- expand.grid(sma_width, threshold, vars, stringsAsFactors = FALSE)
colnames(paramset) <- c('sma_width', 'threshold', "vars")
# backtst function
backtest <- function(returns, indicator, threshold, return_cumulative = TRUE) {
sides <- vector("integer", length(indicator))
for (i in seq_along(sides)) {
if (i %in% c(1) || is.na(indicator[i-1])) {
sides[i] <- NA
} else if (indicator[i-1] < threshold) {
sides[i] <- 0
} else {
sides[i] <- 1
}
}
sides <- ifelse(is.na(sides), 1, sides)
returns_strategy <- returns * sides
if (return_cumulative) {
return(PerformanceAnalytics::Return.cumulative(returns_strategy))
} else {
return(returns_strategy)
}
}
library(Rcpp)
Rcpp::cppFunction("
double backtest_cpp(NumericVector returns, NumericVector indicator, double threshold) {
int n = indicator.size();
NumericVector sides(n);
for(int i=0; i<n; i++){
if(i==0 || R_IsNA(indicator[i-1])) {
sides[i] = 1;
} else if(indicator[i-1] < threshold){
sides[i] = 0;
} else {
sides[i] = 1;
}
}
NumericVector returns_strategy = returns * sides;
double cum_returns{ 1 + returns_strategy[0]} ;
for(int i=1; i<n; i++){
cum_returns *= (1 + returns_strategy[i]);
}
cum_returns = cum_returns - 1;
return cum_returns;
}
", rebuild = TRUE)
backtest_dummy <- function(returns, indicator, threshold, return_cumulative = TRUE) {
sides <- vector("integer", length(indicator))
for (i in seq_along(sides)) {
if (i %in% c(1) || is.na(indicator[i-1])) {
sides[i] <- NA
} else if (indicator[i-1] > threshold) {
sides[i] <- 0
} else if (indicator[i-1] <= threshold) {
sides[i] <- 1
}
}
sides <- ifelse(is.na(sides), 1, sides)
returns_strategy <- returns * sides
if (return_cumulative) {
return(PerformanceAnalytics::Return.cumulative(returns_strategy))
} else {
return(returns_strategy)
}
}
X <- as.data.frame(indicators[, .(returns, sum_excess_p_999_halfyear)])
X <- na.omit(X)
X$sum_excess_p_999_halfyear <- Winsorize(X$sum_excess_p_999_halfyear, probs = c(0.0001, 0.9999))
plot(X$sum_excess_p_999_halfyear, type = "l")
library(runner)
library(tsDyn)
library(parallel)
window_lengths <- c(7 * 22 * 6)
cl <- makeCluster(4)
clusterExport(cl, "X", envir = environment())
clusterEvalQ(cl, {library(tsDyn)})
roll_preds <- lapply(window_lengths, function(w) {
runner(
x = X,
f = function(x) {
# debug
# x = X[1:500, ]
# # TVAR (1)
tv1 <- tryCatch(
TVAR(data = x,
lag = 3, # Number of lags to include in each regime
model = "TAR", # Whether the transition variable is taken in levels (TAR) or difference (MTAR)
nthresh = 2, # Number of thresholds
thDelay = 1, # 'time delay' for the threshold variable
trim = 0.05, # trimming parameter indicating the minimal percentage of observations in each regime
mTh = 2, # combination of variables with same lag order for the transition variable. Either a single value (indicating which variable to take) or a combination
plot = FALSE),
error = function(e) NULL)
if (is.null(tv1)) {
tv1_pred_onestep <- NA
} else {
test <- tryCatch({
tv1_pred <- predict(tv1)[, 1]
names(tv1_pred) <- paste0("predictions_", 1:5)
data.frame(threshold = tv1$model.specific$Thresh,
predictions = data.frame(as.list(tv1_pred)))
}, error = function(e) NULL)
if (is.null(test)) {
y1 <<- x
y2 <<- tv1
print(tv1)
print(x)
stop("Error")
}
return(test)
}
},
k = w,
lag = 0L,
cl = cl,
na_pad = TRUE
)
})
stopCluster(cl)
gc()
#save results
file_name <- paste0("minmax_optimization", format.POSIXct(Sys.time(), format = "%Y%m%d%H%M%S"), ".rds")
file_name <- file.path("/home/matej/Desktop/minmax", file_name)
saveRDS(roll_preds, file_name)
# read results
# list.files("D:/features")
#roll_preds <- readRDS(file.path("D:/features", "exuber_threshold2_20221128054703.rds"))
# extract info from object
roll_results <- lapply(roll_preds, function(x) lapply(x, as.data.table))
roll_results <- lapply(roll_preds, function(l) {
lapply(l, function(x) {
if (length(x) > 1) {
threshold_1 <- x[1, 1]
threshold_2 <- x[2, 1]
predictions <- x[1, 2:6]
return(cbind.data.frame(threshold_1, threshold_2, predictions))
} else {
na_df <- cbind.data.frame(t(rep(NA, 7)))
colnames(na_df) <- c("threshold_1", "threshold_2",
"predictions.predictions_1",
"predictions.predictions_2",
"predictions.predictions_3",
"predictions.predictions_4",
"predictions.predictions_5")
return(na_df)
}
})
})
roll_results <- lapply(roll_results, rbindlist, fill = TRUE)
tvar_res <- lapply(roll_results, function(x){
cbind(indicators[1:nrow(x), .(time, returns, sum_excess_p_999_halfyear)], x)
})
tvar_res[[1]]$sum_excess_p_999_halfyear <- Winsorize(tvar_res[[1]]$sum_excess_p_999_halfyear,
probs = c(0.0001, 0.9999))
# visualize
ggplot(tvar_res[[1]], aes(time)) +
geom_line(aes(y = threshold_1), color = "green") +
geom_line(aes(y = threshold_2), color = "red") +
geom_line(aes(y = sum_excess_p_999_halfyear))
ggplot(tvar_res[[1]][time %between% c("2021-01-01", "2021-10-01")], aes(time)) +
geom_line(aes(y = threshold_1), color = "green") +
geom_line(aes(y = threshold_2), color = "red") +
geom_line(aes(y = sum_excess_p_999_halfyear))
# tvar_res<- as.data.frame(tvar_res)
# threshold based backtest
tvar_backtest <- function(tvar_res) {
returns <- tvar_res$returns
threshold_1 <- tvar_res$threshold_1
threshold_2 <- tvar_res$threshold_2
indicator <- tvar_res$sum_excess_p_999_halfyear
predictions <- tvar_res$predictions.predictions_1
sides <- vector("integer", length(predictions))
sides <- vector("integer", length(returns))
for (i in seq_along(sides)) {
if (i %in% c(1) || is.na(threshold_2[i-1]) || is.na(indicator[i-1])) {
sides[i] <- NA
} else if (indicator[i-1] < threshold_1[i-1]) {
# } else if (indicator[i-1] > threshold_2[i-1] & abs(predictions[i-1]) < 0.01) {
sides[i] <- 1
} else {
sides[i] <- 0
}
}
sides <- ifelse(is.na(sides), 1, sides)
returns_strategy <- returns * sides
returns_strategy
}
lapply(tvar_res, function(x) Return.cumulative(tvar_backtest(x)))
returns_strategy <- tvar_backtest(tvar_res[[1]])
Return.cumulative(returns_strategy)
returns <- tvar_res[[1]]$returns
charts.PerformanceSummary(xts(cbind(returns, returns_strategy), order.by = tvar_res[[1]]$time))
charts.PerformanceSummary(xts(tail(cbind(returns, returns_strategy), 5000), order.by = tail(tvar_res[[1]]$time, 5000)))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.