library(evir)
library(data.table)
library(Rcpp)
library(purrr)
library(ggplot2)
library(cowplot)
library(highfrequency)
library(quantmod)
library(dpseg)
library(future.apply)
library(anytime)
library(PerformanceAnalytics)
library(roll)
library(TTR)
library(simfinapi)
library(gridExtra)
library(forecast)
library(tseries)
library(rugarch)
library(GAS)
library(parallel) # for the GASS package
library(BatchGetSymbols) # import data
source('C:/Users/Mislav/Documents/GitHub/alphar/R/parallel_functions.R')
source('C:/Users/Mislav/Documents/GitHub/alphar/R/outliers.R')
source('C:/Users/Mislav/Documents/GitHub/alphar/R/import_data.R')
source('C:/Users/Mislav/Documents/GitHub/alphar/R/execution.R')
source('C:/Users/Mislav/Documents/GitHub/alphar/R/features.R')
# performance
plan(multiprocess(workers = availableCores() - 10)) # multiprocess(workers = availableCores() - 8)
# PARAMETERS --------------------------------------------------------------
# before backcusum function
contract = 'SPY'
freq = 5 # 1 is one minute, 60 is hour, 480 is day etc
upsample = FALSE
add_fundamentals = FALSE
add_vix = FALSE
# model parameters
roll_window = c(2500, 5000, 7500, 10000, 20000)
thresholds <- c(0.0008, 0.001, 0.0012)
# backtest
sma_long <- c(seq(100, 1000, 100), seq(1500, 10000, 1500))
sma_short <- c(10, 50, 100)
# IMPORT DATA -------------------------------------------------------------
if (freq < 60) {
paste0(contract, freq)
# HFD
market_data <- import_mysql(
contract = paste0(contract, freq),
save_path = 'D:/market_data/usa/ohlcv',
trading_days = TRUE,
upsample = upsample,
RMySQL::MySQL(),
dbname = 'odvjet12_market_data_usa',
username = 'odvjet12_mislav',
password = 'Theanswer0207',
host = '91.234.46.219'
)
vix <- import_mysql(
contract = paste0(contract, freq),
save_path = 'D:/market_data/usa/ohlcv',
trading_days = TRUE,
upsample = upsample,
RMySQL::MySQL(),
dbname = 'odvjet12_market_data_usa',
username = 'odvjet12_mislav',
password = 'Theanswer0207',
host = '91.234.46.219'
)
} else if (freq == 480) {
market_data <- BatchGetSymbols(tickers = contract, first.date = '2000-01-01')[[2]]
market_data <- xts::xts(market_data[, c(1:6, 9, 10)], order.by = market_data$ref.date)
market_data$returns <- market_data$ret.adjusted.prices
} else {
# LFD
market_data <- import_ib(
contract = contract,
frequencies = ifelse(freq == 60, '1 hour', '1 day'),
duration = '30 Y',
type = 'equity',
trading_days = FALSE,
what_to_show = 'TRADES'
)
colnames(market_data) <- c('open', 'high', 'low', 'close', 'volume', 'WAP', 'hasGaps', 'count')
vix <- import_ib(
contract = 'VIX',
frequencies = ifelse(freq == 60, '1 hour', '1 day'),
duration = '30 Y',
type = 'index',
trading_days = FALSE
)
colnames(vix) <- c('open', 'high', 'low', 'close', 'volume', 'WAP', 'hasGaps', 'count')
}
# FUNDAMENTALS
if (add_fundamentals) {
fundamentals <- simfinapi::sfa_get_statement(Ticker = contract,
statement = 'all',
period = 'quarters',
ttm = TRUE,
shares = TRUE,
api_key = '8qk9Xc9scFc0Rbpfrx6PLdaiomvi7Dxc')
if (!is.null(fundamentals)) {
fundamentals <- janitor::clean_names(fundamentals)
fundamentals <- fundamentals[, .(publish_date, shares, sales_per_share)]
fundamentals <- xts::xts(fundamentals[, 2:ncol(fundamentals)], order.by = as.POSIXct(fundamentals$publish_date))
market_data <- merge(market_data, fundamentals, all = TRUE)
market_data$sales_per_share <- zoo::na.locf(market_data$sales_per_share)
market_data <- market_data[!is.na(market_data$close), ]
market_data$ps <- market_data$close / market_data$sales_per_share
}
}
# PREPROCESSING -----------------------------------------------------------
# Remove outliers
market_data <- remove_outlier_median(market_data, median_scaler = 25)
# merge market data and VIX
if (add_vix) {
market_data <- merge(market_data, vix[, 'close'], join = 'left')
colnames(market_data)[ncol(market_data)] <- 'vix'
market_data <- na.omit(market_data)
}
# Add features
market_data <- add_features(market_data)
# Remove NA values
market_data <- na.omit(market_data)
head(market_data)
# ROLL GPD ----------------------------------------------------------------
# roll gpd
gpd_roll <- lapply(roll_window,
function(x) {
lapply(thresholds, function(y) {
result <- frollapply_parallel(
y = (market_data$returns * -1),
n_cores = -1,
roll_window = x,
FUN = function(x) {
es <- evir::gpd(x, threshold = y, method = 'ml', information = 'expected')
evir::riskmeasures(es, c(0.999))[, 3]
},
fill = NA,
align = 'right'
)
result <- lapply(result, function(x) x[!is.na(x)])
result <- unlist(purrr::flatten(result))
result <- c(rep(NA, x-1), result)
result <- cbind(market_data, es = result)
na.omit(result)
})
})
# names
gpd_roll_names <- lapply(roll_window,
function(x) {
lapply(thresholds, function(y) {
paste0(contract, '-', x, '-', y*10000)
})
})
gpd_roll_names <- unlist(gpd_roll_names)
# results
results <- purrr::flatten(gpd_roll)
# gpd_roll <- function(data, window_size = 5000) {
# input_vector <- as.data.table(data)$close * -1
# print(head(input_vector))
# result <- slider_parallel(
# .x = input_vector,
# .f = ~ {
# es <- evir::gpd(.x, threshold = 0.001, method = 'ml', information = 'expected')
# es <- evir::riskmeasures(es, c(0.99, 0.999))[, 3]
# return(es)
# },
# .before = window_size - 1,
# .complete = TRUE,
# n_cores = -1
# )
# # result <- lapply(result, function(x) x[!is.na(x)])
# # result <- unlist(purrr::flatten(result))
# # result <- c(rep(NA, window_size-1), result)
# # result <- cbind(data, es = result)
# # result <- na.omit(result)
# return(result)
# }
# plot
# data_plot <- as.data.table(results[, c('close', 'es')])
# ggplot(data_plot, aes(x = index, y = close, colour = es > 0.015)) +
# geom_line(aes(group = 1), size= 1.1)
# es_emas <- ggplot(data_plot, aes(x = index)) +
# geom_line(aes(y = es), colour = 'green') +
# geom_line(aes(y = TTR::EMA(es, 10)), colour = 'red') +
# geom_line(aes(y = TTR::EMA(es, 100)), colour = 'blue')
# es_smas <- ggplot(data_plot, aes(x = index)) +
# geom_line(aes(y = es), colour = 'green') +
# geom_line(aes(y = TTR::SMA(es, 50)), colour = 'red') +
# geom_line(aes(y = TTR::SMA(es, 500)), colour = 'orange') +
# geom_line(aes(y = TTR::SMA(es, 1000)), colour = 'blue') +
# geom_line(aes(y = TTR::SMA(es, 10000)), colour = 'black')
# ggplot(data_plot, aes(x = index, y = close, colour = es > 0.005 & TTR::SMA(es, 500) > TTR::SMA(es, 50))) +
# geom_line(aes(group = 1), size= 1.1)
# head(data_plot)
# CUSUM -------------------------------------------------------------------
# backcusum roll
# back_cusum_test <- function(data, col_name, window_size = 100, side = c('greater', 'less')) {
# bc <- slider_parallel(
# .x = as.data.table(data),
# .f = ~ {
# formula_ <- paste0('.$', col_name, ' ~ 1')
# backCUSUM::SBQ.test(stats::as.formula(formula_), alternative = side)[['statistic']]
# },
# .before = window_size - 1,
# .complete = TRUE,
# n_cores = -1
# )
# bc <- unlist(bc)
# bc <- c(rep(NA, window_size - 1), bc)
# return(bc)
# }
#
# # backcusum roll for chooosen variables
# results$es_greater <- back_cusum_test(results, 'es', 150, side = 'greater')
# results$es_greater_sma <- TTR::SMA(results$es_greater, 100)
# results$es_greater_sma_q <- roll::roll_quantile(
# results$es_greater_sma, width = 12*8*200, p = 0.95, min_obs = 12*8*30)
# plot(results$es_greater)
# plot(results$es_greater_sma)
# quantile(results$es_greater, seq(0, 1, 0.05), na.rm = TRUE)
# quantile(results$es_greater_sma, seq(0, 1, 0.05), na.rm = TRUE)
# BACKTEST ----------------------------------------------------------------
# backtet functoin
backtest <- function(data, sma_long, sma_short) {
# trading rule
indicator <- as.vector(zoo::coredata(data$es))
# indicator_q <- roll::roll_quantile(indicator, 12*8*255*5, p = 0.90, min_obs = 12*8*255)
indicator_sma_long <- as.vector(zoo::coredata(TTR::SMA(indicator, sma_long)))
indicator_sma_short <- as.vector(zoo::coredata(TTR::SMA(indicator, sma_short)))
side <- vector(mode = 'integer', length = length(indicator))
for (i in 1:length(indicator)) {
if (i == 1 || is.na(indicator_sma_long[i-1])) {
side[i] <- NA
} else if (indicator[i-1] > 0.015 & (indicator_sma_short[i-1] > indicator_sma_long[i-1])) {
side[i] <- 0
} else {
side[i] <- 1
}
}
# merge
returns_strategy <- xts::xts(data$returns * side, order.by = zoo::index(data))
perf <- na.omit(merge(data[, 'returns'], returns_strategy = returns_strategy))
colnames(perf)[ncol(perf)] <- 'returns_strategy'
return(perf)
}
# sequence of backtests
backtests <- lapply(results, function(x) {
lapply(sma_long, function(y) {
lapply(sma_short, function(z) {
backtest(x, y, z)
})
})
})
backtests <- purrr::flatten(purrr::flatten(backtests))
backtests_all <- do.call(cbind, backtests)
backtests_all <- backtests_all[, c(1, seq(2, ncol(backtests_all), 2))]
# columns names
backtests_names <- lapply(gpd_roll_names, function(x) {
lapply(sma_long, function(y) {
lapply(sma_short, function(z) {
paste0('returns-', x, '-', y, '-', z)
})
})
})
backtests_names <- unlist(backtests_names)
colnames(backtests_all) <- gsub('-', '_', c('returns', backtests_names))
# perfrmance
cumreturns <- t(Return.cumulative(backtests_all))
cumreturns <- data.table(model = row.names(cumreturns), cumreturns = cumreturns)
cumreturns <- cumreturns[, c('type', 'contract', 'window', 'p', 'sma_long', 'sma_short') := data.table::tstrsplit(cumreturns$model, split = '_')]
colnames(cumreturns)[2] <- 'return'
cumreturns[, mean(return), by = .(window)]
cumreturns[, mean(return), by = .(p)]
cumreturns[, mean(return), by = .(sma_long)]
cumreturns[, mean(return), by = .(sma_short)]
cumreturns[, mean(return), by = .(sma_long, sma_short)]
cumreturns[, median(return), by = .(sma_long, sma_short)]
# tet <- table.Drawdowns(backtests_all[, 10])
# plots <- charts.PerformanceSummary(backtests[, c('returns', 'returns_strategy')], plot.engine = 'ggplot2')
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.