# Title: PRA min
# Author: Mislav Sagovac
# Description: Compute rolling percent-rank (PRA) indicators on 5-minute bars for S&P 500 stocks, aggregate extreme-value dummies across the universe and backtest a long/flat timing rule on SPY
# packages
library(tiledb)
library(data.table)
library(checkmate)
library(nanotime)
library(findata)
library(lubridate)
library(QuantTools)
library(ggplot2)
library(patchwork)
library(rextendr)
library(PerformanceAnalytics)
# SET UP ------------------------------------------------------------------
# check if we have all necessary env variables
assert_choice("AWS-ACCESS-KEY", names(Sys.getenv()))
assert_choice("AWS-SECRET-KEY", names(Sys.getenv()))
assert_choice("AWS-REGION", names(Sys.getenv()))
assert_choice("BLOB-ENDPOINT", names(Sys.getenv()))
assert_choice("BLOB-KEY", names(Sys.getenv()))
assert_choice("APIKEY-FMPCLOUD", names(Sys.getenv()))
assert_choice("FRED-KEY", names(Sys.getenv()))
# set credentials
config <- tiledb_config()
config["vfs.s3.aws_access_key_id"] <- Sys.getenv("AWS-ACCESS-KEY")
config["vfs.s3.aws_secret_access_key"] <- Sys.getenv("AWS-SECRET-KEY")
config["vfs.s3.region"] <- Sys.getenv("AWS-REGION")
context_with_config <- tiledb_ctx(config)
# parameters
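# rolling windows measured in 5-minute bars; the script treats ~90 bars as one
# trading day, so the windows correspond roughly to 1 day, 1 week, 1 month,
# 6 months, 1 year and 2 years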
windows = c(90, 90 * 5, 90 * 22, 90 * 22 * 6, 90 * 22 * 12, 90 * 22 * 24)
uri = "s3://predictors-pra5min" # if NULL, it calculates PRA values.
# UNIVERSE ----------------------------------------------------------------
# sp500 universe
fmp = FMP$new()
symbols <- fmp$get_sp500_symbols()
# PRA -------------------------------------------------------------
# calculate PRA
cols <- paste0("pr_", windows)
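# For intuition: roll_percent_rank(x, n) from QuantTools returns, for every bar,
# the percent rank of the latest value within its trailing window of n values.
# A rough base-R sketch of the idea (illustration only, not used below; the exact
# QuantTools formula may treat ties and the warm-up period differently):
pra_sketch <- function(x, n) {
  vapply(seq_along(x), function(i) {
    if (i < n) return(NA_real_)   # not enough history yet
    window <- x[(i - n + 1):i]    # trailing window including the current bar
    mean(window < x[i])           # share of the window below the latest value
  }, numeric(1))
}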
for (s in symbols) {
# debug
print(s)
# import existing data
if (tiledb_object_type("s3://predictors-pra5min") == "ARRAY") {
system.time({
arr <- tiledb_array(uri,
as.data.frame = TRUE,
query_layout = "UNORDERED",
attrs = c("close"),
selected_ranges = list(symbol = cbind(s, s)))
x <- arr[]
})
tiledb_array_close(arr)
# TODO: UPDATE IF GOOD
if (nrow(x) > 100) {
next
}
}
# import minute data
system.time({
arr <- tiledb_array("D:/equity-usa-minute-fmpcloud-adjusted",
as.data.frame = TRUE,
query_layout = "UNORDERED",
attrs = c("close"),
selected_ranges = list(symbol = cbind(s, s)))
x <- arr[]
})
tiledb_array_close(arr)
if (nrow(x) == 0) {
next
}
# clean table
x <- as.data.table(x)
attr(x$time, "tz") <- Sys.getenv("TZ")
x[, time := with_tz(time, "America/New_York")]
# keep trading hours
x <- x[as.ITime(time) %between% c(as.ITime("09:30:00"), as.ITime("15:59:00"))]
x <- unique(x, by = c("symbol", "time"))
setorderv(x, c('symbol', 'time'))
# create 5 min data
x_ <- copy(x)
x_[, time := as.nanotime(time)]
x_5 <- x_[, .(close = tail(close, 1)),
by = .(symbol,
time = nano_ceiling(time, as.nanoduration("00:05:00")))]
x_5[, time := as.POSIXct(time, tz = "UTC")]
x_5[, time := with_tz(time, "America/New_York")]
# check N
if (nrow(x_5) < max(windows)) {
next
}
# calculate PRA (rolling percent rank) for all windows
system.time(x_5[, (cols) := lapply(windows, function(w) roll_percent_rank(close, w))])
# save to tiledb
x_save <- copy(x_5)
x_save[, time := with_tz(time, "UTC")]
if (tiledb_object_type(uri) != "ARRAY") {
fromDataFrame(
obj = as.data.frame(x_save),
uri = uri,
col_index = c("symbol", "time"),
sparse = TRUE,
tile_domain=list(time=c(as.POSIXct("1970-01-01 00:00:00", tz = "UTC"),
as.POSIXct("2099-12-31 23:59:59", tz = "UTC"))),
allows_dups = FALSE
)
} else {
arr <- tiledb_array(uri)
arr[] <- as.data.frame(x_save)
tiledb_array_close(arr)
}
}
# import pra indicators
arr <- tiledb_array(uri, as.data.frame = TRUE, query_layout = "UNORDERED")
system.time({pra <- arr[]})
tiledb_array_close(arr)
# free resources
gc()
# convert to data.table
pra <- as.data.table(pra)
# DUMMY -------------------------------------------------------------------
# dummy
cols_above_999 <- paste0("pr_above_dummy_", windows)
pra[, (cols_above_999) := lapply(.SD, function(x) ifelse(x > 0.999, 1, 0)), .SDcols = cols]
cols_below_001 <- paste0("pr_below_dummy_", windows)
pra[, (cols_below_001) := lapply(.SD, function(x) ifelse(x < 0.001, 1, 0)), .SDcols = cols]
cols_net_1 <- paste0("pr_below_dummy_net_", windows)
pra[, (cols_net_1) := pra[, ..cols_above_999] - pra[, ..cols_below_001]]
cols_above_99 <- paste0("pr_above_dummy_99_", windows)
pra[, (cols_above_99) := lapply(.SD, function(x) ifelse(x > 0.99, 1, 0)), .SDcols = cols]
cols_below_01 <- paste0("pr_below_dummy_01_", windows)
pra[, (cols_below_01) := lapply(.SD, function(x) ifelse(x < 0.01, 1, 0)), .SDcols = cols]
cols_net_2 <- paste0("pr_below_dummy_net_0199_", windows)
pra[, (cols_net_2) := pra[, ..cols_above_99] - pra[, ..cols_below_01]]
cols_above_97 <- paste0("pr_above_dummy_97_", windows)
pra[, (cols_above_97) := lapply(.SD, function(x) ifelse(x > 0.97, 1, 0)), .SDcols = cols]
cols_below_03 <- paste0("pr_below_dummy_03_", windows)
pra[, (cols_below_03) := lapply(.SD, function(x) ifelse(x < 0.03, 1, 0)), .SDcols = cols]
cols_net_3 <- paste0("pr_below_dummy_net_0397_", windows)
pra[, (cols_net_3) := pra[, ..cols_above_97] - pra[, ..cols_below_03]]
cols_above_95 <- paste0("pr_above_dummy_95_", windows)
pra[, (cols_above_95) := lapply(.SD, function(x) ifelse(x > 0.95, 1, 0)), .SDcols = cols]
cols_below_05 <- paste0("pr_below_dummy_05_", windows)
pra[, (cols_below_05) := lapply(.SD, function(x) ifelse(x < 0.05, 1, 0)), .SDcols = cols]
cols_net_4 <- paste0("pr_below_dummy_net_0595_", windows)
pra[, (cols_net_4) := pra[, ..cols_above_95] - pra[, ..cols_below_05]]
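# The eight dummy blocks above repeat one pattern; a small helper along these
# lines could generate any of them (illustrative sketch only, not used below;
# make_dummies and the example call are hypothetical):
make_dummies <- function(dt, src_cols, new_cols, condition) {
  dt[, (new_cols) := lapply(.SD, function(x) ifelse(condition(x), 1, 0)),
     .SDcols = src_cols]
  invisible(dt)
}
# e.g. make_dummies(pra, cols, paste0("pr_above_dummy_95_", windows),
#                   function(x) x > 0.95) reproduces the 95% block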
# aggregate risk measures: cross-sectional sums across all symbols except SPY at each 5-minute bar
indicators <- pra[symbol != "SPY", lapply(.SD, sum, na.rm = TRUE),
.SDcols = c(colnames(pra)[grep("pr_\\d+", colnames(pra))],
cols_above_999, cols_above_99, cols_below_001, cols_below_01,
cols_above_97, cols_below_03, cols_above_95, cols_below_05,
cols_net_1, cols_net_2, cols_net_3, cols_net_4),
by = .(time)]
indicators <- unique(indicators, by = c("time"))
indicators[, time := force_tz(time, "America/New_York")]
setorder(indicators, "time")
# import spy data
# TODO: make a function that imports minute data and automatically resamples to the required bar size (a rough sketch follows the SPY block below)
arr <- tiledb_array("D:/equity-usa-minute-fmpcloud-adjusted",
as.data.frame = TRUE,
query_layout = "UNORDERED",
attrs = c("close"),
selected_ranges = list(symbol = cbind("SPY", "SPY")))
spy <- arr[]
spy <- as.data.table(spy)
attr(spy$time, "tz") <- Sys.getenv("TZ")
spy[, time := with_tz(time, "America/New_York")]
spy <- spy[as.ITime(time) %between% c(as.ITime("09:30:00"), as.ITime("15:59:00"))]
spy <- unique(spy, by = c("symbol", "time"))
setorderv(spy, c('symbol', 'time'))
# create 5 min SPY data
spy[, time := as.nanotime(time)]
spy_5 <- spy[, .(close = tail(close, 1)),
by = .(time = nano_ceiling(time, as.nanoduration("00:05:00")))]
spy_5[, time := as.POSIXct(time, tz = "UTC")]
spy_5[, time := with_tz(time, "America/New_York")]
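# Addressing the TODO above: a reusable import-and-resample helper could look
# roughly like this (a sketch under this script's assumptions: a TileDB array
# with a "close" attribute whose timestamps come back as POSIXct, regular-hours
# filtering and last-price aggregation; import_minute_close is a made-up name
# and the function is not used below):
import_minute_close <- function(array_uri, symbol, bar = "00:05:00",
                                tz = "America/New_York") {
  arr <- tiledb_array(array_uri, as.data.frame = TRUE, query_layout = "UNORDERED",
                      attrs = c("close"),
                      selected_ranges = list(symbol = cbind(symbol, symbol)))
  dt <- as.data.table(arr[])
  tiledb_array_close(arr)
  if (nrow(dt) == 0) return(dt)
  dt[, time := with_tz(time, tz)]
  dt <- dt[as.ITime(time) %between% c(as.ITime("09:30:00"), as.ITime("15:59:00"))]
  dt <- unique(dt, by = c("symbol", "time"))
  setorderv(dt, c("symbol", "time"))
  dt[, time := as.nanotime(time)]
  out <- dt[, .(close = tail(close, 1)),
            by = .(symbol, time = nano_ceiling(time, as.nanoduration(bar)))]
  out[, time := with_tz(as.POSIXct(time, tz = "UTC"), tz)]
  out[]
}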
# merge spy and pra
backtest_data <- spy_5[indicators, on = 'time']
backtest_data <- na.omit(backtest_data)
setorder(backtest_data, "time")
backtest_data[, returns := close / shift(close) - 1]
backtest_data <- na.omit(backtest_data)
# VISUALIZATION -----------------------------------------------------------
# date segments
GFC <- c("2007-01-01", "2010-01-01")
AFTERGFCBULL <- c("2010-01-01", "2015-01-01")
COVID <- c("2020-01-01", "2021-06-01")
AFTER_COVID <- c("2021-06-01", "2022-01-01")
NEW <- c("2022-01-01", "2022-03-15")
# check individual stocks
sample_ <- pra[symbol == "AAPL"]
v_buy <- sample_[pr_above_dummy_95_1980 == 1, time]
v_sell <- sample_[pr_below_dummy_05_1980 == 1, time]
ggplot(sample_, aes(x = time)) +
geom_line(aes(y = close)) +
geom_vline(xintercept = v_buy, color = "green") +
geom_vline(xintercept = v_sell, color = "red")
ggplot(sample_[time %between% GFC], aes(x = time)) +
geom_line(aes(y = close)) +
geom_vline(xintercept = v_buy, color = "green") +
geom_vline(xintercept = v_sell, color = "red")
ggplot(sample_[time %between% AFTERGFCBULL], aes(x = time)) +
geom_line(aes(y = close)) +
geom_vline(xintercept = v_buy, color = "green") +
geom_vline(xintercept = v_sell, color = "red")
ggplot(sample_[time %between% COVID], aes(x = time)) +
geom_line(aes(y = close)) +
geom_vline(xintercept = v_buy, color = "green") +
geom_vline(xintercept = v_sell, color = "red")
ggplot(sample_[time %between% AFTER_COVID], aes(x = time)) +
geom_line(aes(y = close)) +
geom_vline(xintercept = v_buy, color = "green") +
geom_vline(xintercept = v_sell, color = "red")
ggplot(sample_[time %between% NEW], aes(x = time)) +
geom_line(aes(y = close)) +
geom_vline(xintercept = v_buy, color = "green") +
geom_vline(xintercept = v_sell, color = "red")
# plots sum for backtest data
g1 <- ggplot(backtest_data, aes(x = time)) +
geom_line(aes(y = pr_below_dummy_90))
g2 <- ggplot(backtest_data, aes(x = time)) +
geom_line(aes(y = pr_below_dummy_450))
g3 <- ggplot(backtest_data, aes(x = time)) +
geom_line(aes(y = pr_below_dummy_1980))
g4 <- ggplot(backtest_data, aes(x = time)) +
geom_line(aes(y = pr_below_dummy_11880))
( g1 | g2 ) / ( g3 | g4 )
# plots sum 95 / 05
g1 <- ggplot(backtest_data, aes(x = time)) +
geom_line(aes(y = pr_below_dummy_05_90)) +
geom_line(aes(y = pr_above_dummy_95_90), color ="red")
g2 <- ggplot(backtest_data, aes(x = time)) +
geom_line(aes(y = pr_below_dummy_05_450)) +
geom_line(aes(y = pr_above_dummy_95_450), color ="red")
g3 <- ggplot(backtest_data, aes(x = time)) +
geom_line(aes(y = pr_below_dummy_05_1980)) +
geom_line(aes(y = pr_above_dummy_95_1980), color ="red")
g4 <- ggplot(backtest_data, aes(x = time)) +
geom_line(aes(y = pr_below_dummy_05_11880)) +
geom_line(aes(y = pr_above_dummy_95_11880), color ="red")
( g1 | g2 ) / ( g3 | g4 )
# net
g1 <- ggplot(backtest_data, aes(x = time)) +
geom_line(aes(y = pr_below_dummy_net_90))
g2 <- ggplot(backtest_data, aes(x = time)) +
geom_line(aes(y = pr_below_dummy_net_450))
g3 <- ggplot(backtest_data, aes(x = time)) +
geom_line(aes(y = pr_below_dummy_net_1980))
g4 <- ggplot(backtest_data, aes(x = time)) +
geom_line(aes(y = pr_below_dummy_net_11880))
( g1 | g2 ) / ( g3 | g4 )
# BACKTEST ----------------------------------------------------------------
# backtest function
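# Rule: stay long (side = 1) unless the previous bar's indicator exceeds
# `threshold`, in which case go flat (side = 0) for the current bar; the first
# bar and bars following a missing indicator default to long.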
backtest <- function(returns, indicator, threshold, return_cumulative = TRUE) {
sides <- vector("integer", length(indicator))
for (i in seq_along(sides)) {
if (i == 1 || is.na(indicator[i-1])) {
sides[i] <- NA
} else if (indicator[i-1] > threshold) {
sides[i] <- 0
} else {
sides[i] <- 1
}
}
sides <- ifelse(is.na(sides), 1, sides)
returns_strategy <- returns * sides
if (return_cumulative) {
return(PerformanceAnalytics::Return.cumulative(returns_strategy))
} else {
return(returns_strategy)
}
}
library(Rcpp)
Rcpp::cppFunction("
double backtest_cpp(NumericVector returns, NumericVector indicator, double threshold) {
int n = indicator.size();
NumericVector sides(n);
for(int i=0; i<n; i++){
if(i==0 || R_IsNA(indicator[i-1])) {
sides[i] = 1;
} else if(indicator[i-1] > threshold){
sides[i] = 0;
} else {
sides[i] = 1;
}
}
NumericVector returns_strategy = returns * sides;
double cum_returns{ 1 + returns_strategy[0]} ;
for(int i=1; i<n; i++){
cum_returns *= (1 + returns_strategy[i]);
}
cum_returns = cum_returns - 1;
return cum_returns;
}
", rebuild = TRUE)
# optimizations loop
thresholds <- c(seq(0, 100, 1))
colnames(backtest_data)
variables <- colnames(indicators)[grep("below_dummy_net", colnames(indicators))]
params <- expand.grid(thresholds, variables, stringsAsFactors = FALSE)
returns_strategies <- list()
returns_ <- backtest_data$returns
vars_ <- params$Var2
x <- vapply(1:nrow(params), function(i) backtest_cpp(returns_,
backtest_data[, get(vars_[i])],
# SMA(backtest_data[, get(params[i, 2])], 4),
params[i, 1]),
numeric(1))
optim_results <- cbind(params, cum_return = x)
setorder(optim_results, cum_return)
# inspect
tail(optim_results[order(optim_results$cum_return), ], 50)
ggplot(optim_results[grep("dummy_\\d+$", optim_results$Var2),], aes(x = cum_return)) + geom_histogram() +
geom_vline(xintercept = 4.1, color = "red") + facet_grid(. ~ Var2)
ggplot(optim_results[grep("01", optim_results$Var2),], aes(x = cum_return)) + geom_histogram() +
geom_vline(xintercept = 4.1, color = "red") + facet_grid(. ~ Var2)
ggplot(optim_results[grep("03", optim_results$Var2),], aes(x = cum_return)) + geom_histogram() +
geom_vline(xintercept = 4.1, color = "red") + facet_grid(. ~ Var2)
ggplot(optim_results[grep("03", optim_results$Var2),], aes(x = cum_return)) + geom_histogram() +
geom_vline(xintercept = 4.1, color = "red") + facet_grid(. ~ Var2)
summary_results <- as.data.table(optim_results)
summary_results[, median(cum_return), by = Var2]
# for net
unique(optim_results$Var2)
ggplot(optim_results[grep("net_40|net_176|net_528|net_1056|net_2112|net_4224", optim_results$Var2),], aes(x = cum_return)) +
geom_histogram() +
geom_vline(xintercept = 4.1, color = "red") + facet_grid(. ~ Var2)
ggplot(optim_results[grep("net_01|net_40|net_176|net_528|net_1056|net_2112|net_4224", optim_results$Var2),], aes(x = cum_return)) +
geom_histogram() +
geom_vline(xintercept = 4.1, color = "red") + facet_grid(. ~ Var2)
ggplot(optim_results[grep("net_03|net_05", optim_results$Var2),], aes(x = cum_return)) + geom_histogram() +
geom_vline(xintercept = 4.1, color = "red") + facet_grid(. ~ Var2)
# backtest individual
strategy_returns <- backtest(returns_, backtest_data$pr_below_dummy_90, 71, return_cumulative = FALSE)
charts.PerformanceSummary(xts(cbind(returns_, strategy_returns), order.by = backtest_data$time))
charts.PerformanceSummary(xts(cbind(backtest_data$returns, strategy_returns)[30000:length(strategy_returns), ],
order.by = backtest_data$time[30000:length(strategy_returns)]))
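# Performance() is not defined in this script (nor in PerformanceAnalytics); it
# is presumably a helper from the author's environment. A minimal sketch of such
# a summary, assuming ~78 five-minute bars per session and 252 sessions per year
# for annualization:
Performance <- function(x) {
  scale_ <- 252 * 78  # assumed number of 5-minute bars per year
  res <- c(
    Return.cumulative(x),
    Return.annualized(x, scale = scale_),
    SharpeRatio.annualized(x, scale = scale_),
    StdDev.annualized(x, scale = scale_),
    maxDrawdown(x)
  )
  names(res) <- c("Cumulative Return", "Annualized Return", "Annualized Sharpe",
                  "Annualized StdDev", "Max Drawdown")
  res
}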
Performance(xts(strategy_returns, order.by = backtest_data$time))
Performance(xts(backtest_data$returns, order.by = backtest_data$time))