# Session options: plot size (repr.*) and warn = -1, which silences warnings.
options(
  repr.plot.width = 14,
  repr.plot.height = 7,
  warn = -1
)
# library(tidyverse)
# library(tibbletime)

# Chart options: use 20pt text as the default for every subsequent ggplot.
theme_update(
  text = element_text(size = 20)
)

# Load the perp data set (CSV hosted in the r-quant-recipes repository).
perps <- fread(
  "https://github.com/Robot-Wealth/r-quant-recipes/raw/master/quantifying-combining-alphas/binance_perp_daily.csv"
)
# Create universe
# Sort first so the by-ticker leads below always refer to the next date,
# independent of the row order of the downloaded file.
setorder(perps, ticker, date)

# let() is data.table's `:=`: the new columns are added to `perps` by
# reference, so no intermediate copy is needed here.
perps[, let(
  # 30-row trailing mean of dollar volume (NA until 30 rows are available)
  trail_volume = roll_mean(dollar_volume, 30),
  # next row's funding rate plus next row's simple price return
  total_fwd_return_simple = shift(funding_rate, 1, type = "lead") +
    (shift(close, 1, type = "lead") - close) / close
), by = ticker]
perps[, let(
  total_fwd_return_simple_2 = shift(total_fwd_return_simple, 1, type = "lead"),
  total_fwd_return_log = log(1 + total_fwd_return_simple)
), by = ticker]

# Drop warm-up rows (rolling window) and tail rows (leads) that contain NA.
# na.omit() returns a new table, so `perps` keeps its NA rows.
universe_dt <- na.omit(perps)

# Cross-sectional volume decile per date; the universe is deciles 3-10.
universe_dt[, volume_decile := dplyr::ntile(trail_volume, 10), by = date] # TODO Find data.table way to do this
universe_dt[, is_universe := volume_decile >= 3]
# Plot universe size: row count per date, split by universe membership
ggplot(
  data = universe_dt[, .(count = .N), by = .(date, is_universe)],
  mapping = aes(x = date, y = count, color = is_universe)
) +
  geom_line() +
  labs(title = 'Universe size')
# # Checks
# test = as.data.table(universe)[universe_dt, on = c("ticker", "date")]
# test[, all(close == i.close)]
# test[, all(trail_volume == i.trail_volume)]
# test[, all(volume_decile == i.volume_decile)]
# test[, .(volume_decile, i.volume_decile)]
# test[, all(is_universe == i.is_universe)]
# Create simple features
# Rolling windows and lags below are positional, so order rows by date within
# each ticker first.
setorder(universe_dt, ticker, date)

# let() adds the feature columns to universe_dt by reference.
universe_dt[, let(
  # Rows since the highest close in a 20-row window (0..19), flipped and
  # centred so the feature spans -9.5 (high was 19 rows ago) to +9.5 (new high).
  breakout = 9.5 - frollapply(close, 20, function(x) {
    idx_of_high <- which.max(x)
    days_since_high <- length(x) - idx_of_high
    days_since_high
  }),
  # NOTE(review): by operator precedence this is close - (lagged close / close),
  # not (close - lagged close) / close. Left unchanged -- confirm intent.
  momo = close - shift(close, 10, type = "lag") / close,
  carry = funding_rate
), by = ticker]

# Drop the warm-up rows where the rolling window or lag is not yet available.
features_dt <- na.omit(universe_dt)
# # Checks
# test = as.data.table(features)[features_dt, on = c("ticker", "date")]
# test = na.omit(test)
# test[, all(close == i.close)]
# test[, all(carry == i.carry)]
# test[, all(momo == i.momo)]
# test[, all(breakout == i.breakout)]
# Plot features: density of each raw feature over in-universe rows,
# one free-scaled panel per feature
features_dt[is_universe == TRUE] |>
  melt(
    id.vars = setdiff(names(features_dt), c("breakout", "momo", "carry")),
    measure.vars = c("breakout", "momo", "carry"),
    variable.name = "feature",
    value.name = "value"
  ) |>
  ggplot(aes(x = value, colour = feature)) +
  geom_density() +
  facet_wrap(~feature, scales = "free")
# Scale features
# Cross-sectional scaling: every statistic is computed per date over the
# in-universe rows. The i-subset returns a copy, so features_dt is not modified.
features_scaled_dt <- features_dt[is_universe == TRUE][
  , let(
    demeaned_fwd_returns = total_fwd_return_simple - mean(total_fwd_return_simple),
    zscore_carry = (carry - mean(carry, na.rm = TRUE)) / sd(carry, na.rm = TRUE),
    decile_carry = dplyr::ntile(carry, 10),
    zscore_momo = (momo - mean(momo, na.rm = TRUE)) / sd(momo, na.rm = TRUE),
    decile_momo = dplyr::ntile(momo, 10)
  ),
  by = date
]
# Drops rows with NA in any column (e.g. z-scores on dates where sd() is NA).
features_scaled_dt <- na.omit(features_scaled_dt)
# # Checks
# test = as.data.table(features_scaled)[features_scaled_dt, on = c("ticker", "date")]
# test = na.omit(test)
# test[, all(zscore_carry == i.zscore_carry)]
# test[, all(decile_carry == i.decile_carry)]
# test[, all(zscore_momo == i.zscore_momo)]
# test[, all(decile_momo == i.decile_momo)]
# Factor plot of the decile_carry feature against next day relative returns
features_scaled_dt[
  , .(mean_return = mean(demeaned_fwd_returns)),
  by = decile_carry
] |>
  ggplot(aes(x = factor(decile_carry), y = mean_return)) +
  geom_bar(stat = "identity") +
  labs(
    x = "Carry Decile",
    y = "Cross-Sectional Return",
    title = "Carry decile feature vs next-day cross-sectional return"
  )
# Factor plot of the breakout feature against next day returns
# (total, not demeaned, forward returns)
features_scaled_dt[
  , .(mean_return = mean(total_fwd_return_simple)),
  by = breakout
] |>
  ggplot(aes(x = breakout, y = mean_return)) +
  geom_bar(stat = "identity") +
  labs(
    x = "Breakout",
    y = "Mean Return",
    title = "Breakout feature vs next-day return"
  )
# Factor plot of the momentum feature against next day relative returns
features_scaled_dt[
  , .(mean_return = mean(demeaned_fwd_returns)),
  by = decile_momo
] |>
  # factor() gives one discrete bar per decile, matching the carry plot
  ggplot(aes(x = factor(decile_momo), y = mean_return)) +
  geom_bar(stat = "identity") +
  labs(
    x = "Momentum Decile",
    y = "Cross-Sectional Return",
    title = "Momentum decile feature vs next-day cross-sectional return"
  )
# Information coefficient
# Correlation of each feature with the demeaned (cross-sectional) forward
# return, pooled over all rows of features_scaled_dt.
melt(
  features_scaled_dt,
  id.vars = setdiff(
    names(features_scaled_dt),
    c("breakout", "zscore_carry", "zscore_momo", "decile_carry", "decile_momo")
  ),
  variable.name = "feature",
  value.name = "value",
  measure.vars = c("breakout", "zscore_carry", "zscore_momo", "decile_carry", "decile_momo")
) |>
  _[, .(IC = cor(value, demeaned_fwd_returns)), by = feature] |>
  ggplot(aes(
    # explicit levels fix the left-to-right order of the bars
    x = factor(feature, levels = c('breakout', 'zscore_carry', 'decile_carry', 'zscore_momo', 'decile_momo')),
    y = IC
  )) +
  geom_bar(stat = "identity") +
  labs(
    x = "Feature",
    y = "IC",
    title = "Relative Return Information Coefficient"
  )
# Same information coefficient, but against the total (not demeaned)
# forward return.
melt(
  features_scaled_dt,
  id.vars = setdiff(
    names(features_scaled_dt),
    c("breakout", "zscore_carry", "zscore_momo", "decile_carry", "decile_momo")
  ),
  variable.name = "feature",
  value.name = "value",
  measure.vars = c("breakout", "zscore_carry", "zscore_momo", "decile_carry", "decile_momo")
) |>
  _[, .(IC = cor(value, total_fwd_return_simple)), by = feature] |>
  ggplot(aes(
    # explicit levels fix the left-to-right order of the bars
    x = factor(feature, levels = c('breakout', 'zscore_carry', 'decile_carry', 'zscore_momo', 'decile_momo')),
    y = IC
  )) +
  geom_bar(stat = "identity") +
  labs(
    x = "Feature",
    y = "IC",
    title = "Time-Series Return Information Coefficient"
  )
# Decay
# IC of the cross-sectional features against demeaned returns 1..6 rows ahead.
setorder(features_scaled_dt, ticker, date)

# The i-subset copies, so the lead columns are not added to features_scaled_dt.
features_scaled_decay_dt <- features_scaled_dt[is_universe == TRUE][, let(
  demeaned_fwd_returns_2 = shift(demeaned_fwd_returns, -1, type = "shift"),
  demeaned_fwd_returns_3 = shift(demeaned_fwd_returns, -2, type = "shift"),
  demeaned_fwd_returns_4 = shift(demeaned_fwd_returns, -3, type = "shift"),
  demeaned_fwd_returns_5 = shift(demeaned_fwd_returns, -4, type = "shift"),
  demeaned_fwd_returns_6 = shift(demeaned_fwd_returns, -5, type = "shift")
), by = ticker]
features_scaled_decay_dt <- na.omit(features_scaled_decay_dt)

# Long format: one row per (observation, feature).
features_scaled_decay_dt <- melt(
  features_scaled_decay_dt,
  id.vars = setdiff(
    names(features_scaled_decay_dt),
    c("zscore_carry", "zscore_momo", "decile_carry", "decile_momo")
  ),
  variable.name = "feature",
  value.name = "value",
  measure.vars = c("zscore_carry", "zscore_momo", "decile_carry", "decile_momo")
)

features_scaled_decay_dt[, .(
  IC_1 = cor(value, demeaned_fwd_returns),
  IC_2 = cor(value, demeaned_fwd_returns_2),
  IC_3 = cor(value, demeaned_fwd_returns_3),
  IC_4 = cor(value, demeaned_fwd_returns_4),
  IC_5 = cor(value, demeaned_fwd_returns_5),
  IC_6 = cor(value, demeaned_fwd_returns_6)
), by = feature] |>
  # measure.vars is omitted on purpose: melt() then uses every non-id column,
  # i.e. IC_1 .. IC_6.
  melt(
    id.vars = "feature",
    variable.name = "IC_period",
    value.name = "IC"
  ) |>
  ggplot(aes(x = factor(IC_period), y = IC, colour = feature, group = feature)) +
  geom_line() +
  geom_point() +
  labs(
    title = "IC by forward period against relative returns",
    x = "IC period"
  )
# Time series decay
# IC of the breakout feature against total returns 1..6 rows ahead.
setorder(features_scaled_dt, ticker, date)

# The i-subset copies, so the lead columns are not added to features_scaled_dt.
features_scaled_decay_dt <- features_scaled_dt[is_universe == TRUE][, let(
  fwd_returns_2 = shift(total_fwd_return_simple, -1, type = "shift"),
  fwd_returns_3 = shift(total_fwd_return_simple, -2, type = "shift"),
  fwd_returns_4 = shift(total_fwd_return_simple, -3, type = "shift"),
  fwd_returns_5 = shift(total_fwd_return_simple, -4, type = "shift"),
  fwd_returns_6 = shift(total_fwd_return_simple, -5, type = "shift")
), by = ticker]
features_scaled_decay_dt <- na.omit(features_scaled_decay_dt)

# Long format with breakout as the only measured feature.
features_scaled_decay_dt <- melt(
  features_scaled_decay_dt,
  id.vars = setdiff(names(features_scaled_decay_dt), c("breakout")),
  variable.name = "feature",
  value.name = "value",
  measure.vars = c("breakout")
)

features_scaled_decay_dt[, .(
  IC_1 = cor(value, total_fwd_return_simple),
  IC_2 = cor(value, fwd_returns_2),
  IC_3 = cor(value, fwd_returns_3),
  IC_4 = cor(value, fwd_returns_4),
  IC_5 = cor(value, fwd_returns_5),
  IC_6 = cor(value, fwd_returns_6)
), by = feature] |>
  # measure.vars is omitted on purpose: melt() then uses every non-id column,
  # i.e. IC_1 .. IC_6.
  melt(
    id.vars = "feature",
    variable.name = "IC_period",
    value.name = "IC"
  ) |>
  ggplot(aes(x = factor(IC_period), y = IC, colour = feature, group = feature)) +
  geom_line() +
  geom_point() +
  labs(
    title = "IC by forward period against time-series returns",
    x = "IC period"
  )
# Start simulation from date we first have n tickers in the universe
min_trading_universe_size <- 10

# Count only in-universe rows: the threshold is about the tradable universe,
# so the excluded group must not be able to trigger the start date.
universe_counts <- features_dt[
  is_universe == TRUE,
  .(count = .N),
  by = date
][order(date)]
start_date <- universe_counts[count >= min_trading_universe_size, date[1L]]
stopifnot(
  "no date has min_trading_universe_size tickers in the universe" =
    length(start_date) == 1L && !is.na(start_date)
)

# The i-subset returns a copy, so the weight columns below do not leak into
# features_dt.
model_dt <- features_dt[is_universe == TRUE & date >= start_date]

# Cross-sectional deciles per date.
model_dt[, let(
  carry_decile = dplyr::ntile(carry, 10),
  momo_decile = dplyr::ntile(momo, 10)
), by = date]

# Centre deciles on zero (1..10 -> -4.5..4.5); momentum enters with a negative
# sign; breakout (-9.5..9.5) is halved. These are elementwise, so no grouping
# is needed.
model_dt[, let(
  carry_weight = carry_decile - 5.5,
  momo_weight = -(momo_decile - 5.5),
  breakout_weight = breakout / 2
)]
model_dt[
  , combined_weight := 0.5 * carry_weight + 0.2 * momo_weight + 0.3 * breakout_weight
]

# Normalise so absolute weights sum to 1 on each date. The explicit zero branch
# avoids 0/0 when every combined weight on a date is zero.
model_dt[
  , scaled_weight := fifelse(
    combined_weight == 0,
    0,
    combined_weight / sum(abs(combined_weight))
  ),
  by = date
]
# Portfolio return per date = sum over tickers of weight * forward return.
# Aggregating before taking logs gives one point per date; logging each
# position's contribution separately is not the portfolio log return.
returns_dt <- model_dt[
  , .(returns = sum(scaled_weight * total_fwd_return_simple)),
  by = date
]
setorder(returns_dt, date)
returns_dt[, logreturns := log(returns + 1)]

# cumsum() inside aes() relies on returns_dt being sorted by date (above).
returns_plot <- ggplot(returns_dt, aes(x = date, y = cumsum(logreturns))) +
  geom_line() +
  labs(
    title = 'Combined Carry, Momentum, and Trend Model on top 80% Perp Universe',
    subtitle = "Unleveraged returns",
    x = "",
    y = "Cumulative return"
  )

# Net (signed) portfolio weight per date.
weights_plot <- model_dt[, .(total_weight = sum(scaled_weight)), by = date] |>
  ggplot(aes(x = date, y = total_weight)) +
  geom_line() +
  labs(x = "Date", y = "Portfolio net weight")

# Stack the two panels (returns on top, twice the height of the weights panel).
returns_plot / weights_plot + plot_layout(heights = c(2, 1))
# CHAPTER 2 ---------------------------------------------------------------
