Nothing
## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
fig.width = 7,
fig.height = 5,
dpi = 96,
message = FALSE,
warning = FALSE
)
set.seed(1)
# CRAN vs local
heavy <- identical(Sys.getenv("NOT_CRAN"), "true") # CRAN sets this to "false"
# Optional backends (Suggests)
has_xgb <- requireNamespace("xgboost", quietly = TRUE)
has_rf <- requireNamespace("ranger", quietly = TRUE)
# Sequences are OFF by default (CRAN-safe). Opt-in locally with:
# Sys.setenv(RUN_SEQ = "true") # or RUN_KERAS_VIGNETTES = "true" (back-compat)
run_seq <- FALSE
run_seq_env <- tolower(Sys.getenv("RUN_SEQ", Sys.getenv("RUN_KERAS_VIGNETTES", "false")))
if (heavy && run_seq_env %in% c("true","1","yes") && requireNamespace("keras", quietly = TRUE)) {
# only now ask Keras if it's actually usable (this can spin up TF)
run_seq <- isTRUE(try(keras::is_keras_available(), silent = TRUE))
}
# Base-R helper to sandbox TF Autograph cache per-chunk (no 'withr' usage)
tf_autograph_scope <- function() {
ag_dir <- file.path(tempdir(), paste0("tf_ag_cache_", as.integer(runif(1, 1, 1e9))))
dir.create(ag_dir, showWarnings = FALSE, recursive = TRUE)
old_ag <- Sys.getenv("TF_AUTOGRAPH_CACHE_DIR", unset = NA_character_)
Sys.setenv(TF_AUTOGRAPH_CACHE_DIR = ag_dir)
# Ensure cleanup when the chunk completes
on.exit({
if (!is.na(old_ag)) Sys.setenv("TF_AUTOGRAPH_CACHE_DIR", old_ag) else Sys.unsetenv("TF_AUTOGRAPH_CACHE_DIR")
unlink(ag_dir, recursive = TRUE, force = TRUE)
unlink(Sys.glob(file.path(tempdir(), "__autograph_generated_file*.py")), force = TRUE)
unlink(file.path(tempdir(), "__pycache__"), recursive = TRUE, force = TRUE)
}, add = TRUE)
invisible(NULL)
}
## ----libs_data----------------------------------------------------------------
suppressPackageStartupMessages(library(PortfolioTesteR))
suppressPackageStartupMessages(library(data.table))
# Bundled sample data
data(sample_prices_weekly)
data(sample_prices_daily)
# Optional peek
head(sample_prices_weekly[, 1:6])
## ----baseline_pooled----------------------------------------------------------
# Prepare tabular features and labels
X <- ml_prepare_features(sample_prices_weekly, sample_prices_daily)
Y <- make_labels(sample_prices_weekly, horizon = 4L, type = "log")
# Linear/Ridge baseline
ridge <- ml_make_model("ridge")
set.seed(1)
res_baseline <- ml_backtest(
features_list = X, labels = Y,
fit_fn = ridge$fit,
predict_fn = ridge$predict,
schedule = list(is = 104L, oos = 4L, step = 4L), # 2y IS, 1m OOS, monthly step
transform = "zscore", # IS-only scaling (no leakage)
selection = list(top_k = 15L),
weighting = list(method = "rank"),
prices = sample_prices_weekly,
initial_capital = 1e5,
name = "Baseline Ridge (pooled)"
)
print(res_baseline$backtest)
plot(res_baseline$backtest, type = "performance")
## ----ensemble, eval=has_rf || has_xgb-----------------------------------------
# Start with ridge (always available)
models <- list(ml_make_model("ridge"))
# Add RF only if ranger is installed
if (has_rf) {
models <- c(models, list(ml_make_model("rf", num.trees = if (heavy) 400L else 200L, mtry = 3L)))
}
# Add XGBoost only if xgboost is installed
if (has_xgb) {
models <- c(models, list(
ml_make_model(
"xgboost",
params = list(objective = "reg:squarederror", max_depth = 4L, eta = 0.07),
nrounds = if (heavy) 300L else 150L
)
))
}
# Build the ensemble from whatever is available
ens <- do.call(ml_make_ensemble, models)
set.seed(2)
res_ens <- ml_backtest(
features_list = X, labels = Y,
fit_fn = ens$fit, predict_fn = ens$predict,
schedule = list(is = 104L, oos = 4L, step = 4L),
transform = "zscore",
selection = list(top_k = 15L),
weighting = list(method = "rank"),
prices = sample_prices_weekly,
initial_capital = 1e5,
name = "Ensemble (available learners)"
)
print(res_ens$backtest)
plot(res_ens$backtest, type = "performance")
## ----xgb_per_group, eval=has_xgb----------------------------------------------
symbols <- setdiff(names(sample_prices_weekly), "Date")
gmap <- demo_sector_map(symbols, n_groups = 4L) # demo mapping for the sample data
xgb_g <- ml_make_model(
"xgboost",
params = list(objective = "reg:squarederror", max_depth = 3L, eta = 0.05),
nrounds = if (heavy) 250L else 150L
)
set.seed(3)
res_xgb_sect <- ml_backtest(
features_list = X, labels = Y,
fit_fn = xgb_g$fit, predict_fn = xgb_g$predict,
schedule = list(is = 104L, oos = 4L, step = 4L),
group = "per_group", group_map = gmap, # sector-neutral training
transform = "zscore",
selection = list(top_k = 15L),
weighting = list(method = "softmax", temperature = 12),
prices = sample_prices_weekly,
initial_capital = 1e5,
name = "XGBoost (per-sector neutral)"
)
print(res_xgb_sect$backtest)
plot(res_xgb_sect$backtest, type = "performance")
## ----seq_gru_pooled, eval=run_seq---------------------------------------------
# tf_autograph_scope() # isolate TF Autograph cache to avoid detritus NOTE
#
# # Build a compact 'returns pyramid' of momentum-like sequences (lagged)
# r1 <- panel_lag(calc_momentum(sample_prices_weekly, 1L), 1L)
# r4 <- panel_lag(calc_momentum(sample_prices_weekly, 4L), 1L)
# r12 <- panel_lag(calc_momentum(sample_prices_weekly, 12L), 1L)
#
# features_seq <- list(r1 = r1, r4 = r4, r12 = r12)
# Y_seq <- make_labels(sample_prices_weekly, horizon = 4L, type = "log")
#
# units <- if (heavy) 32L else 8L
# epochs <- if (heavy) 12L else 4L
#
# seq_gru <- ml_make_seq_model(
# "gru",
# steps = 26L,
# units = units,
# epochs = epochs,
# seed = 42L
# )
#
# set.seed(42)
# res_seq_gru <- ml_backtest_seq(
# features_list = features_seq,
# labels = Y_seq,
# steps = 26L,
# horizon = 4L,
# fit_fn = seq_gru$fit,
# predict_fn = seq_gru$predict,
# schedule = list(is = 104L, oos = 4L, step = 4L),
# group = "pooled",
# normalize = "zscore",
# selection = list(top_k = 10L),
# weighting = list(method = "softmax", temperature = 12),
# prices = sample_prices_weekly,
# initial_capital = 1e5,
# name = "Seq-GRU (pooled): returns pyramid (26x3)"
# )
#
# print(res_seq_gru$backtest)
# plot(res_seq_gru$backtest, type = "performance")
## ----seq_gru_per_group, eval=run_seq------------------------------------------
# tf_autograph_scope() # isolate TF Autograph cache for this chunk too
#
# symbols <- setdiff(names(sample_prices_weekly), "Date")
# gmap <- demo_sector_map(symbols, n_groups = 4L)
#
# units_g <- if (heavy) 32L else 8L
# epochs_g <- if (heavy) 12L else 4L
#
# seq_gru_g <- ml_make_seq_model(
# "gru",
# steps = 26L,
# units = units_g,
# epochs = epochs_g,
# seed = 123L
# )
#
# set.seed(123)
# res_seq_gru_g <- ml_backtest_seq(
# features_list = features_seq,
# labels = Y_seq,
# steps = 26L,
# horizon = 4L,
# fit_fn = seq_gru_g$fit,
# predict_fn = seq_gru_g$predict,
# schedule = list(is = 104L, oos = 4L, step = 4L),
# group = "per_group", group_map = gmap,
# normalize = "zscore",
# selection = list(top_k = 10L),
# weighting = list(method = "softmax", temperature = 12),
# prices = sample_prices_weekly,
# initial_capital = 1e5,
# name = "Seq-GRU (per-sector neutral): returns pyramid (26x3)"
# )
#
# print(res_seq_gru_g$backtest)
# plot(res_seq_gru_g$backtest, type = "performance")
## ----light_tuning, eval=heavy-------------------------------------------------
# topk_vals <- c(8L, 10L, 12L, 15L)
# temp_vals <- c(8, 12, 16)
#
# score_tbl <- tune_ml_backtest(
# features_list = X, labels = Y, prices = sample_prices_weekly,
# fit_fn = ridge$fit, predict_fn = ridge$predict,
# schedule = list(is = 104L, oos = 4L, step = 4L),
# grid = list(
# top_k = topk_vals,
# temperature = temp_vals,
# method = "softmax",
# transform = "zscore"
# )
# )
#
# score_tbl[order(-sharpe)][1:10]
## ----tf_cleanup, include=FALSE------------------------------------------------
# Best-effort cleanup of TensorFlow Autograph cache files that can trigger
# "detritus in the temp directory" NOTE
try(unlink(Sys.glob(file.path(tempdir(), "__autograph_generated_file*.py")), force = TRUE), silent = TRUE)
try(unlink(file.path(tempdir(), "__pycache__"), recursive = TRUE, force = TRUE), silent = TRUE)
## ----session------------------------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.