knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)

## Set options
options(scipen = 999,             # prevent scientific notation on large numbers
        stringsAsFactors = FALSE) # prevent R from converting strings to factors
# Define paths and load libraries
rm(list = ls())
working_directory <- here::here()
source(file.path(working_directory, "R/set_paths.R"))
source(file.path(working_directory, "results/parameters.R"))

library(pbo)
library(xts)
library(lattice)
library(latticeExtra)
library(grid)
library(ggplot2)
library(reshape2)
library(dygraphs)
library(dplyr)    # for %>% and select()
library(feather)  # for read_feather()

# Time the script
allbegin <- Sys.time()

######################################################
# Convenience function to convert a tidy tibble to an xts object,
# using the date column as the index
tidy_tibble_to_xts <- function(tibble) {
  xts_tibble <- xts(tibble %>% select(-date), order.by = tibble$date)
  return(xts_tibble)
}
######################################################
# Read in returns; the date column is moved to the xts index
# because it is not required by the pbo package
daily_returns <- tidy_tibble_to_xts(
  read_feather(file.path(results_directory, "daily_returns.feather")))
total_returns <- tidy_tibble_to_xts(
  read_feather(file.path(results_directory, "total_returns.feather")))
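For reference, the conversion assumes a tidy layout with a date column plus one column of returns per trial. A minimal hypothetical example of the expected shape (column names are illustrative; the real files are produced by an upstream script):

# Hypothetical example of the expected input layout (illustrative only)
toy_returns <- tibble::tibble(
  date    = seq(as.Date("2020-01-01"), by = "day", length.out = 5),
  trial_1 = rnorm(5, 0, 0.01),
  trial_2 = rnorm(5, 0, 0.01)
)
tidy_tibble_to_xts(toy_returns)   # a 5 x 2 xts object indexed by date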
Number of Trials: `r ncol(daily_returns) - 1`
# Plot each trial's total return on a separate axis
# (reshaping and faceted ggplot left commented out)
#df_melt <- melt(total_returns, id.vars = 'date')
#ggplot(df_melt, aes(x = date, y = value)) +
#  geom_line() +
#  facet_wrap(~ variable, scales = 'free_y', ncol = 1)

# Alternative: all returns on a single static plot
#color <- rainbow(ncol(total_returns))
#ts.plot(total_returns, gpars = list(col = color))
#legend("topright", legend = colnames(total_returns), lty = 1, col = color)

# Interactive plot of all trials' total returns, rebased to 100
dygraph(total_returns, main = "All Trials Normalised Total Returns",
        ylab = "Indexed Value") %>%
  dyLegend(width = 600, show = "follow") %>%
  dyOptions(maxNumberWidth = 20, stackedGraph = FALSE) %>%
  dyRangeSelector() %>%
  dyRebase(value = 100) %>%
  dyHighlight(highlightSeriesOpts = list(strokeWidth = 3))
We use the methods described in [The Probability of Backtest Overfitting](https://papers.ssrn.com/sol3/papers.cfm?abstract_id=2326253) to detect overfitting.
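At a high level, the procedure (combinatorially symmetric cross-validation) partitions the matrix of trial returns into blocks, uses each combination of half the blocks as the in-sample set and the remainder as the out-of-sample set, selects the best in-sample trial, and records how that trial ranks out of sample. A rough sketch of a single split on simulated data (illustrative only; the pbo package handles all combinations and the statistics for us):

# One illustrative CSCV-style split on toy data (not the pbo package internals)
set.seed(42)
toy <- matrix(rnorm(200 * 6, 0, 0.01), ncol = 6,
              dimnames = list(NULL, paste0("trial_", 1:6)))
is_rows  <- 1:100                               # first half in-sample
oos_rows <- 101:200                             # second half out-of-sample
is_perf  <- apply(toy[is_rows, ],  2, function(r) mean(r) / sd(r))
oos_perf <- apply(toy[oos_rows, ], 2, function(r) mean(r) / sd(r))
best <- names(which.max(is_perf))               # best trial in-sample
rank(oos_perf)[best] / (ncol(toy) + 1)          # its relative rank out-of-sample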
To compute the probability of backtest overfitting we first need to define a performance metric; any metric will do. We use the same metric as the original paper, the Sharpe ratio, which is defined in the pbo package vignette as:
sharpe <- function(x, rf = daily_risk_free_rate) {
  sr <- apply(x, 2, function(col) {
    er <- col - rf
    return(mean(er) / sd(er))
  })
  return(sr)
}
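As a quick sanity check, the function can be applied to any matrix-like object of per-period returns. The default rf = daily_risk_free_rate is assumed to be set in the parameters.R sourced above; here we pass a zero rate and toy data:

# Hypothetical sanity check on simulated daily returns (rf = 0 for simplicity)
set.seed(1)
toy <- matrix(rnorm(252 * 3, mean = 0.0004, sd = 0.01), ncol = 3,
              dimnames = list(NULL, paste0("trial_", 1:3)))
sharpe(toy, rf = 0)   # one per-period (non-annualised) Sharpe ratio per column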
my_pbo <- pbo(daily_returns, s = 8, f = sharpe, threshold = 0)
pbo_summary <- summary(my_pbo)

summary <- as.data.frame(pbo_summary)
rownames(summary) <- c("PBO", "Slope", "Adjusted R-squared", "Probability of Loss")
colnames(summary) <- "Value"
summary$Range <- c("0->1", "-inf->inf", "0->1", "0->1")
summary$Desirable <- c("0", "NA", "1", "0")  # a low PBO and low probability of loss are desirable
knitr::kable(summary)
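With s = 8, the returns are cut into eight blocks and every choice of four blocks as the in-sample half is evaluated, so the statistics above are based on:

choose(8, 4)   # 70 in-sample/out-of-sample combinations evaluated by pbo()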
This is a histogram of the logits. A negative value indicates that the best in-sample trial performed below the median trial out of sample; a positive value indicates that the best in-sample trial performed better than the median trial out of sample.
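For example, a best in-sample trial that lands at relative out-of-sample rank 0.25 (below the median) has a negative logit, while one at rank 0.75 has a positive logit (logit_example is an illustrative helper, not part of the pbo package):

logit_example <- function(w) log(w / (1 - w))   # illustrative helper
logit_example(0.25)   # about -1.10: best IS trial underperformed the OOS median
logit_example(0.75)   # about +1.10: best IS trial beat the OOS median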
histogram(my_pbo, type='density')
The performance degradation plot is a regression of out-of-sample performance on in-sample performance. A negative slope indicates that stronger in-sample performance is associated with weaker out-of-sample performance.
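Conceptually this is just a linear fit of out-of-sample performance on in-sample performance across the splits; a toy sketch with hypothetical values (not how the pbo package computes it internally):

# Toy illustration of the degradation regression (hypothetical per-split values)
is_sharpe  <- c(1.2, 0.8, 0.5, 1.5, 0.9)    # in-sample Sharpe of the selected trial
oos_sharpe <- c(0.3, 0.6, 0.4, -0.1, 0.5)   # out-of-sample Sharpe of the same trial
fit <- lm(oos_sharpe ~ is_sharpe)
coef(fit)[["is_sharpe"]]       # slope: negative values indicate degradation
summary(fit)$adj.r.squared     # the adjusted R-squared reported in the table above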
xyplot(my_pbo,plotType="degradation")
This plot checks whether the algorithm selection procedure (in this case, ranking by Sharpe ratio) is preferable to random selection of trials from the cohort.
xyplot(my_pbo,plotType="dominance",increment=0.001)
This is a sorted plot of how often each trial is selected as best in-sample. We would expect trials that have found a genuine signal to be selected more often than the rest.
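In spirit, this is a frequency count of which trial was chosen as best in-sample across the splits, for example (hypothetical selections):

# Hypothetical selection counts across splits
selected <- c("trial_3", "trial_3", "trial_1", "trial_3", "trial_2", "trial_3")
sort(table(selected), decreasing = TRUE)   # a trial with genuine signal should dominate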
dotplot(my_pbo)
This shows how well the in-sample selection does out of sample. We would like to see a positive relationship between in-sample and out-of-sample performance.
xyplot(my_pbo,plotType="pairs")
xyplot(my_pbo,plotType="ranks",ylim=c(0,20))