knitr::opts_chunk$set(echo = F,
                      warning = F,
                      message = F)

## Set options
options(scipen = 999, # prevent scientific notation on large numbers
        stringsAsFactors = F) # prevent R from turning everything to factors

Summary of Trials

# Define paths and load libraries
rm(list=ls())
working_directory <- here::here()
source(file.path(working_directory,"R/set_paths.R"))
source(file.path(working_directory,"results/parameters.R"))

library(pbo)
library(xts)
require(lattice)
require(latticeExtra)
require(grid)
library(ggplot2)
library(reshape2)
library(dygraphs)
# Time the script
allbegin <- Sys.time()

######################################################
# convenience function to convert tidy tibble to xts
tidy_tibble_to_xts <- function(tibble) {
  xts_tibble <- xts(tibble %>% select(-date), order.by=tibble$date)
  return(xts_tibble)
}
######################################################
# read in returns
# remove date column because it is not required by pbo package
daily_returns <- tidy_tibble_to_xts(
  read_feather(file.path(results_directory, "daily_returns.feather")))
total_returns <- tidy_tibble_to_xts(
  read_feather(file.path(results_directory, "total_returns.feather")))

Number of Trials: r ncol(daily_returns) - 1

Returns

# Plot each trials total return on a separate axis
df_melt = melt(total_returns, id.vars = 'date')
#ggplot(df_melt, aes(x = date, y = value)) + 
#  geom_line() + 
#  facet_wrap(~ variable, scales = 'free_y', ncol = 1)

# Plot all returns on a single plot
#color <- rainbow(ncol(total_returns))
#ts.plot(total_returns, gpars= list(col=color))
#legend("topright", legend=colnames(total_returns), lty=1, col=color)
dygraph(total_returns, 
                main = "All Trials Normalised Total Returns", 
        ylab = "Indexed Value") %>%
  dyLegend(width = 600, show = "follow") %>%
  dyOptions(maxNumberWidth = 20, stackedGraph = FALSE) %>%
  dyRangeSelector %>%
  dyRebase(value=100) %>%
  dyHighlight(highlightSeriesOpts = list(strokeWidth = 3))

Probability of Backtest Overfitting

We use the methods described in the (Probability of Backtest Overfitting) [https://papers.ssrn.com/sol3/papers.cfm?abstract_id=2326253] to detect overfitting.

To compute the probability of backtest overfitting, we need to define a performance metric. We can use any performance metric we want. We use the same performance metric here as was used in the orignal paper - the Sharpe Ratio. This ratio is defined in the pbo package vignette as -

sharpe <- function(x,rf=daily_risk_free_rate) {
  sr <- apply(x,2,function(col) {
    er = col - rf
    return(mean(er)/sd(er))
  })
  return(sr)
}

Results

my_pbo <- pbo(daily_returns,s=8,f=sharpe,threshold=0)
pbo_summary <- summary(my_pbo)
summary <- as.data.frame(pbo_summary)
rownames(summary) <- c("PBO", "Slope", "Adjusted R-squared", "Probability of Loss")
colnames(summary) <- "Value"
summary$Range <- c("0->1", "-inf->inf", "0->1", "0->1")
summary$Desirable  <- c("1", "NA", "1", "0")
knitr::kable(summary)

Histogram

This is a histogram of the logits. A negative value indicate a best in-sample trial that performed below the median trial out of sample; a positive value indicates a best in-sample trial that performed better than the median trial out of sample.

histogram(my_pbo, type='density')

Degradation

The performance degradation is a regression of out of sample performance to in-sample performance. A negative slope indicates that greater in-sample performance is related to weaker out of sample performance.

xyplot(my_pbo,plotType="degradation")

Stochastic Dominance

This plot checks whether the the algorithm selection procedure (in this case, ranking by Sharpe Ratio) is preferable to random selection of trials from the cohort.

xyplot(my_pbo,plotType="dominance",increment=0.001)

Dotplot

This is a sorted plot of how often each study is selected. We would expect a signal-finding algorithm to be selected more often.

dotplot(my_pbo)

Pairs

This shows how well in sample selection does out of sample. We would like to see some sort of relationship between them.

xyplot(my_pbo,plotType="pairs")

Ranks

xyplot(my_pbo,plotType="ranks",ylim=c(0,20))


riazarbi/equity_analysis documentation built on June 8, 2019, 1:36 p.m.