R/backtestLeaderboard.R


#' @title Leaderboard of portfolios from the backtest results
#' 
#' @description Leaderboard of portfolios according to the backtesting results 
#' and a ranking based on the combination of several performance criteria.
#' Since the different performance measures have different ranges and distributions, 
#' each is first transformed according to its empirical distribution function (computed 
#' across the portfolios being ranked) to obtain percentile scores. After that 
#' transformation, each measure follows an empirical uniform distribution on the 
#' interval \code{[0, 100]} and can be weighted to obtain the final ranking.
#' 
#' @inheritParams backtestSummary
#' @param weights List of weights for the different performance measures as obtained 
#'                in \code{\link{backtestSummary}()$performance} (i.e., 
#'                \code{"Sharpe ratio"}, \code{"max drawdown"}, \code{"annual return"}, \code{"annual volatility"}, 
#'                \code{"Sterling ratio"}, \code{"Omega ratio"}, \code{"ROT bps"}, as well as
#'                \code{"cpu time"} and \code{"failure rate"}.
#'                For example: \code{weights = list("Sharpe ratio" = 8, "max drawdown" = 4)}.
#'                
#' @return List with the following elements:
#' \item{\code{leaderboard_scores}}{Matrix with the individual scores for the portfolios (as chosen in \code{weights}) and the final score.}
#' \item{\code{leaderboard_performance}}{Matrix with all the performance measures for the portfolios.}
#' \item{\code{error_summary}}{Error messages generated by each portfolio on each dataset. 
#'                             Useful for debugging and for giving feedback to the managers of the 
#'                             different portfolios.}
#' 
#' @author Daniel P. Palomar and Rui Zhou
#' 
#' @examples
#' \donttest{ 
#' library(portfolioBacktest)
#' data(dataset10)  # load dataset
#' 
#' # define your own portfolio function
#' quintile_portfolio <- function(data, ...) {
#'   X <- diff(log(data$adjusted))[-1]  
#'   N <- ncol(X)
#'   ranking <- sort(colMeans(X), decreasing = TRUE, index.return = TRUE)$ix
#'   w <- rep(0, N)
#'   w[ranking[1:round(N/5)]] <- 1/round(N/5)
#'   return(w)
#' }
#' 
#' # do backtest
#' bt <- portfolioBacktest(quintile_portfolio, dataset10,
#'                         benchmark = c("1/N", "index"))
#' 
#' # see all performance measures available for the ranking
#' backtestSummary(bt)$performance
#' 
#' # show leaderboard
#' leaderboard <- backtestLeaderboard(bt, weights = list("Sharpe ratio"  = 6,
#'                                                       "max drawdown"  = 1,
#'                                                       "ROT (bps)"     = 1,
#'                                                       "cpu time"      = 1,
#'                                                       "failure rate"  = 1))
#' leaderboard$leaderboard_scores
#' }
#'
#' @importFrom stats median
#' @importFrom utils modifyList
#' @export
backtestLeaderboard <- function(bt = NA, weights = list(), summary_fun = median, show_benchmark = TRUE) {
  if (!is.list(weights)) stop("Argument \"weights\" must be a list.")
  if (any(unlist(weights) < 0)) stop("All weights must be non-negative.")
  if (all(unlist(weights) == 0)) stop("Cannot set all weights to zero.")

  tmp <- backtestSummary(bt, summary_fun = summary_fun, show_benchmark = show_benchmark)
  performance_summary <- t(tmp$performance_summary)
  error_message       <- tmp$error_message
  
  performance_names <- colnames(performance_summary)
  judge <- c(attr(bt[[1]][[1]]$performance, "desired_direction"), -1, -1)  # the last two are for "cpu time" and "failure rate"
  weights_defname <- colnames(performance_summary)
  weights_default <- as.list(rep(0, length(weights_defname)))
  names(weights_default) <- weights_defname
  weights_comb <- modifyList(weights_default, weights)
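  # note: modifyList() keeps the zero default for any measure not named in "weights",
  # e.g., modifyList(list(a = 0, b = 0), list(a = 2)) gives list(a = 2, b = 0);
  # a name in "weights" that is not a valid measure enlarges the list and is caught below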
  if (length(weights_comb) != length(weights_default)) stop("Argument \"weights\" contains invalid elements.")
  
  weights_comb <- unlist(weights_comb)
  mask_criteria <- weights_comb > 0
  
  # compute and sort the scores of the valid portfolios (those that did not fail on every dataset)
  weights_rescaled <- weights_comb / sum(weights_comb)
  mask_valid <- performance_summary[, "failure rate"] != 1
  if (all(!mask_valid))
    stop("All portfolios gave errors.")
  scores <- apply(t(t(performance_summary[mask_valid, ]) * judge), 2, rank_percentile)
  final_score <- scores %*% weights_rescaled
  index_sorting <- sort(final_score, decreasing = TRUE, index.return = TRUE, na.last = TRUE)$ix
  
  # combine the valid and invalid scores
  leaderboard_valid <- cbind(scores[index_sorting, ], final_score[index_sorting])
  leaderboard_invalid <- matrix(NA, sum(!mask_valid), length(weights_comb) + 1)
  leaderboard <- rbind(leaderboard_valid, leaderboard_invalid)
  
  # recover the original portfolio indices: valid (sorted by score) first, then invalid
  index_valid_sorted <- (1:length(mask_valid))[mask_valid][index_sorting]
  index_sorted <- c(index_valid_sorted, (1:length(mask_valid))[-index_valid_sorted])

  # reorder the error messages and the original performance measures accordingly
  error_summary <- error_message[index_sorted]
  leaderboard_performance <- performance_summary[index_sorted, ]
  
  # add rownames and colnames
  rownames(leaderboard) <- rownames(leaderboard_performance) <- names(error_message)[index_sorted]
  colnames(leaderboard) <- paste(c(names(weights_default), 'final'), 'score')
  colnames(leaderboard_performance) <- names(weights_default)
  
  # return 
  return(list("leaderboard_scores" = leaderboard[, c(mask_criteria, TRUE)],
              "leaderboard_performance" = leaderboard_performance,
              "error_summary" = error_summary))
}

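# map a numeric vector to percentile scores in the interval [0, 100] based on its
# empirical distribution function (the smallest value gets 0, the largest gets 100)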
rank_percentile <- function(x) {
  N <- length(x)
  rank_pctl <- stats::ecdf(x)(x)
  rank_pctl <- (rank_pctl - 1/N)/(1 - 1/N)
  return (100*rank_pctl)
}
# sanity check: hist(rank_percentile(rnorm(10000)), breaks = 100)
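
# Illustrative sketch (not part of the package code path): how the percentile scores
# and the weights combine into a final score. The portfolio performance values below
# are made up purely for demonstration; backtestLeaderboard() obtains the real ones
# from backtestSummary().
toy_performance <- cbind("Sharpe ratio" = c(1.2, 0.8, 1.5),     # higher is better
                         "max drawdown" = c(0.20, 0.10, 0.35))  # lower is better
toy_judge <- c(1, -1)                          # sign flip so "higher is better" everywhere
toy_scores <- apply(t(t(toy_performance) * toy_judge), 2, rank_percentile)
toy_weights <- c(2, 1) / sum(c(2, 1))          # weights rescaled to sum to 1
toy_final <- drop(toy_scores %*% toy_weights)  # final scores in [0, 100]
# sort(toy_final, decreasing = TRUE)           # portfolio ranking, best first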
  
