####################################################################################
# FILE make_predmat.R
#
# TODO: Validate and test make_predmat with complex features
#
####################################################################################
#
# FUNCTION make_predmat
#
#' Generate a predictor xts matrix for use by a ML model
#'
#' The assumption is multiple xts matrices have apriori been computed, each of
#' them holding the values of a given predictor. For example, prior to calling
#' this function, a matrix (mom10) may have been calculated to hold the values
#' of 10 months momentum for all the symbols in the universe of interest. Function
#' make_predmat will then extract all the predictors from the parent environment
#' as specified in argument prednames.
#'
#' In addition, the function can do a negative lag (lead) the predictor variable.
#'
#'
#'
#' @param feature_list A list containing multiple xts matrices, each corresponding to a
#' feature. Each xts matrix may contain the feature for a universe of
#' assets, but only the column specified by `symbol` will be extracted.
#' One of the features in the list should be the target variable if this
#' is used to train a ML model via supervised learning. The name of the
#' target variable is specified by argument `target` (see below).
#'
#' @param symbol The name of the symbol in the form of a character vector of length 1.
#' This is used to extract the relevant column from each feature matrices
#' in the feature_list.
#'
#' @param Nlags The number of periods (positive or negative) by which each of features
#' in the feature_list will be lagged. Must be a numeric vector of length
#' (feature_list). When the feature list is generated by function
#' make_features(), then the first feature is normally the target value y.
#' The default value is c(1, rep(0, length(feature_list) - 1)) to lag the
#' target y by one day.
#'
#' @param target This is the name of the target value as found in the feature_list.
#' Default is "y".
#'
#' @return Returns an xts matrix with the following columns:
#' \describe{
#' \item{\preformatted{xts index:}}{
#' The predictor dates are the xts matrix index, in the form of as.Date. This is
#' normally the close date at which the predictions are made
#'
#' }
#' \item{\preformatted{y:}}{
#' The target variable y, appropriately named as y. This column is moved forward in
#' time by Nlags periods to reflect that y is in the future. NA padding at the tail
#' is done as appropriate.
#'
#' }
#' \item{\preformatted{predictor columns (kept in order):}}{
#' The extracted predictor columns, kept in the order specified in prednames.
#'
#' }
#' }
#'
#' @export
#-------------------------------------------------------------------------------------
make_predmat <- function(feature_list, symbol,
Nlags = c(1, rep(0, length(feature_list) - 1)),
target = "y") {
prednames <- names(feature_list)
# Check for errors
if(length(Nlags) != length(prednames))
stop("make_predmat: length(Nlags) must equal length(feature_list)")
# Extract all predictors from the features list
pred <- xts()
for(i in prednames) {
x <- feature_list[[i]]
# match consecutive characters in colnames of x to symbol
cn <- colnames(x)
cn_which <- cn[str_detect(cn, symbol)]
# extract it and merge to pred
x <- x[, cn_which, drop = FALSE]
colnames(x) <- i
pred <- merge(pred, x)
}
# Clean up the index to have date only, no GMT times
pred <- xts(pred, order.by = as.Date(index(pred)))
#------------------------------------------------------------
# Build the matrix and lag each column by their
# respective Nlags value.
#------------------------------------------------------------
#y <- y[, symbol]
#colnames(y) <- "y"
#predmat <- merge(y, pred[, prednames])
predmat <- pred[, prednames]
for(i in 1:length(Nlags)) {
predmat[, i] <- xts::lag.xts(predmat[, i], k = Nlags[i])
}
return(predmat)
}
#------------------------------------------------------------------------------------
# This is the old version that uses the environment to get its data, rather
# than a list.
#' @export
#-------------------------------------------------------------------------------------
make_predmatold <- function(y, prednames, symbol, Nlags = c(1, rep(0, length(prednames))),
envir = as.environment(-1)) {
# Check for errors
#sprint("Length Nlags: %s, Length prednames: %s", length(Nlags), length(prednames))
if(length(Nlags) != length(prednames) + 1)
stop("make_predmat: length(Nlags) must equal length(prednames) + 1")
# Extract all predictors from the parent environment (prednames)
pred <- xts()
for(i in prednames) {
x <- get(i, inherits = TRUE, envir = envir)
# match consecutive characters in colnames of x to symbol
cn <- colnames(x)
cn_which <- cn[str_detect(cn, symbol)]
# extract it and merge to pred
x <- x[, cn_which, drop = FALSE]
colnames(x) <- i
pred <- merge(pred, x)
}
# Clean up the index to have date only, no GMT times
pred <- xts(pred, order.by = as.Date(index(pred)))
#------------------------------------------------------------
# Build the matrix and lag each column by their
# respective Nlags value.
#------------------------------------------------------------
y <- y[, symbol]
colnames(y) <- "y"
predmat <- merge(y, pred[, prednames])
for(i in 1:length(Nlags)) {
predmat[, i] <- xts::lag.xts(predmat[, i], k = Nlags[i])
}
#predmat <- merge(xts::lag.xts(y, k = -Nlag), pred[, prednames])
# predmat <- predmat[complete.cases(predmat), ]
return(predmat)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.