ignore/make_predmat.R

####################################################################################
# FILE make_predmat.R
#
# TODO:  Validate and test make_predmat with complex features
#
####################################################################################
#
# FUNCTION make_predmat
#
#' Generate a predictor xts matrix for use by a ML model
#'
#' The assumption is multiple xts matrices have apriori been computed, each of
#' them holding the values of a given predictor.  For example, prior to calling
#' this function, a matrix (mom10) may have been calculated to hold the values
#' of 10 months momentum for all the symbols in the universe of interest.  Function
#' make_predmat will then extract all the predictors from the parent environment
#' as specified in argument prednames.
#'
#' In addition, the function can do a negative lag (lead) the predictor variable.
#'
#'
#'
#' @param feature_list  A list containing multiple xts matrices, each corresponding to a
#'                      feature. Each xts matrix may contain the feature for a universe of
#'                      assets, but only the column specified by `symbol` will be extracted.
#'                      One of the features in the list should be the target variable if this
#'                      is used to train a ML model via supervised learning.  The name of the
#'                      target variable is specified by argument `target` (see below).
#'
#' @param symbol        The name of the symbol in the form of a character vector of length 1.
#'                      This is used to extract the relevant column from each feature matrices
#'                      in the feature_list.
#'
#' @param Nlags         The number of periods (positive or negative) by which each of features
#'                      in the feature_list will be lagged.  Must be a numeric vector of length
#'                      (feature_list).  When the feature list is generated by function
#'                      make_features(), then the first feature is normally the target value y.
#'                      The default value is c(1, rep(0, length(feature_list) - 1)) to lag the
#'                      target y by one day.
#'
#' @param target        This is the name of the target value as found in the feature_list.
#'                      Default is "y".
#'
#' @return  Returns an xts matrix with the following columns:
#' \describe{
#'   \item{\preformatted{xts index:}}{
#'      The predictor dates are the xts matrix index, in the form of as.Date.  This is
#'      normally the close date at which the predictions are made
#'
#'      }
#'   \item{\preformatted{y:}}{
#'      The target variable y, appropriately named as y. This column is moved forward in
#'      time by Nlags periods to reflect that y is in the future.  NA padding at the tail
#'      is done as appropriate.
#'
#'      }
#'   \item{\preformatted{predictor columns (kept in order):}}{
#'      The extracted predictor columns, kept in the order specified in prednames.
#'
#'      }
#' }
#'
#' @export
#-------------------------------------------------------------------------------------
make_predmat <- function(feature_list, symbol,
                         Nlags = c(1, rep(0, length(feature_list) - 1)),
                         target = "y") {

  prednames <- names(feature_list)
  # Check for errors

  if(length(Nlags) != length(prednames))
    stop("make_predmat:  length(Nlags) must equal length(feature_list)")

  # Extract all predictors from the features list
  pred <- xts()
  for(i in prednames) {

    x    <- feature_list[[i]]

    # match consecutive characters in colnames of x to symbol
    cn       <- colnames(x)
    cn_which <- cn[str_detect(cn, symbol)]

    # extract it and merge to pred
    x   <- x[, cn_which, drop = FALSE]
    colnames(x) <- i
    pred <- merge(pred, x)
  }

  # Clean up the index to have date only, no GMT times
  pred <- xts(pred, order.by = as.Date(index(pred)))


  #------------------------------------------------------------
  # Build the matrix and lag each column by their
  # respective Nlags value.
  #------------------------------------------------------------
  #y           <- y[, symbol]
  #colnames(y) <- "y"
  #predmat  <- merge(y, pred[, prednames])

  predmat <- pred[, prednames]

  for(i in 1:length(Nlags)) {
    predmat[, i] <- xts::lag.xts(predmat[, i], k = Nlags[i])
  }


  return(predmat)
}

#------------------------------------------------------------------------------------
#  This is the old version that uses the environment to get its data, rather
#  than a list.
#' @export
#-------------------------------------------------------------------------------------
make_predmatold <- function(y, prednames, symbol, Nlags = c(1, rep(0, length(prednames))),
                            envir = as.environment(-1)) {

  # Check for errors
  #sprint("Length Nlags: %s,  Length prednames: %s", length(Nlags), length(prednames))
  if(length(Nlags) != length(prednames) + 1)
    stop("make_predmat:  length(Nlags) must equal length(prednames) + 1")

  # Extract all predictors from the parent environment (prednames)
  pred <- xts()
  for(i in prednames) {
    x   <- get(i, inherits = TRUE, envir = envir)

    # match consecutive characters in colnames of x to symbol
    cn       <- colnames(x)
    cn_which <- cn[str_detect(cn, symbol)]

    # extract it and merge to pred
    x   <- x[, cn_which, drop = FALSE]
    colnames(x) <- i
    pred <- merge(pred, x)
  }

  # Clean up the index to have date only, no GMT times
  pred <- xts(pred, order.by = as.Date(index(pred)))


  #------------------------------------------------------------
  # Build the matrix and lag each column by their
  # respective Nlags value.
  #------------------------------------------------------------
  y           <- y[, symbol]
  colnames(y) <- "y"
  predmat  <- merge(y, pred[, prednames])

  for(i in 1:length(Nlags)) {
    predmat[, i] <- xts::lag.xts(predmat[, i], k = Nlags[i])
  }
  #predmat  <- merge(xts::lag.xts(y, k = -Nlag), pred[, prednames])
  #  predmat <- predmat[complete.cases(predmat), ]

  return(predmat)
}
jeanmarcgp/xtsanalytics documentation built on May 19, 2019, 12:38 a.m.