# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
#' Calculate a pointer to a moment function from the function name (string).
#'
#' @param \code{funame} A \emph{character string} specifying the
#' function name.
#'
#' @return A pointer to a moment function.
#'
#' @details
#' The function \code{calc_momptr()} calculates a pointer to a moment
#' function from the function name (string).
#' The function pointer is used internally in the \code{C++} code, but it's
#' not exported to \code{R}.
#' A moment function takes three arguments:
#' \itemize{
#' \item A \emph{time series} or a \emph{matrix} of data,
#' \item A \emph{character string} specifying the type of the moment,
#' \item A number specifying the confidence level for calculating the
#' quantiles of returns.
#' }
#' The function name must be one of the following:
#' \itemize{
#' \item "calc_mean" for the estimator of the mean (location),
#' \item "calc_var" for the estimator of the dispersion (variance),
#' \item "calc_skew" for the estimator of the skewness,
#' \item "calc_kurtosis" for the estimator of the kurtosis.
#' }
#' (The default is \code{funame = "calc_mean"}.)
#'
#' @export
NULL
#' Create a named list of model parameters that can be passed into regression
#' and machine learning functions.
#'
#' @param \code{regmod} A \emph{character string} specifying the type of
#' regression model (the default is \code{regmod = "least_squares"}).
#'
#' @param \code{intercept} A \emph{Boolean} specifying whether an intercept
#' term should be added to the predictor (the default is \code{intercept =
#' TRUE}).
#'
#' @param \code{singmin} A \emph{numeric} threshold level for discarding
#' small \emph{singular values} in order to regularize the inverse of the
#' predictor matrix (the default is \code{1e-5}).
#'
#' @param \code{dimax} An \emph{integer} equal to the number of \emph{singular
#' values} used for calculating the \emph{reduced inverse} of the
#' predictor matrix (the default is \code{dimax = 0} - standard matrix
#' inverse using all the \emph{singular values}).
#'
#' @param \code{residscale} A \emph{character string} specifying how the
#' regression residuals should be scaled (the default is \code{residscale =
#' "none"}). NOTE(review): the valid choices are defined in the C++ code -
#' confirm against the C++ source.
#'
#' @param \code{confl} The confidence level for calculating the quantiles of
#' returns (the default is \code{confl = 0.1}).
#'
#' @param \code{alpha} The shrinkage intensity of \code{returns} (with values
#' between \code{0} and \code{1} - the default is \code{0}).
#'
#' @return A named list of model parameters that can be passed into regression
#' and machine learning functions.
#'
#' @details
#' The function \code{param_reg()} creates a named list of model parameters
#' that can be passed into regression and machine learning functions. For
#' example into the functions \code{calc_reg()} and \code{roll_reg()}.
#'
#' The function \code{param_reg()} simplifies the creation of regression
#' parameter lists. The users can create a parameter list with the default
#' values, or they can specify custom parameter values.
#'
#' @examples
#' \dontrun{
#' # Create a default list of regression parameters
#' controlv <- HighFreq::param_reg()
#' unlist(controlv)
#' # Create a custom list of regression parameters
#' controlv <- HighFreq::param_reg(intercept=FALSE, regmod="regular", dimax=4)
#' unlist(controlv)
#' }
#'
#' @export
param_reg <- function(regmod = "least_squares", intercept = TRUE, singmin = 1e-5, dimax = 0L, residscale = "none", confl = 0.1, alpha = 0.0) {
.Call(`_HighFreq_param_reg`, regmod, intercept, singmin, dimax, residscale, confl, alpha)
}
#' Create a named list of model parameters that can be passed into portfolio
#' optimization functions.
#'
#' @param \code{method} A \emph{character string} specifying the method for
#' calculating the portfolio weights (the default is \code{method =
#' "sharpem"}).
#'
#' @param \code{singmin} A \emph{numeric} threshold level for discarding
#' small \emph{singular values} in order to regularize the inverse of the
#' \code{covariance matrix} of \code{returns} (the default is \code{1e-5}).
#'
#' @param \code{dimax} An \emph{integer} equal to the number of \emph{singular
#' values} used for calculating the \emph{reduced inverse} of the
#' \code{covariance matrix} of \code{returns} matrix (the default is
#' \code{dimax = 0} - standard matrix inverse using all the \emph{singular
#' values}).
#'
#' @param \code{confl} The confidence level for calculating the quantiles of
#' returns (the default is \code{confl = 0.1}).
#'
#' @param \code{alpha} The shrinkage intensity of \code{returns} (with values
#' between \code{0} and \code{1} - the default is \code{0}).
#'
#' @param \code{rankw} A \emph{Boolean} specifying whether the weights should
#' be ranked (the default is \code{rankw = FALSE}).
#'
#' @param \code{centerw} A \emph{Boolean} specifying whether the weights should
#' be centered (the default is \code{centerw = FALSE}).
#'
#' @param \code{scalew} A \emph{character string} specifying the method for
#' scaling the weights (the default is \code{scalew = "voltarget"}).
#'
#' @param \code{voltarget} A \emph{numeric} volatility target for scaling the
#' weights (the default is \code{0.01}).
#'
#'
#' @return A named list of model parameters that can be passed into portfolio
#' optimization functions.
#'
#' @details
#' The function \code{param_portf()} creates a named list of model parameters
#' that can be passed into portfolio optimization functions. For example
#' into the functions \code{calc_weights()} and \code{back_test()}.
#' See the function \code{calc_weights()} for more details.
#'
#' The function \code{param_portf()} simplifies the creation of portfolio
#' optimization parameter lists. The users can create a parameter list with
#' the default values, or they can specify custom parameter values.
#'
#' @examples
#' \dontrun{
#' # Create a default list of portfolio optimization parameters
#' controlv <- HighFreq::param_portf()
#' unlist(controlv)
#' # Create a custom list of portfolio optimization parameters
#' controlv <- HighFreq::param_portf(method="regular", dimax=4)
#' unlist(controlv)
#' }
#'
#' @export
param_portf <- function(method = "sharpem", singmin = 1e-5, dimax = 0L, confl = 0.1, alpha = 0.0, rankw = FALSE, centerw = FALSE, scalew = "voltarget", voltarget = 0.01) {
.Call(`_HighFreq_param_portf`, method, singmin, dimax, confl, alpha, rankw, centerw, scalew, voltarget)
}
#' Apply a lag to a single-column \emph{time series} or a \emph{vector}
#' using \code{RcppArmadillo}.
#'
#' @param \code{tseries} A single-column \emph{time series} or a
#' \emph{vector}.
#'
#' @param \code{lagg} An \emph{integer} equal to the number of periods to lag.
#' (The default is \code{lagg = 1}.)
#'
#' @param \code{pad_zeros} \emph{Boolean} argument: Should the output be padded
#' with zeros? (The default is \code{pad_zeros = TRUE}.)
#'
#' @return A column \emph{vector} with the same number of elements as the input
#' time series.
#'
#' @details
#' The function \code{lag_vec()} applies a lag to the input \emph{time
#' series} \code{tseries} by shifting its elements by the number equal to the
#' argument \code{lagg}. For positive \code{lagg} values, the elements are
#' shifted forward in time (down), and for negative \code{lagg} values they
#' are shifted backward (up).
#'
#' The output \emph{vector} is padded with either zeros (the default), or
#' with data from \code{tseries}, so that it has the same number of elements
#' as \code{tseries}.
#' If the \code{lagg} is positive, then the first element is copied and added
#' upfront.
#' If the \code{lagg} is negative, then the last element is copied and added
#' to the end.
#'
#' As a rule, if \code{tseries} contains returns data, then the output
#' \emph{matrix} should be padded with zeros, to avoid data snooping.
#' If \code{tseries} contains prices, then the output \emph{matrix} should
#' be padded with the prices.
#'
#' @examples
#' \dontrun{
#' # Create a vector of random returns
#' retp <- rnorm(1e6)
#' # Compare lag_vec() with rutils::lagit()
#' all.equal(drop(HighFreq::lag_vec(retp)),
#' rutils::lagit(retp))
#' # Compare the speed of RcppArmadillo with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#' Rcpp=HighFreq::lag_vec(retp),
#' Rcode=rutils::lagit(retp),
#' times=10))[, c(1, 4, 5)] # end microbenchmark summary
#' }
#'
#' @export
lag_vec <- function(tseries, lagg = 1L, pad_zeros = TRUE) {
# Dispatch to the compiled C++ routine registered by Rcpp::compileAttributes()
.Call(`_HighFreq_lag_vec`, tseries, lagg, pad_zeros)
}
#' Apply a lag to the rows of a \emph{time series} or a \emph{matrix} using
#' \code{RcppArmadillo}.
#'
#' @param \code{tseries} A \emph{time series} or a \emph{matrix}.
#'
#' @param \code{lagg} An \emph{integer} equal to the number of periods to lag
#' (the default is \code{lagg = 1}).
#'
#' @param \code{pad_zeros} \emph{Boolean} argument: Should the output be padded
#' with zeros? (The default is \code{pad_zeros = TRUE}.)
#'
#' @return A \emph{matrix} with the same dimensions as the input argument
#' \code{tseries}.
#'
#' @details
#' The function \code{lagit()} applies a lag to the input \emph{matrix} by
#' shifting its rows by the number equal to the argument \code{lagg}. For
#' positive \code{lagg} values, the rows are shifted \emph{forward} (down),
#' and for negative \code{lagg} values they are shifted \emph{backward} (up).
#'
#' The output \emph{matrix} is padded with either zeros (the default), or
#' with rows of data from \code{tseries}, so that it has the same dimensions
#' as \code{tseries}.
#' If the \code{lagg} is positive, then the first row is copied and added
#' upfront.
#' If the \code{lagg} is negative, then the last row is copied and added
#' to the end.
#'
#' As a rule, if \code{tseries} contains returns data, then the output
#' \emph{matrix} should be padded with zeros, to avoid data snooping.
#' If \code{tseries} contains prices, then the output \emph{matrix} should
#' be padded with the prices.
#'
#' @examples
#' \dontrun{
#' # Create a matrix of random returns
#' retp <- matrix(rnorm(5e6), nc=5)
#' # Compare lagit() with rutils::lagit()
#' all.equal(HighFreq::lagit(retp), rutils::lagit(retp))
#' # Compare the speed of RcppArmadillo with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#' Rcpp=HighFreq::lagit(retp),
#' Rcode=rutils::lagit(retp),
#' times=10))[, c(1, 4, 5)] # end microbenchmark summary
#' }
#'
#' @export
lagit <- function(tseries, lagg = 1L, pad_zeros = TRUE) {
# Dispatch to the compiled C++ routine registered by Rcpp::compileAttributes()
.Call(`_HighFreq_lagit`, tseries, lagg, pad_zeros)
}
#' Calculate the differences between the neighboring elements of a
#' single-column \emph{time series} or a \emph{vector}.
#'
#' @param \code{tseries} A single-column \emph{time series} or a \emph{vector}.
#'
#' @param \code{lagg} An \emph{integer} equal to the number of time periods to
#' lag when calculating the differences (the default is \code{lagg = 1}).
#'
#' @param \code{pad_zeros} \emph{Boolean} argument: Should the output
#' \emph{vector} be padded (extended) with zeros, in order to return a
#' \emph{vector} of the same length as the input? (the default is
#' \code{pad_zeros = TRUE})
#'
#' @return A column \emph{vector} containing the differences between the
#' elements of the input vector.
#'
#' @details
#' The function \code{diff_vec()} calculates the differences between the
#' input \emph{time series} or \emph{vector} and its lagged version.
#'
#' The argument \code{lagg} specifies the number of lags. For example, if
#' \code{lagg=3} then the differences will be taken between each element
#' minus the element three time periods before it (in the past). The default
#' is \code{lagg = 1}.
#'
#' The argument \code{pad_zeros} specifies whether the output \emph{vector}
#' should be padded (extended) with zeros at the front, in order to
#' return a \emph{vector} of the same length as the input. The default is
#' \code{pad_zeros = TRUE}. The padding operation can be time-consuming,
#' because it requires copying the data in memory.
#'
#' The function \code{diff_vec()} is implemented in \code{RcppArmadillo}
#' \code{C++} code, which makes it several times faster than \code{R} code.
#'
#' @examples
#' \dontrun{
#' # Create a vector of random returns
#' retp <- rnorm(1e6)
#' # Compare diff_vec() with rutils::diffit()
#' all.equal(drop(HighFreq::diff_vec(retp, lagg=3, pad_zeros=TRUE)),
#' rutils::diffit(retp, lagg=3))
#' # Compare the speed of RcppArmadillo with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#' Rcpp=HighFreq::diff_vec(retp, lagg=3, pad_zeros=TRUE),
#' Rcode=rutils::diffit(retp, lagg=3),
#' times=10))[, c(1, 4, 5)] # end microbenchmark summary
#' }
#'
#' @export
diff_vec <- function(tseries, lagg = 1L, pad_zeros = TRUE) {
.Call(`_HighFreq_diff_vec`, tseries, lagg, pad_zeros)
}
#' Calculate the row differences of a \emph{time series} or a \emph{matrix}
#' using \emph{RcppArmadillo}.
#'
#' @param \code{tseries} A \emph{time series} or a \emph{matrix}.
#'
#' @param \code{lagg} An \emph{integer} equal to the number of rows (time
#' periods) to lag when calculating the differences (the default is
#' \code{lagg = 1}).
#'
#' @param \code{pad_zeros} \emph{Boolean} argument: Should the output
#' \emph{matrix} be padded (extended) with zero values, in order to return a
#' \emph{matrix} with the same number of rows as the input? (the default is
#' \code{pad_zeros = TRUE})
#'
#' @return A \emph{matrix} containing the differences between the rows of the
#' input \emph{matrix} \code{tseries}.
#'
#' @details
#' The function \code{diffit()} calculates the differences between the rows
#' of the input \emph{matrix} \code{tseries} and its lagged version.
#'
#' The argument \code{lagg} specifies the number of lags applied to the rows
#' of the lagged version of \code{tseries}.
#' For positive \code{lagg} values, the lagged version of \code{tseries} has
#' its rows shifted \emph{forward} (down) by the number equal to \code{lagg}
#' rows. For negative \code{lagg} values, the lagged version of
#' \code{tseries} has its rows shifted \emph{backward} (up) by the number
#' equal to \code{-lagg} rows.
#' For example, if \code{lagg=3} then the lagged version will have its rows
#' shifted down by \code{3} rows, and the differences will be taken between
#' each row minus the row three time periods before it (in the past). The
#' default is \code{lagg = 1}.
#'
#' The argument \code{pad_zeros} specifies whether the output \emph{matrix}
#' should be padded (extended) with zero values in order to return a
#' \emph{matrix} with the same number of rows as the input \code{tseries}.
#' The default is \code{pad_zeros = TRUE}. If \code{pad_zeros = FALSE} then
#' the return \emph{matrix} has a smaller number of rows than the input
#' \code{tseries}. The padding operation can be time-consuming, because it
#' requires copying the data in memory.
#'
#' The function \code{diffit()} is implemented in \code{RcppArmadillo}
#' \code{C++} code, which makes it several times faster than \code{R} code.
#'
#' @examples
#' \dontrun{
#' # Create a matrix of random data
#' datav <- matrix(sample(15), nc=3)
#' # Calculate differences with lagged rows
#' HighFreq::diffit(datav, lagg=2)
#' # Calculate differences with advanced rows
#' HighFreq::diffit(datav, lagg=-2)
#' # Compare HighFreq::diffit() with rutils::diffit()
#' all.equal(HighFreq::diffit(datav, lagg=2),
#' rutils::diffit(datav, lagg=2),
#' check.attributes=FALSE)
#' # Compare the speed of RcppArmadillo with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#' Rcpp=HighFreq::diffit(datav, lagg=2),
#' Rcode=rutils::diffit(datav, lagg=2),
#' times=10))[, c(1, 4, 5)] # end microbenchmark summary
#' }
#'
#' @export
diffit <- function(tseries, lagg = 1L, pad_zeros = TRUE) {
.Call(`_HighFreq_diffit`, tseries, lagg, pad_zeros)
}
#' Calculate a vector of end points that divides an integer time sequence of
#' time periods into equal time intervals.
#'
#' @param \code{length} An \emph{integer} equal to the length of the time
#' sequence to be divided into equal intervals.
#'
#' @param \code{step} The number of time periods in each interval between
#' neighboring end points (the default is \code{step = 1}).
#'
#' @param \code{stub} An \emph{integer} equal to the first non-zero end point
#' (the default is \code{stub = 0}).
#'
#' @param \code{stubs} A \emph{Boolean} specifying whether to include stub
#' intervals (the default is \code{stubs = TRUE}).
#'
#' @return A vector of equally spaced \emph{integers} representing the end
#' points.
#'
#' @details
#' The end points are a vector of integers which divide the sequence of time
#' periods of length equal to \code{length} into equally spaced time
#' intervals.
#' The number of time periods between neighboring end points is equal to the
#' argument \code{step}.
#' If a whole number of intervals doesn't fit over the whole sequence, then
#' \code{calc_endpoints()} adds a stub interval at the end.
#' A stub interval is one where the number of periods between neighboring end
#' points is less than the argument \code{step}.
#'
#' If \code{stubs = TRUE} (the default) then the first end point is
#' equal to \code{0} (since indexing in \code{C++} code starts at \code{0}).
#' The first non-zero end point is equal to \code{step} or \code{stub} (if
#' it's not zero).
#' If \code{stub = 0} (the default) then the first end point is equal to
#' \code{0} (even if \code{stubs = FALSE}).
#' If \code{stubs = TRUE} (the default) then the last end point is always
#' equal to \code{length-1}.
#' The argument \code{stub} should be less than the \code{step}: \code{stub <
#' step}.
#'
#' If \code{step = 1} and \code{stub = 0} (the default), then the vector of
#' end points is simply equal to:
#' \deqn{
#' \{ 0, 1, 2, ..., length - 1 \}
#' }
#'
#' If \code{stub = 0} (the default) and \code{stubs = TRUE} (the default)
#' then the vector of end points is equal to:
#' \deqn{
#' \{ 0, step, 2*step, ..., length - 1 \}
#' }
#'
#' If \code{stub = 0} (the default) and \code{stubs = FALSE} then the vector
#' of end points is equal to:
#' \deqn{
#' \{ 0, step, 2*step, ..., n*step \}
#' }
#'
#' If \code{stub > 0} and \code{stubs = TRUE} (the default), then the vector
#' of end points is equal to:
#' \deqn{
#' \{ 0, stub, stub + step, ..., length - 1 \}
#' }
#'
#' For example, the end points for \code{length = 20}, divided into intervals
#' of \code{step = 5} are equal to: \code{0, 5, 10, 15, 19}.
#'
#' If \code{stub = 1} then the first non-zero end point is equal to \code{1}
#' and the end points are equal to: \code{0, 1, 6, 11, 16, 19}.
#' The stub interval at the beginning is equal to \code{2} (including
#' \code{0} and \code{1}).
#' The stub interval at the end is equal to \code{3 = 19 - 16}.
#'
#' The end points for \code{length = 21} divided into intervals of length
#' \code{step = 5}, with \code{stub = 0}, are equal to: \code{0, 5, 10, 15,
#' 20}.
#' The beginning interval is equal to \code{5}.
#' The end interval is equal to \code{5 = 20 - 15}.
#'
#' If \code{stub = 1} then the first non-zero end point is equal to \code{1}
#' and the end points are equal to: \code{0, 1, 6, 11, 16, 20}.
#' The beginning stub interval is equal to \code{2}.
#' The end stub interval is equal to \code{4 = 20 - 16}.
#'
#' The function \code{calc_endpoints()} is similar to the function
#' \code{rutils::calc_endpoints()} from package
#' \href{https://github.com/algoquant/rutils}{rutils}.
#'
#' But the end points are shifted by \code{-1} compared to \code{R} code
#' because indexing starts at \code{0} in \code{C++} code, while it starts at
#' \code{1} in \code{R} code. So if \code{calc_endpoints()} is used in
#' \code{R} code then \code{1} should be added to it.
#'
#' @examples
#' # Calculate the end points without a stub interval
#' HighFreq::calc_endpoints(length=20, step=5)
#' # Calculate the end points with a final stub interval
#' HighFreq::calc_endpoints(length=23, step=5)
#' # Calculate the end points with initial and final stub intervals
#' HighFreq::calc_endpoints(length=20, step=5, stub=2)
#'
#' @export
calc_endpoints <- function(length, step = 1L, stub = 0L, stubs = TRUE) {
# Dispatch to the compiled C++ routine registered by Rcpp::compileAttributes()
.Call(`_HighFreq_calc_endpoints`, length, step, stub, stubs)
}
#' Calculate a vector of start points by lagging (shifting) a vector of end
#' points.
#'
#' @param \code{endd} An \emph{integer} vector of end points.
#'
#' @param \code{lookb} The length of the look-back interval, equal to the
#' lag (shift) applied to the end points.
#'
#' @return An \emph{integer} vector with the same number of elements as the
#' vector \code{endd}.
#'
#' @details
#' The start points are equal to the values of the vector \code{endd} lagged
#' (shifted) by an amount equal to \code{lookb}. In addition, an extra
#' value of \code{1} is added to them, to avoid data overlaps. The lag
#' operation requires appending a beginning warmup interval containing zeros,
#' so that the vector of start points has the same length as the \code{endd}.
#'
#' For example, consider the end points for a vector of length \code{25}
#' divided into equal intervals of length \code{5}: \code{4, 9, 14, 19, 24}.
#' (In \code{C++} the vector indexing starts at \code{0} not \code{1}, so
#' it's shifted by \code{-1}.)
#' Then the start points for \code{lookb = 2} are equal to: \code{0, 0,
#' 5, 10, 15}. The differences between the end points minus the
#' corresponding start points are equal to \code{9}, except for the warmup
#' interval.
#'
#' @examples
#' # Calculate end points
#' endd <- HighFreq::calc_endpoints(length=55, step=5)
#' # Calculate start points corresponding to the end points
#' startp <- HighFreq::calc_startpoints(endd, lookb=5)
#'
#' @export
calc_startpoints <- function(endd, lookb) {
# Dispatch to the compiled C++ routine registered by Rcpp::compileAttributes()
.Call(`_HighFreq_calc_startpoints`, endd, lookb)
}
#' Count the number of consecutive \code{TRUE} elements in a Boolean vector,
#' and reset the count to zero after every \code{FALSE} element.
#'
#' @param \code{tseries} A \emph{Boolean vector} of data.
#'
#' @return An \emph{integer vector} of the same length as the argument
#' \code{tseries}.
#'
#' @details
#' The function \code{roll_count()} calculates the number of consecutive
#' \code{TRUE} elements in a Boolean vector, and it resets the count to zero
#' after every \code{FALSE} element.
#'
#' For example, the Boolean vector (\code{FALSE}, \code{TRUE}, \code{TRUE},
#' \code{FALSE}, \code{FALSE}, \code{TRUE}, \code{TRUE}, \code{TRUE},
#' \code{TRUE}, \code{TRUE}, \code{FALSE}) is translated into (\code{0},
#' \code{1}, \code{2}, \code{0}, \code{0}, \code{1}, \code{2}, \code{3},
#' \code{4}, \code{5}, \code{0}).
#'
#' @examples
#' \dontrun{
#' # Calculate the number of consecutive TRUE elements
#' drop(HighFreq::roll_count(c(FALSE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE)))
#' }
#' @export
roll_count <- function(tseries) {
.Call(`_HighFreq_roll_count`, tseries)
}
#' Calculate the run length encoding of a single-column \emph{time series},
#' \emph{matrix}, or a \emph{vector}.
#'
#' @param \code{tseries} A single-column \emph{time series}, \emph{matrix}, or
#' a \emph{vector}.
#'
#' @return A \emph{list} with two \emph{vectors}: a \emph{vector} of encoded
#' data and an \emph{integer vector} of data counts (repeats).
#'
#' @details
#' The function \code{encode_it()} calculates the run length encoding of a
#' single-column \emph{time series}, \emph{matrix}, or a \emph{vector}.
#'
#' The run length encoding of a \emph{vector} consists of two \emph{vectors}:
#' a \emph{vector} of encoded data (consecutive data values) and of an
#' \emph{integer vector} of the data counts (the number of times the same
#' value repeats in succession).
#'
#' Run length encoding (RLE) is a data compression algorithm which encodes
#' the data in two \emph{vectors}: the consecutive data values and their
#' counts. If a data value occurs several times in succession then it is
#' recorded only once and its corresponding count is equal to the number of
#' times it occurs. Run-length encoding is different from a contingency
#' table.
#'
#' @examples
#' \dontrun{
#' # Create a vector of data
#' datav <- sample(5, 31, replace=TRUE)
#' # Calculate the run length encoding of datav
#' HighFreq::encode_it(datav)
#' }
#'
#' @export
encode_it <- function(tseries) {
# Dispatch to the compiled C++ routine registered by Rcpp::compileAttributes()
.Call(`_HighFreq_encode_it`, tseries)
}
#' Calculate the \emph{vector} of data from its run length encoding.
#'
#' @param \code{encodel} A \emph{list} with two \emph{vectors}: a \emph{numeric
#' vector} of encoded data and an \emph{integer vector} of data counts
#' (repeats).
#'
#' @return A \code{numeric vector}.
#'
#' @details
#' The function \code{decode_it()} calculates the \emph{vector} of data from
#' its run length encoding.
#'
#' The run length encoding of a \emph{vector} consists of two \emph{vectors}:
#' a \emph{numeric vector} of encoded data (consecutive data values) and of
#' an \emph{integer vector} of the data counts (the number of times the same
#' value repeats in succession).
#'
#' Run length encoding (RLE) is a data compression algorithm which encodes
#' the data in two \emph{vectors}: the consecutive data values and their
#' counts. If a data value occurs several times in succession then it is
#' recorded only once and its corresponding count is equal to the number of
#' times it occurs. Run-length encoding is different from a contingency
#' table.
#'
#' @examples
#' \dontrun{
#' # Create a vector of data
#' datav <- sample(5, 31, replace=TRUE)
#' # Calculate the run length encoding of datav
#' rle <- HighFreq::encode_it(datav)
#' # Decode the data from its run length encoding
#' decodev <- HighFreq::decode_it(rle)
#' all.equal(datav, decodev)
#' }
#'
#' @export
decode_it <- function(encodel) {
.Call(`_HighFreq_decode_it`, encodel)
}
#' Calculate the ranks of the elements of a single-column \emph{time series},
#' \emph{matrix}, or a \emph{vector} using \code{RcppArmadillo}.
#'
#' @param \code{tseries} A single-column \emph{time series}, \emph{matrix}, or
#' a \emph{vector}.
#'
#' @return An \emph{integer vector} with the ranks of the elements of the
#' \code{tseries}.
#'
#' @details
#' The function \code{calc_ranks()} calculates the ranks of the elements of a
#' single-column \emph{time series}, \emph{matrix}, or a \emph{vector}.
#'
#' The permutation index is an integer vector which sorts a given vector into
#' ascending order.
#' The permutation index of the permutation index is the \emph{reverse}
#' permutation index, because it sorts the vector from ascending order back
#' into its original unsorted order.
#' The ranks of the elements are equal to the \emph{reverse} permutation
#' index. The function \code{calc_ranks()} calculates the \emph{reverse}
#' permutation index.
#'
#' The ranks produced by \code{calc_ranks()} start at zero, following the
#' \code{C++} convention.
#'
#' The \code{Armadillo} function \code{arma::sort_index()} calculates the
#' permutation index which sorts a given vector into an ascending order.
#' Applying the function \code{arma::sort_index()} twice:\cr
#' \code{arma::sort_index(arma::sort_index())},\cr
#' calculates the \emph{reverse} permutation index to sort the vector from
#' ascending order back into its original unsorted order.
#'
#' The function \code{calc_ranks()} calls the \code{Armadillo} function
#' \code{arma::sort_index()} twice to calculate the \emph{reverse}
#' permutation index, to sort the vector from ascending order back into its
#' original unsorted order.
#'
#' @examples
#' \dontrun{
#' # Create a vector of data
#' datav <- rnorm(1e3)
#' # Calculate the ranks of the elements using R code and RcppArmadillo
#' all.equal(rank(datav), drop(HighFreq::calc_ranks(datav))+1)
#' # Compare the speed of R code with RcppArmadillo
#' library(microbenchmark)
#' summary(microbenchmark(
#' Rcode=rank(datav),
#' Rcpp=HighFreq::calc_ranks(datav),
#' times=10))[, c(1, 4, 5)] # end microbenchmark summary
#' }
#'
#' @export
calc_ranks <- function(tseries) {
.Call(`_HighFreq_calc_ranks`, tseries)
}
#' Calculate the ranks of the elements of a single-column \emph{time series},
#' \emph{matrix}, or a \emph{vector} using \code{RcppArmadillo}.
#'
#' @param \code{tseries} A single-column \emph{time series}, \emph{matrix}, or
#' a \emph{vector}.
#'
#' @return An \emph{integer vector} with the ranks of the elements of
#' \code{tseries}.
#'
#' @details
#' The function \code{calc_ranks_stl()} calculates the ranks of the elements
#' of a single-column \emph{time series}, \emph{matrix}, or a \emph{vector}.
#' The function \code{calc_ranks_stl()} is slightly faster than the function
#' \code{calc_ranks()}.
#'
#' The permutation index is an integer vector which sorts a given vector into
#' ascending order.
#' The permutation index of the permutation index is the \emph{reverse}
#' permutation index, because it sorts the vector from ascending order back
#' into its original unsorted order.
#' The ranks of the elements are equal to the \emph{reverse} permutation
#' index. The function \code{calc_ranks_stl()} calculates the \emph{reverse}
#' permutation index.
#'
#' The ranks produced by \code{calc_ranks_stl()} start at zero, following the
#' \code{C++} convention.
#'
#' The \code{STL} \code{C++} function \code{std::sort()} sorts a vector into
#' ascending order. It can also be used to calculate the permutation index
#' which sorts the vector into an ascending order.
#'
#' The function \code{calc_ranks_stl()} calls the function \code{std::sort()}
#' twice:
#' First, it calculates the permutation index which sorts the vector
#' \code{tseries} into ascending order.
#' Second, it calculates the permutation index of the permutation index,
#' which are the ranks (the \emph{reverse} permutation index) of the vector
#' \code{tseries}.
#'
#' @examples
#' \dontrun{
#' # Create a vector of data
#' datav <- rnorm(1e3)
#' # Calculate the ranks of the elements using R code and RcppArmadillo
#' all.equal(rank(datav), drop(HighFreq::calc_ranks_stl(datav))+1)
#' # Compare the speed of R code with RcppArmadillo
#' library(microbenchmark)
#' summary(microbenchmark(
#' Rcode=rank(datav),
#' Rcpp=HighFreq::calc_ranks_stl(datav),
#' times=10))[, c(1, 4, 5)] # end microbenchmark summary
#' }
#'
#' @export
calc_ranks_stl <- function(tseries) {
.Call(`_HighFreq_calc_ranks_stl`, tseries)
}
# Internal helper (no roxygen @export tag, so not exported): wrapper for the
# compiled C++ routine `_HighFreq_remove_dup`, applied to a character vector.
# NOTE(review): presumably removes duplicate strings from stringv - the exact
# semantics are defined in the C++ source; confirm there.
remove_dup <- function(stringv) {
.Call(`_HighFreq_remove_dup`, stringv)
}
#' Multiply element-wise the rows or columns of a \emph{matrix} times a
#' \emph{vector}.
#'
#' @param \code{vector} A \emph{numeric} \emph{vector}.
#'
#' @param \code{matrix} A \emph{numeric} \emph{matrix}.
#'
#' @param \code{byrow} A \emph{Boolean} argument: if \code{TRUE} then multiply
#' the rows of \code{matrix} by \code{vector}, otherwise multiply the columns
#' (the default is \code{byrow = TRUE}.)
#'
#' @return A \emph{matrix} equal to the product of the elements of
#' \code{matrix} times \code{vector}, with the same dimensions as the
#' argument \code{matrix}.
#'
#' @details
#' The function \code{mult_mat()} multiplies element-wise the rows or columns
#' of a \emph{matrix} times a \emph{vector}.
#'
#' If \code{byrow = TRUE} (the default), then function \code{mult_mat()}
#' multiplies the rows of the argument \code{matrix} times the argument
#' \code{vector}.
#' Otherwise it multiplies the columns of \code{matrix}.
#'
#' In \code{R}, \emph{matrix} multiplication is performed by columns.
#' Performing multiplication by rows is often required, for example when
#' multiplying asset returns by portfolio weights.
#' But performing multiplication by rows requires explicit loops in \code{R},
#' or it requires \emph{matrix} transpose. And both are slow.
#'
#' The function \code{mult_mat()} uses \code{RcppArmadillo} \code{C++}
#' code, so when multiplying large \emph{matrix} columns it's several times
#' faster than vectorized \code{R} code, and it's even much faster compared
#' to \code{R} when multiplying the \emph{matrix} rows.
#'
#' The function \code{mult_mat()} performs loops over the \emph{matrix} rows
#' and columns using the \code{Armadillo} operators \code{each_row()} and
#' \code{each_col()}, instead of performing explicit \code{for()} loops (both
#' methods are equally fast).
#'
#' @examples
#' \dontrun{
#' # Create vector and matrix data
#' matrixv <- matrix(round(runif(25e4), 2), nc=5e2)
#' vectorv <- round(runif(5e2), 2)
#'
#' # Multiply the matrix rows using R
#' matrixr <- t(vectorv*t(matrixv))
#' # Multiply the matrix rows using C++
#' matrixp <- HighFreq::mult_mat(vectorv, matrixv, byrow=TRUE)
#' all.equal(matrixr, matrixp)
#' # Compare the speed of Rcpp with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#' Rcpp=HighFreq::mult_mat(vectorv, matrixv, byrow=TRUE),
#' Rcode=t(vectorv*t(matrixv)),
#' times=10))[, c(1, 4, 5)] # end microbenchmark summary
#'
#' # Multiply the matrix columns using R
#' matrixr <- vectorv*matrixv
#' # Multiply the matrix columns using C++
#' matrixp <- HighFreq::mult_mat(vectorv, matrixv, byrow=FALSE)
#' all.equal(matrixr, matrixp)
#' # Compare the speed of Rcpp with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#' Rcpp=HighFreq::mult_mat(vectorv, matrixv, byrow=FALSE),
#' Rcode=vectorv*matrixv,
#' times=10))[, c(1, 4, 5)] # end microbenchmark summary
#' }
#'
#' @export
mult_mat <- function(vectorv, matrixv, byrow = TRUE) {
# Auto-generated binding: forwards the arguments to the registered C++ routine
# `_HighFreq_mult_mat` and returns the product matrix it computes.
.Call(`_HighFreq_mult_mat`, vectorv, matrixv, byrow)
}
#' Multiply the rows or columns of a \emph{matrix} times a \emph{vector},
#' element-wise and in place, without copying the data in memory.
#'
#' @param \code{vectorv} A \emph{numeric} \emph{vector}.
#'
#' @param \code{matrixv} A \emph{numeric} \emph{matrix}.
#'
#' @param \code{byrow} A \emph{Boolean} argument: if \code{TRUE} then multiply
#' the rows of \code{matrixv} by \code{vectorv}, otherwise multiply the columns
#' (the default is \code{byrow = TRUE}.)
#'
#' @return Void (no return value - modifies the data in place).
#'
#' @details
#' The function \code{mult_mat_ref()} multiplies the rows or columns of a
#' \emph{matrix} times a \emph{vector}, element-wise and in place, without
#' copying the data in memory.
#'
#' It accepts a \emph{pointer} to the argument \code{matrixv}, and it
#' overwrites the old \code{matrix} values with the new values. It performs
#' the calculation in place, without copying the \emph{matrix} in memory,
#' which can significantly increase the computation speed for large matrices.
#'
#' If \code{byrow = TRUE} (the default), then function \code{mult_mat_ref()}
#' multiplies the rows of the argument \code{matrixv} times the argument
#' \code{vectorv}.
#' Otherwise it multiplies the columns of \code{matrixv}.
#'
#' In \code{R}, \emph{matrix} multiplication is performed by columns.
#' Performing multiplication by rows is often required, for example when
#' multiplying asset returns by portfolio weights.
#' But performing multiplication by rows requires explicit loops in \code{R},
#' or it requires \emph{matrix} transpose. And both are slow.
#'
#' The function \code{mult_mat_ref()} uses \code{RcppArmadillo} \code{C++}
#' code, so when multiplying large \emph{matrix} columns it's several times
#' faster than vectorized \code{R} code, and it's even much faster compared
#' to \code{R} when multiplying the \emph{matrix} rows.
#'
#' The function \code{mult_mat_ref()} performs loops over the \emph{matrix}
#' rows and columns using the \code{Armadillo} operators \code{each_row()}
#' and \code{each_col()}, instead of performing explicit \code{for()} loops
#' (both methods are equally fast).
#'
#' @examples
#' \dontrun{
#' # Create vector and matrix data
#' matrixv <- matrix(round(runif(25e4), 2), nc=5e2)
#' vectorv <- round(runif(5e2), 2)
#'
#' # Multiply the matrix rows using R
#' matrixr <- t(vectorv*t(matrixv))
#' # Multiply the matrix rows using C++
#' HighFreq::mult_mat_ref(vectorv, matrixv, byrow=TRUE)
#' all.equal(matrixr, matrixv)
#' # Compare the speed of Rcpp with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#' Rcpp=HighFreq::mult_mat_ref(vectorv, matrixv, byrow=TRUE),
#' Rcode=t(vectorv*t(matrixv)),
#' times=10))[, c(1, 4, 5)] # end microbenchmark summary
#'
#' # Multiply the matrix columns using R
#' matrixr <- vectorv*matrixv
#' # Multiply the matrix columns using C++
#' HighFreq::mult_mat_ref(vectorv, matrixv, byrow=FALSE)
#' all.equal(matrixr, matrixv)
#' # Compare the speed of Rcpp with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#' Rcpp=HighFreq::mult_mat_ref(vectorv, matrixv, byrow=FALSE),
#' Rcode=vectorv*matrixv,
#' times=10))[, c(1, 4, 5)] # end microbenchmark summary
#' }
#'
#' @export
mult_mat_ref <- function(vectorv, matrixv, byrow = TRUE) {
# Auto-generated binding: the C++ routine modifies matrixv in place (by
# reference), so the .Call() result is wrapped in invisible() -- the function
# is used for its side effect, not its return value.
invisible(.Call(`_HighFreq_mult_mat_ref`, vectorv, matrixv, byrow))
}
#' Calculate the eigen decomposition of a square, symmetric matrix using
#' \code{RcppArmadillo}.
#'
#' @param \code{matrixv} A square, symmetric matrix.
#'
#' @param \code{eigenval} A \emph{vector} of eigen values.
#'
#' @param \code{eigenvec} A \emph{matrix} of eigen vectors.
#'
#' @return Void (no return value - passes the eigen values and eigen vectors by
#' reference).
#'
#' @details
#' The function \code{calc_eigen()} calculates the eigen decomposition of a
#' square, symmetric matrix using \code{RcppArmadillo}. It calls the
#' \code{Armadillo} function \code{arma::eig_sym()} to calculate the eigen
#' decomposition.
#'
#' For small matrices, the function \code{calc_eigen()} is several times
#' faster than the \code{R} function \code{eigen()}, since
#' \code{calc_eigen()} has no overhead in \code{R} code. But for large
#' matrices, they are about the same, since both call \code{C++} code.
#'
#' @examples
#' \dontrun{
#' # Create random positive semi-definite matrix
#' matrixv <- matrix(runif(25), nc=5)
#' matrixv <- t(matrixv) %*% matrixv
#' # Calculate the eigen decomposition using RcppArmadillo
#' eigenval <- numeric(5) # Allocate eigen values
#' eigenvec <- matrix(numeric(25), nc=5) # Allocate eigen vectors
#' HighFreq::calc_eigen(matrixv, eigenval, eigenvec)
#' # Calculate the eigen decomposition using R
#' eigenr <- eigen(matrixv)
#' # Compare the eigen decompositions
#' all.equal(eigenr$values, drop(eigenval))
#' all.equal(abs(eigenr$vectors), abs(eigenvec))
#' # Compare the speed of Rcpp with R code
#' summary(microbenchmark(
#' Rcpp=HighFreq::calc_eigen(matrixv, eigenval, eigenvec),
#' Rcode=eigen(matrixv),
#' times=10))[, c(1, 4, 5)] # end microbenchmark summary
#' }
#'
#' @export
calc_eigen <- function(matrixv, eigenval, eigenvec) {
# Auto-generated binding: the C++ routine writes the eigen values and eigen
# vectors into eigenval and eigenvec by reference, so the .Call() result is
# wrapped in invisible() -- callers read the outputs from the passed arguments.
invisible(.Call(`_HighFreq_calc_eigen`, matrixv, eigenval, eigenvec))
}
#' Calculate the partial eigen decomposition of a dense symmetric matrix using
#' \code{RcppArmadillo}.
#'
#' @param \code{matrixv} A square matrix.
#'
#' @param \code{neigen} An \emph{integer} equal to the number of eigenvalues
#' to be calculated.
#'
#' @return A list with two elements: a \emph{vector} of eigenvalues (named
#' "values"), and a \emph{matrix} of eigenvectors (named "vectors").
#'
#' @details
#' The function \code{calc_eigenp()} calculates the partial eigen
#' decomposition (the lowest order principal components, with the largest
#' eigenvalues) of a dense matrix using RcppArmadillo. It calls the internal
#' \code{Armadillo} eigen solver \code{SymEigsSolver} in the namespace
#' \code{arma::newarp} to calculate the partial eigen decomposition.
#'
#' The eigen solver \code{SymEigsSolver} uses the Implicitly Restarted
#' Lanczos Method (IRLM) which was adapted from the
#' \href{https://en.wikipedia.org/wiki/ARPACK}{ARPACK} library. The eigen
#' solver \code{SymEigsSolver} was implemented by
#' \href{https://github.com/yixuan/arpack-arma}{Yixuan Qiu}.
#'
#' The function \code{arma::eigs_sym()} also calculates the partial eigen
#' decomposition using the eigen solver \code{SymEigsSolver}, but it only
#' works for sparse matrices which are not standard R matrices.
#'
#' For matrices smaller than \code{100} rows, the function
#' \code{calc_eigenp()} is slower than the function \code{calc_eigen()} which
#' calculates the full eigen decomposition. But it's faster for very large
#' matrices.
#'
#' @examples
#' \dontrun{
#' # Create random positive semi-definite matrix
#' matrixv <- matrix(runif(100), nc=10)
#' matrixv <- t(matrixv) %*% matrixv
#' # Calculate the partial eigen decomposition
#' neigen <- 5
#' eigenp <- HighFreq::calc_eigenp(matrixv, neigen)
#' # Calculate the eigen decomposition using RcppArmadillo
#' eigenval <- numeric(10) # Allocate eigen values
#' eigenvec <- matrix(numeric(100), nc=10) # Allocate eigen vectors
#' HighFreq::calc_eigen(matrixv, eigenval, eigenvec)
#' # Compare the eigen decompositions
#' all.equal(eigenp$values[1:neigen], eigenval[1:neigen])
#' all.equal(abs(eigenp$vectors), abs(eigenvec[, 1:neigen]))
#' # Compare the speed of partial versus full decomposition
#' summary(microbenchmark(
#' partial=HighFreq::calc_eigenp(matrixv, neigen),
#' full=HighFreq::calc_eigen(matrixv, eigenval, eigenvec),
#' times=10))[, c(1, 4, 5)] # end microbenchmark summary
#' }
#'
#' @export
calc_eigenp <- function(matrixv, neigen) {
# Auto-generated binding: forwards to the registered C++ routine
# `_HighFreq_calc_eigenp`, which returns a list of partial eigen values
# ("values") and eigen vectors ("vectors").
.Call(`_HighFreq_calc_eigenp`, matrixv, neigen)
}
#' Calculate the \emph{reduced inverse} of a symmetric \emph{matrix} of
#' data using eigen decomposition.
#'
#' @param \code{matrixv} A symmetric \emph{matrix} of data.
#'
#' @param \code{dimax} An \emph{integer} equal to the number of \emph{eigen
#' values} used for calculating the \emph{reduced inverse} of the matrix
#' \code{matrixv} (the default is \code{dimax = 0} - standard matrix inverse
#' using all the \emph{eigen values}).
#'
#' @param \code{singmin} A \emph{numeric} threshold level for discarding
#' small \emph{eigen values} in order to regularize the inverse of the matrix
#' \code{matrixv} (the default is \code{0.0}).
#'
#' @return A \emph{matrix} equal to the \emph{reduced inverse} of the
#' matrix \code{matrixv}.
#'
#' @details
#' The function \code{calc_inv()} calculates the \emph{reduced inverse}
#' of the matrix \code{matrixv} using eigen decomposition.
#'
#' The function \code{calc_inv()} first performs eigen decomposition of the
#' matrix \code{matrixv}.
#' The eigen decomposition of a matrix \eqn{\strong{C}} is defined as the
#' factorization:
#' \deqn{
#' \strong{C} = \strong{O} \, \Sigma \, \strong{O}^T
#' } Where \eqn{\strong{O}} is the matrix of \emph{eigen vectors} and
#' \eqn{\Sigma} is a diagonal matrix of \emph{eigen values}.
#'
#' The inverse \eqn{\strong{C}^{-1}} of the matrix \eqn{\strong{C}} can be
#' calculated from the eigen decomposition as:
#' \deqn{
#' \strong{C}^{-1} = \strong{O} \, \Sigma^{-1} \, \strong{O}^T
#' }
#'
#' The \emph{reduced inverse} of the matrix \eqn{\strong{C}} is obtained
#' by removing \emph{eigen vectors} with very small \emph{eigen values}:
#' \deqn{
#' \strong{C}^{-1} = \strong{O}_{dimax} \, \Sigma^{-1}_{dimax} \, \strong{O}^T_{dimax}
#' }
#' Where \eqn{\strong{O}_{dimax}} is the matrix of \emph{eigen vectors}
#' that correspond to the largest \emph{eigen values} \eqn{\Sigma_{dimax}}.
#'
#' The function \code{calc_inv()} applies regularization to the matrix
#' inverse using the arguments \code{dimax} and \code{singmin}.
#'
#' The function \code{calc_inv()} applies regularization by discarding the
#' smallest \emph{eigen values} \eqn{\Sigma_i} that are less than the
#' threshold level \code{singmin} times the sum of all the \emph{eigen
#' values}: \deqn{\Sigma_i < singmin \cdot (\sum{\Sigma_i})}
#'
#' It also discards additional \emph{eigen vectors} so that only the highest
#' order \emph{eigen vectors} remain, up to order \code{dimax}.
#' It calculates the \emph{reduced inverse} from the eigen decomposition
#' using only the largest \emph{eigen values} up to \code{dimax}. For
#' example, if
#' \code{dimax = 3} then it only uses the \code{3} highest order \emph{eigen
#' vectors}, with the largest \emph{eigen values}. This has the effect of
#' dimension reduction.
#'
#' If the matrix \code{matrixv} has a large number of small \emph{eigen
#' values}, then the number of remaining \emph{eigen values} may be less than
#' \code{dimax}.
#'
#' @examples
#' \dontrun{
#' # Calculate ETF returns
#' retp <- na.omit(rutils::etfenv$returns[, c("VTI", "TLT", "DBC")])
#' # Calculate covariance matrix
#' covmat <- cov(retp)
#' # Calculate matrix inverse using RcppArmadillo
#' invmat <- HighFreq::calc_inv(covmat)
#' # Calculate matrix inverse in R
#' invr <- solve(covmat)
#' all.equal(invmat, invr, check.attributes=FALSE)
#' # Calculate reduced inverse using RcppArmadillo
#' invmat <- HighFreq::calc_inv(covmat, dimax=3)
#' # Calculate reduced inverse using eigen decomposition in R
#' eigend <- eigen(covmat)
#' dimax <- 1:3
#' invr <- eigend$vectors[, dimax] %*% (t(eigend$vectors[, dimax])/eigend$values[dimax])
#' # Compare RcppArmadillo with R
#' all.equal(invmat, invr)
#' }
#'
#' @export
calc_inv <- function(matrixv, dimax = 0L, singmin = 0.0) {
# Auto-generated binding: forwards to the registered C++ routine
# `_HighFreq_calc_inv` and returns the (possibly reduced) matrix inverse.
# dimax = 0L and singmin = 0.0 mean no regularization (standard inverse).
.Call(`_HighFreq_calc_inv`, matrixv, dimax, singmin)
}
#' Calculate the \emph{reduced inverse} of a \emph{matrix} of data using
#' Singular Value Decomposition (\emph{SVD}).
#'
#' @param \code{matrixv} A \emph{matrix} of data.
#'
#' @param \code{dimax} An \emph{integer} equal to the number of \emph{singular
#' values} used for calculating the \emph{reduced inverse} of the matrix
#' \code{matrixv} (the default is \code{dimax = 0} - standard matrix inverse
#' using all the \emph{singular values}).
#'
#' @param \code{singmin} A \emph{numeric} threshold level for discarding
#' small \emph{singular values} in order to regularize the inverse of the
#' matrix \code{matrixv} (the default is \code{0.0}).
#'
#' @return A \emph{matrix} equal to the \emph{reduced inverse} of the
#' matrix \code{matrixv}.
#'
#' @details
#' The function \code{calc_invsvd()} calculates the \emph{reduced
#' inverse} of the matrix \code{matrixv} using Singular Value Decomposition
#' (\emph{SVD}).
#'
#' The function \code{calc_invsvd()} first performs Singular Value
#' Decomposition (\emph{SVD}) of the matrix \code{matrixv}.
#' The \emph{SVD} of a matrix \eqn{\strong{C}} is defined as the
#' factorization:
#' \deqn{
#' \strong{C} = \strong{U} \, \Sigma \, \strong{V}^T
#' }
#' Where \eqn{\strong{U}} and \eqn{\strong{V}} are the left and right
#' \emph{singular matrices}, and \eqn{\Sigma} is a diagonal matrix of
#' \emph{singular values}.
#'
#' The inverse \eqn{\strong{C}^{-1}} of the matrix \eqn{\strong{C}} can be
#' calculated from the \emph{SVD} matrices as:
#' \deqn{
#' \strong{C}^{-1} = \strong{V} \, \Sigma^{-1} \, \strong{U}^T
#' }
#'
#' The \emph{reduced inverse} of the matrix \eqn{\strong{C}} is obtained
#' by removing \emph{singular vectors} with very small \emph{singular values}:
#' \deqn{
#' \strong{C}^{-1} = \strong{V}_n \, \Sigma_n^{-1} \, \strong{U}_n^T
#' }
#' Where \eqn{\strong{U}_n}, \eqn{\strong{V}_n} and \eqn{\Sigma_n} are the
#' \emph{SVD} matrices with the rows and columns corresponding to very small
#' \emph{singular values} removed.
#'
#' The function \code{calc_invsvd()} applies regularization to the matrix
#' inverse using the arguments \code{dimax} and \code{singmin}.
#'
#' The function \code{calc_invsvd()} applies regularization by discarding the
#' smallest \emph{singular values} \eqn{\sigma_i} that are less than the
#' threshold level \code{singmin} times the sum of all the
#' \emph{singular values}: \deqn{\sigma_i < singmin \cdot
#' (\sum{\sigma_i})}
#'
#' It also discards additional \emph{singular vectors} so that only the
#' highest order \emph{singular vectors} remain, up to order \code{dimax}. It
#' calculates the \emph{reduced inverse} from the \emph{SVD} matrices
#' using only the largest \emph{singular values} up to order \code{dimax}.
#' For example, if \code{dimax = 3} then it only uses the \code{3} highest
#' order \emph{singular vectors}, with the largest \emph{singular values}.
#' This has the effect of dimension reduction.
#'
#' If the matrix \code{matrixv} has a large number of small \emph{singular
#' values}, then the number of remaining \emph{singular values} may be less
#' than \code{dimax}.
#'
#' @examples
#' \dontrun{
#' # Calculate ETF returns
#' retp <- na.omit(rutils::etfenv$returns[, c("VTI", "TLT", "DBC")])
#' # Calculate covariance matrix
#' covmat <- cov(retp)
#' # Calculate matrix inverse using RcppArmadillo
#' invmat <- HighFreq::calc_invsvd(covmat)
#' # Calculate matrix inverse in R
#' invr <- solve(covmat)
#' all.equal(invmat, invr, check.attributes=FALSE)
#' # Calculate reduced inverse using RcppArmadillo
#' invmat <- HighFreq::calc_invsvd(covmat, dimax=3)
#' # Calculate reduced inverse from SVD in R
#' svdec <- svd(covmat)
#' dimax <- 1:3
#' invr <- svdec$v[, dimax] %*% (t(svdec$u[, dimax])/svdec$d[dimax])
#' # Compare RcppArmadillo with R
#' all.equal(invmat, invr)
#' }
#'
#' @export
calc_invsvd <- function(matrixv, dimax = 0L, singmin = 0.0) {
# Auto-generated binding: forwards to the registered C++ routine
# `_HighFreq_calc_invsvd` and returns the SVD-based (possibly reduced) inverse.
# dimax = 0L and singmin = 0.0 mean no regularization (standard inverse).
.Call(`_HighFreq_calc_invsvd`, matrixv, dimax, singmin)
}
#' Calculate the approximate inverse of a square \emph{matrix} recursively
#' using the Schulz formula (without copying the data in memory).
#'
#' @param \code{matrixv} A \emph{matrix} of data to be inverted.
#'
#' @param \code{invmat} A \emph{matrix} of data equal to the starting point for
#' the recursion.
#'
#' @param \code{niter} An \emph{integer} equal to the number of recursion
#' iterations.
#'
#' @return No return value.
#'
#' @details
#' The function \code{calc_invrec()} calculates the approximate inverse
#' \eqn{\strong{A}^{-1}} of a square \emph{matrix} \eqn{\strong{A}}
#' recursively using the Schulz formula:
#' \deqn{
#' \strong{A}_{i+1}^{-1} \leftarrow 2 \strong{A}_i^{-1} - \strong{A}_i^{-1} \strong{A} \strong{A}_i^{-1}
#' }
#' The Schulz formula is repeated \code{niter} times.
#' The Schulz formula is useful for updating the inverse when the matrix
#' \eqn{\strong{A}} changes only slightly. For example, for updating the
#' inverse of the covariance matrix as it changes slowly over time.
#'
#' The function \code{calc_invrec()} accepts a \emph{pointer} to the argument
#' \code{invmat} (which is the initial value of the inverse matrix for the
#' recursion), and it overwrites the old inverse matrix values with the
#' updated inverse values.
#'
#' The function \code{calc_invrec()} performs the calculation in place,
#' without copying the matrix in memory, which can significantly increase the
#' computation speed for large matrices.
#'
#' The function \code{calc_invrec()} doesn't return a value.
#' The function \code{calc_invrec()} performs the calculations using
#' \code{C++} \code{Armadillo} code.
#'
#' @examples
#' \dontrun{
#' # Calculate a random matrix
#' matrixv <- matrix(rnorm(100), nc=10)
#' # Define the initial value of the inverse matrix
#' invmat <- solve(matrixv) + matrix(rnorm(100, sd=0.1), nc=10)
#' # Calculate the inverse in place using RcppArmadillo
#' HighFreq::calc_invrec(matrixv, invmat, 3)
#' # Multiply the matrix times its inverse
#' multmat <- matrixv %*% invmat
#' round(multmat, 4)
#' # Calculate the sum of the off-diagonal elements
#' sum(multmat[upper.tri(multmat)])
#' # Compare RcppArmadillo with R
#' all.equal(invmat, solve(matrixv))
#' # Compare the speed of RcppArmadillo with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#' rcode=solve(matrixv),
#' cppcode=HighFreq::calc_invrec(matrixv, invmat, 3),
#' times=10))[, c(1, 4, 5)]
#' }
#'
#' @export
calc_invrec <- function(matrixv, invmat, niter = 1L) {
# Auto-generated binding: the C++ routine updates invmat in place (by
# reference) via niter Schulz iterations, so the .Call() result is wrapped in
# invisible() -- callers read the updated inverse from invmat.
invisible(.Call(`_HighFreq_calc_invrec`, matrixv, invmat, niter))
}
#' Calculate the inverse of a square \emph{matrix} in place, without copying
#' the data in memory.
#'
#' @param \code{matrixv} A \emph{matrix} of data to be inverted. (The argument
#' is interpreted as a \emph{pointer} to a \emph{matrix}, and it is
#' overwritten with the inverse matrix.)
#'
#' @return No return value.
#'
#' @details
#' The function \code{calc_invref()} calculates the inverse of a square
#' \emph{matrix} in place, without copying the data in memory. It accepts a
#' \emph{pointer} to the argument \code{matrixv} (which is the \code{matrix}
#' to be inverted), and it overwrites the old matrix values with the inverse
#' matrix values. It performs the calculation in place, without copying the
#' data in memory, which can significantly increase the computation speed for
#' large matrices.
#'
#' The function \code{calc_invref()} doesn't return a value.
#' The function \code{calc_invref()} calls the \code{C++} \code{Armadillo}
#' function \code{arma::inv()} to calculate the matrix inverse.
#'
#' @examples
#' \dontrun{
#' # Calculate a random matrix
#' matrixv <- matrix(rnorm(100), nc=10)
#' # Copy matrixv to a matrix in a different memory location
#' invmat <- matrixv + 0
#' # Calculate the inverse in place using RcppArmadillo
#' HighFreq::calc_invref(invmat)
#' # Multiply the matrix times its inverse
#' multmat <- matrixv %*% invmat
#' round(multmat, 4)
#' # Calculate the sum of the off-diagonal elements
#' sum(multmat[upper.tri(multmat)])
#' # Compare RcppArmadillo with R
#' all.equal(invmat, solve(matrixv))
#' # Compare the speed of RcppArmadillo with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#' rcode=solve(matrixv),
#' cppcode=calc_invref(matrixv),
#' times=10))[, c(1, 4, 5)]
#' }
#'
#' @export
calc_invref <- function(matrixv) {
# Auto-generated binding: the C++ routine overwrites matrixv in place with its
# inverse, so the .Call() result is wrapped in invisible() -- the function is
# used for its side effect on matrixv.
invisible(.Call(`_HighFreq_calc_invref`, matrixv))
}
#' Standardize (center and scale) the columns of a \emph{time series} of data
#' in place, without copying the data in memory, using \code{RcppArmadillo}.
#'
#' @param \code{tseries} A \emph{time series} or \emph{matrix} of data.
#'
#' @param \code{center} A \emph{Boolean} argument: if \code{TRUE} then center
#' the columns so that they have zero mean or median (the default is
#' \code{TRUE}).
#'
#' @param \code{scale} A \emph{Boolean} argument: if \code{TRUE} then scale the
#' columns so that they have unit standard deviation or MAD (the default is
#' \code{TRUE}).
#'
#' @param \code{use_median} A \emph{Boolean} argument: if \code{TRUE} then the
#' centrality (central tendency) is calculated as the \emph{median} and the
#' dispersion is calculated as the \emph{median absolute deviation}
#' (\emph{MAD}) (the default is \code{FALSE}).
#' If \code{use_median = FALSE} then the centrality is calculated as the
#' \emph{mean} and the dispersion is calculated as the \emph{standard
#' deviation}.
#'
#' @return Void (no return value - modifies the data in place).
#'
#' @details
#' The function \code{calc_scale()} standardizes (centers and scales) the
#' columns of a \emph{time series} of data in place, without copying the data
#' in memory, using \code{RcppArmadillo}.
#'
#' If the arguments \code{center} and \code{scale} are both \code{TRUE} and
#' \code{use_median} is \code{FALSE} (the defaults), then \code{calc_scale()}
#' performs the same calculation as the standard \code{R} function
#' \code{scale()}, and it calculates the centrality (central tendency) as the
#' \emph{mean} and the dispersion as the \emph{standard deviation}.
#'
#' If the arguments \code{center} and \code{scale} are both \code{TRUE} (the
#' defaults), then \code{calc_scale()} standardizes the data.
#' If the argument \code{center} is \code{FALSE} then \code{calc_scale()}
#' only scales the data (divides it by the standard deviations).
#' If the argument \code{scale} is \code{FALSE} then \code{calc_scale()}
#' only demeans the data (subtracts the means).
#'
#' If the argument \code{use_median} is \code{TRUE}, then it calculates the
#' centrality as the \emph{median} and the dispersion as the \emph{median
#' absolute deviation} (\emph{MAD}).
#'
#' If the number of rows of \code{tseries} is less than \code{3} then it
#' does nothing and \code{tseries} is not scaled.
#'
#' The function \code{calc_scale()} accepts a \emph{pointer} to the argument
#' \code{tseries}, and it overwrites the old data with the standardized data.
#' It performs the calculation in place, without copying the data in memory,
#' which can significantly increase the computation speed for large time
#' series.
#'
#' The function \code{calc_scale()} uses \code{RcppArmadillo} \code{C++}
#' code, so on a typical time series it can be over \emph{10} times faster
#' than the function \code{scale()}.
#'
#' @examples
#' \dontrun{
#' # Calculate a time series of returns
#' retp <- zoo::coredata(na.omit(rutils::etfenv$returns[, c("IEF", "VTI")]))
#' # Demean the returns
#' demeaned <- apply(retp, 2, function(x) (x-mean(x)))
#' HighFreq::calc_scale(retp, scale=FALSE)
#' all.equal(demeaned, retp, check.attributes=FALSE)
#' # Calculate a time series of returns
#' retp <- zoo::coredata(na.omit(rutils::etfenv$returns[, c("IEF", "VTI")]))
#' # Standardize the returns
#' retss <- scale(retp)
#' HighFreq::calc_scale(retp)
#' all.equal(retss, retp, check.attributes=FALSE)
#' # Compare the speed of Rcpp with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#' Rcode=scale(retp),
#' Rcpp=HighFreq::calc_scale(retp),
#' times=100))[, c(1, 4, 5)] # end microbenchmark summary
#' }
#'
#' @export
calc_scale <- function(tseries, center = TRUE, scale = TRUE, use_median = FALSE) {
# Auto-generated binding: the C++ routine standardizes the columns of tseries
# in place (by reference), so the .Call() result is wrapped in invisible() --
# callers observe the effect through the modified tseries.
invisible(.Call(`_HighFreq_calc_scale`, tseries, center, scale, use_median))
}
#' Aggregate a time series of data into a single bar of \emph{OHLC} data.
#'
#' @param \code{tseries} A \emph{time series} or a \emph{matrix} with multiple
#' columns of data.
#'
#' @return A \emph{matrix} containing a single row, with the \emph{open},
#' \emph{high}, \emph{low}, and \emph{close} values, and also the total
#' \emph{volume} (if provided as either the second or fifth column of
#' \code{tseries}).
#'
#' @details
#' The function \code{agg_ohlc()} aggregates a time series of data into a
#' single bar of \emph{OHLC} data. It can accept either a single column of
#' data or four columns of \emph{OHLC} data.
#' It can also accept an additional column containing the trading volume.
#'
#' The function \code{agg_ohlc()} calculates the \emph{open} value as equal to
#' the \emph{open} value of the first row of \code{tseries}.
#' The \emph{high} value as the maximum of the \emph{high} column of
#' \code{tseries}.
#' The \emph{low} value as the minimum of the \emph{low} column of
#' \code{tseries}.
#' The \emph{close} value as the \emph{close} of the last row of
#' \code{tseries}.
#' The \emph{volume} value as the sum of the \emph{volume} column of
#' \code{tseries}.
#'
#' For a single column of data, the \emph{open}, \emph{high}, \emph{low}, and
#' \emph{close} values are all the same.
#'
#' @examples
#' \dontrun{
#' # Define matrix of OHLC data
#' ohlc <- coredata(rutils::etfenv$VTI[, 1:5])
#' # Aggregate to single row matrix
#' ohlcagg <- HighFreq::agg_ohlc(ohlc)
#' # Compare with calculation in R
#' all.equal(drop(ohlcagg),
#' c(ohlc[1, 1], max(ohlc[, 2]), min(ohlc[, 3]), ohlc[NROW(ohlc), 4], sum(ohlc[, 5])),
#' check.attributes=FALSE)
#' }
#'
#' @export
agg_ohlc <- function(tseries) {
# Auto-generated binding: forwards tseries to the registered C++ routine
# `_HighFreq_agg_ohlc` and returns the single-row OHLC(V) aggregate matrix.
.Call(`_HighFreq_agg_ohlc`, tseries)
}
#' Aggregate a time series to an \emph{OHLC} time series with lower
#' periodicity.
#'
#' Given a time series of prices at a higher periodicity (say seconds), it
#' calculates the \emph{OHLC} prices at a lower periodicity (say minutes).
#'
#' @param \code{tseries} A \emph{time series} or a \emph{matrix} with multiple
#' columns of data.
#'
#' @param \code{endd} An \emph{integer vector} of end points.
#'
#' @return A \emph{matrix} with \emph{OHLC} data, with the number of rows equal
#' to the number of \emph{endd} minus one.
#'
#' @details
#' The function \code{roll_ohlc()} performs a loop over the end points
#' \emph{endd}, along the rows of the data \code{tseries}. At each end point,
#' it selects the past rows of the data \code{tseries}, starting at the first
#' bar after the previous end point, and then calls the function
#' \code{agg_ohlc()} on the selected data \code{tseries} to calculate the
#' aggregations.
#'
#' The function \code{roll_ohlc()} can accept either a single column of data
#' or four columns of \emph{OHLC} data.
#' It can also accept an additional column containing the trading volume.
#'
#' The function \code{roll_ohlc()} performs a similar aggregation as the
#' function \code{to.period()} from package
#' \href{https://cran.r-project.org/web/packages/xts/index.html}{xts}.
#'
#' @examples
#' \dontrun{
#' # Define matrix of OHLC data
#' ohlc <- rutils::etfenv$VTI[, 1:5]
#' # Define end points at 25 day intervals
#' endd <- HighFreq::calc_endpoints(NROW(ohlc), step=25)
#' # Aggregate over endd:
#' ohlcagg <- HighFreq::roll_ohlc(tseries=ohlc, endd=endd)
#' # Compare with xts::to.period()
#' ohlcagg_xts <- .Call("toPeriod", ohlc, as.integer(endd+1), TRUE, NCOL(ohlc), FALSE, FALSE, colnames(ohlc), PACKAGE="xts")
#' all.equal(ohlcagg, coredata(ohlcagg_xts), check.attributes=FALSE)
#' }
#'
#' @export
roll_ohlc <- function(tseries, endd) {
# Auto-generated binding: forwards to the registered C++ routine
# `_HighFreq_roll_ohlc`, which aggregates tseries into OHLC bars over the
# intervals defined by the end points endd.
.Call(`_HighFreq_roll_ohlc`, tseries, endd)
}
#' Calculate the rolling convolutions (weighted sums) of a \emph{time series}
#' with a single-column \emph{matrix} of weights.
#'
#' @param \code{tseries} A \emph{time series} or a \emph{matrix} of data.
#'
#' @param \code{weightv} A single-column \emph{matrix} of weights.
#'
#' @return A \emph{matrix} with the same dimensions as the input argument
#' \code{tseries}.
#'
#' @details
#' The function \code{roll_conv()} calculates the convolutions of the
#' \emph{matrix} columns with a single-column \emph{matrix} of weights. It
#' performs a loop over the \emph{matrix} rows and multiplies the past
#' (higher) values by the weights. It calculates the rolling weighted sums
#' of the past data.
#'
#' The function \code{roll_conv()} uses the \code{Armadillo} function
#' \code{arma::conv2()}. It performs a similar calculation to the standard
#' \code{R} function \cr\code{filter(x=tseries, filter=weightv,
#' method="convolution", sides=1)}, but it's over \code{6} times faster, and
#' it doesn't produce any leading \code{NA} values.
#'
#' @examples
#' \dontrun{
#' # First example
#' # Calculate a time series of returns
#' retp <- na.omit(rutils::etfenv$returns[, c("IEF", "VTI")])
#' # Create simple weights equal to a 1 value plus zeros
#' weightv <- c(1, rep(0, 10))
#' # Calculate rolling weighted sums
#' retf <- HighFreq::roll_conv(retp, weightv)
#' # Compare with original
#' all.equal(coredata(retp), retf, check.attributes=FALSE)
#' # Second example
#' # Calculate exponentially decaying weights
#' weightv <- exp(-0.2*(1:11))
#' weightv <- weightv/sum(weightv)
#' # Calculate rolling weighted sums
#' retf <- HighFreq::roll_conv(retp, weightv)
#' # Calculate rolling weighted sums using filter()
#' retc <- filter(x=retp, filter=weightv, method="convolution", sides=1)
#' # Compare both methods
#' all.equal(retc[-(1:11), ], retf[-(1:11), ], check.attributes=FALSE)
#' }
#'
#' @export
roll_conv <- function(tseries, weightv) {
# Auto-generated binding: forwards to the registered C++ routine
# `_HighFreq_roll_conv`, which returns the rolling convolutions (weighted
# sums) of the columns of tseries with the weights weightv.
.Call(`_HighFreq_roll_conv`, tseries, weightv)
}
#' Calculate the rolling sums over a \emph{time series} or a \emph{matrix}
#' using \emph{Rcpp}.
#'
#' @param \code{tseries} A \emph{time series} or a \emph{matrix}.
#'
#' @param \code{lookb} The length of the look-back interval, equal to the
#' number of data points included in calculating the rolling sum (the default
#' is \code{lookb = 1}).
#'
#' @param \code{weightv} A single-column \emph{matrix} of weights (the default
#' is \code{weightv = 0}).
#'
#' @return A \emph{matrix} with the same dimensions as the input argument
#' \code{tseries}.
#'
#' @details
#' The function \code{roll_sum()} calculates the rolling \emph{weighted} sums
#' over the columns of the data \code{tseries}.
#'
#' If the argument \code{weightv} is equal to zero (the default), then the
#' function \code{roll_sum()} calculates the simple rolling sums of the
#' \emph{time series} data \eqn{p_t} over the look-back interval \eqn{\Delta}:
#' \deqn{
#' \bar{p}_t = \sum_{j=(t-\Delta+1)}^{t} p_j
#' }
#'
#' If the \code{weightv} argument has the same number of rows as the argument
#' \code{tseries}, then the function \code{roll_sum()} calculates rolling
#' \emph{weighted} sums of the \emph{time series} data \eqn{p_t} in two steps.
#'
#' It first calculates the rolling sums of the products of the weights
#' \eqn{w_t} times the \emph{time series} data \eqn{p_t} over the look-back
#' interval \eqn{\Delta}:
#' \deqn{
#' \bar{w}_t = \sum_{j=(t-\Delta+1)}^{t} w_j
#' }
#' \deqn{
#' \bar{p}^w_t = \sum_{j=(t-\Delta+1)}^{t} w_j p_j
#' }
#'
#' It then calculates the rolling \emph{weighted} sums \eqn{\bar{p}_t} as the
#' ratio of the sum products of the weights and the data, divided by the sums
#' of the weights:
#' \deqn{
#' \bar{p}_t = \frac{\bar{p}^w_t}{\bar{w}_t}
#' }
#'
#' The function \code{roll_sum()} returns a \emph{matrix} with the same
#' dimensions as the input argument \code{tseries}.
#'
#' The function \code{roll_sum()} is written in \code{C++} \code{Armadillo}
#' code, so it's much faster than equivalent \code{R} code.
#'
#' @examples
#' \dontrun{
#' # Calculate historical returns
#' retp <- na.omit(rutils::etfenv$returns[, c("VTI", "IEF")])
#' # Define parameters
#' lookb <- 11
#' # Calculate rolling sums and compare with rutils::roll_sum()
#' sumc <- HighFreq::roll_sum(retp, lookb)
#' sumr <- rutils::roll_sum(retp, lookb)
#' all.equal(sumc, coredata(sumr), check.attributes=FALSE)
#' # Calculate rolling sums using R code
#' sumr <- apply(zoo::coredata(retp), 2, cumsum)
#' sumlag <- rbind(matrix(numeric(2*lookb), nc=2), sumr[1:(NROW(sumr) - lookb), ])
#' sumr <- (sumr - sumlag)
#' all.equal(sumc, sumr, check.attributes=FALSE)
#' # Calculate weights equal to the trading volumes
#' weightv <- quantmod::Vo(rutils::etfenv$VTI)
#' weightv <- weightv[zoo::index(retp)]
#' # Calculate rolling weighted sums
#' sumc <- HighFreq::roll_sum(retp, lookb, 1/weightv)
#' # Plot dygraph of the weighted sums
#' datav <- cbind(retp$VTI, sumc[, 1])
#' colnames(datav) <- c("VTI", "Weighted")
#' endd <- rutils::calc_endpoints(datav, interval="weeks")
#' dygraphs::dygraph(cumsum(datav)[endd], main=colnames(datav)) %>%
#' dyOptions(colors=c("blue", "red"), strokeWidth=2) %>%
#' dyLegend(width=300)
#' }
#'
#' @export
roll_sum <- function(tseries, lookb = 1L, weightv = 0L) {
    # Auto-generated Rcpp binding (compileAttributes): forwards its arguments
    # unchanged to the registered C++ routine; do not edit by hand.
    .Call(`_HighFreq_roll_sum`, tseries, lookb, weightv)
}
#' Calculate the rolling sums at the end points of a \emph{time series} or a
#' \emph{matrix}.
#'
#' @param \code{tseries} A \emph{time series} or a \emph{matrix}.
#'
#' @param \code{startp} An \emph{integer} vector of start points (the default
#' is \code{startp = 0}).
#'
#' @param \code{endd} An \emph{integer} vector of end points (the default is
#' \code{endd = 0}).
#'
#' @param \code{step} The number of time periods between the end points (the
#' default is \code{step = 1}).
#'
#' @param \code{lookb} The number of end points in the look-back interval
#' (the default is \code{lookb = 1}).
#'
#' @param \code{stub} An \emph{integer} value equal to the first end point for
#' calculating the end points (the default is \code{stub = 0}).
#'
#' @return A \emph{matrix} with the same number of columns as the input time
#' series \code{tseries}, and the number of rows equal to the number of end
#' points.
#'
#' @details
#' The function \code{roll_sumep()} calculates the rolling sums at the end
#' points of the \emph{time series} \code{tseries}.
#'
#' The function \code{roll_sumep()} is implemented in \code{RcppArmadillo}
#' \code{C++} code, which makes it several times faster than \code{R} code.
#'
#' @examples
#' \dontrun{
#' # Calculate historical returns
#' retp <- na.omit(rutils::etfenv$returns[, c("VTI", "IEF")])
#' # Define end points at 25 day intervals
#' endd <- HighFreq::calc_endpoints(NROW(retp), step=25)
#' # Define start points as 75 day lag of end points
#' startp <- HighFreq::calc_startpoints(endd, lookb=3)
#' # Calculate rolling sums using Rcpp
#' sumc <- HighFreq::roll_sumep(retp, startp=startp, endd=endd)
#' # Calculate rolling sums using R code
#' sumr <- sapply(1:NROW(endd), function(ep) {
#' colSums(retp[(startp[ep]+1):(endd[ep]+1), ])
#' }) # end sapply
#' sumr <- t(sumr)
#' all.equal(sumc, sumr, check.attributes=FALSE)
#' }
#'
#' @export
roll_sumep <- function(tseries, startp = 0L, endd = 0L, step = 1L, lookb = 1L, stub = 0L) {
    # Auto-generated Rcpp binding (compileAttributes): forwards its arguments
    # unchanged to the registered C++ routine; do not edit by hand.
    .Call(`_HighFreq_roll_sumep`, tseries, startp, endd, step, lookb, stub)
}
#' Calculate the rolling weighted sums over a \emph{time series} or a
#' \emph{matrix} using \emph{Rcpp}.
#'
#' @param \code{tseries} A \emph{time series} or a \emph{matrix}.
#'
#' @param \code{endd} An \emph{integer} vector of end points (the default is
#' \code{endd = NULL}).
#'
#' @param \code{lookb} The length of the look-back interval, equal to the
#' number of data points included in calculating the rolling sum (the default
#' is \code{lookb = 1}).
#'
#' @param \code{stub} An \emph{integer} value equal to the first end point for
#' calculating the end points (the default is \code{stub = NULL}).
#'
#' @param \code{weightv} A single-column \emph{matrix} of weights (the default
#' is \code{weightv = NULL}).
#'
#' @return A \emph{matrix} with the same dimensions as the input argument
#' \code{tseries}.
#'
#' @details
#' The function \code{roll_sumw()} calculates the rolling weighted sums over
#' the columns of the data \code{tseries}.
#'
#' The function \code{roll_sumw()} calculates the rolling weighted sums as
#' convolutions of the columns of \code{tseries} with the \emph{column
#' vector} of weights using the \code{Armadillo} function
#' \code{arma::conv2()}. It performs a similar calculation to the standard
#' \code{R} function \cr\code{stats::filter(x=retp, filter=weightv,
#' method="convolution", sides=1)}, but it can be many times faster, and it
#' doesn't produce any leading \code{NA} values.
#'
#' The function \code{roll_sumw()} returns a \emph{matrix} with the same
#' dimensions as the input argument \code{tseries}.
#'
#' The arguments \code{weightv}, \code{endd}, and \code{stub} are
#' optional.
#'
#' If the argument \code{weightv} is not supplied, then simple sums are
#' calculated, not weighted sums.
#'
#' If either the \code{stub} or \code{endd} arguments are supplied,
#' then the rolling sums are calculated at the end points.
#'
#' If only the argument \code{stub} is supplied, then the end points are
#' calculated from the \code{stub} and \code{lookb} arguments. The first
#' end point is equal to \code{stub} and the end points are spaced
#' \code{lookb} periods apart.
#'
#' If the arguments \code{weightv}, \code{endd}, and \code{stub} are
#' not supplied, then the sums are calculated over a number of data points
#' equal to \code{lookb}.
#'
#' The function \code{roll_sumw()} is also several times faster than
#' \code{rutils::roll_sum()} which uses vectorized \code{R} code.
#'
#' Technical note:
#' The function \code{roll_sumw()} has arguments with default values equal to
#' \code{NULL}, which are implemented in \code{Rcpp} code.
#'
#' @examples
#' \dontrun{
#' # First example
#' # Calculate historical returns
#' retp <- na.omit(rutils::etfenv$returns[, c("VTI", "IEF")])
#' # Define parameters
#' lookb <- 11
#' # Calculate rolling sums and compare with rutils::roll_sum()
#' sumc <- HighFreq::roll_sum(retp, lookb)
#' sumr <- rutils::roll_sum(retp, lookb)
#' all.equal(sumc, coredata(sumr), check.attributes=FALSE)
#' # Calculate rolling sums using R code
#' sumr <- apply(zoo::coredata(retp), 2, cumsum)
#' sumlag <- rbind(matrix(numeric(2*lookb), nc=2), sumr[1:(NROW(sumr) - lookb), ])
#' sumr <- (sumr - sumlag)
#' all.equal(sumc, sumr, check.attributes=FALSE)
#'
#' # Calculate rolling sums at end points
#' stubv <- 21
#' sumc <- HighFreq::roll_sumw(retp, lookb=lookb, stub=stubv)
#' endd <- (stubv + lookb*(0:(NROW(retp) %/% lookb)))
#' endd <- endd[endd < NROW(retp)]
#' sumr <- apply(zoo::coredata(retp), 2, cumsum)
#' sumr <- sumr[endd+1, ]
#' sumlag <- rbind(numeric(2), sumr[1:(NROW(sumr) - 1), ])
#' sumr <- (sumr - sumlag)
#' all.equal(sumc, sumr, check.attributes=FALSE)
#'
#' # Calculate rolling sums at end points - pass in endd
#' sumc <- HighFreq::roll_sumw(retp, endd=endd)
#' all.equal(sumc, sumr, check.attributes=FALSE)
#'
#' # Create exponentially decaying weights
#' weightv <- exp(-0.2*(1:11))
#' weightv <- matrix(weightv/sum(weightv), nc=1)
#' # Calculate rolling weighted sum
#' sumc <- HighFreq::roll_sumw(retp, weightv=weightv)
#' # Calculate rolling weighted sum using filter()
#' retc <- filter(x=retp, filter=weightv, method="convolution", sides=1)
#' all.equal(sumc[-(1:11), ], retc[-(1:11), ], check.attributes=FALSE)
#'
#' # Calculate rolling weighted sums at end points
#' sumc <- HighFreq::roll_sumw(retp, endd=endd, weightv=weightv)
#' all.equal(sumc, retc[endd+1, ], check.attributes=FALSE)
#'
#' # Create simple weights equal to a 1 value plus zeros
#' weightv <- matrix(c(1, rep(0, 10)), nc=1)
#' # Calculate rolling weighted sum
#' weighted <- HighFreq::roll_sumw(retp, weightv=weightv)
#' # Compare with original
#' all.equal(coredata(retp), weighted, check.attributes=FALSE)
#' }
#'
#' @export
roll_sumw <- function(tseries, endd = NULL, lookb = 1L, stub = NULL, weightv = NULL) {
    # Auto-generated Rcpp binding (compileAttributes): forwards its arguments
    # unchanged to the registered C++ routine; do not edit by hand.
    # NULL defaults are resolved inside the C++ code (see Technical note above).
    .Call(`_HighFreq_roll_sumw`, tseries, endd, lookb, stub, weightv)
}
#' Calculate the exponential moving average (EMA) of streaming \emph{time
#' series} data using an online recursive formula.
#'
#' @param \code{tseries} A \emph{time series} or a \emph{matrix}.
#'
#' @param \code{lambda} A decay factor which multiplies past estimates.
#'
#' @param \code{weightv} A single-column \emph{matrix} of weights (the default
#' is \code{weightv = 0}).
#'
#' @return A \emph{matrix} with the same dimensions as the input argument
#' \code{tseries}.
#'
#' @details
#' The function \code{run_mean()} calculates the exponential moving average
#' (EMA) of the streaming \emph{time series} data \eqn{p_t} by recursively
#' weighting present and past values using the decay factor \eqn{\lambda}. If
#' the \code{weightv} argument is equal to zero, then the function
#' \code{run_mean()} simply calculates the exponentially weighted moving
#' average value of the streaming \emph{time series} data \eqn{p_t}:
#' \deqn{
#' \bar{p}_t = \lambda \bar{p}_{t-1} + (1-\lambda) p_t = (1-\lambda) \sum_{j=0}^{n} \lambda^j p_{t-j}
#' }
#'
#' Some applications require applying additional weight factors, like for
#' example the volume-weighted average price indicator (VWAP). Then the
#' streaming prices can be multiplied by the streaming trading volumes.
#'
#' If the argument \code{weightv} has the same number of rows as the argument
#' \code{tseries}, then the function \code{run_mean()} calculates the
#' exponential moving average (EMA) in two steps.
#'
#' First it calculates the trailing mean weights \eqn{\bar{w}_t}:
#' \deqn{
#' \bar{w}_t = \lambda \bar{w}_{t-1} + (1-\lambda) w_t
#' }
#'
#' Second it calculates the trailing mean products \eqn{\bar{w p}_t} of the
#' weights \eqn{w_t} and the data \eqn{p_t}:
#' \deqn{
#' \bar{w p}_t = \lambda \bar{w p}_{t-1} + (1-\lambda) w_t p_t
#' }
#' Where \eqn{p_t} is the streaming data, \eqn{w_t} are the streaming
#' weights, \eqn{\bar{w}_t} are the trailing mean weights, and \eqn{\bar{w p}_t}
#' are the trailing mean products of the data and the weights.
#'
#' The trailing mean weighted value \eqn{\bar{p}_t} is equal to the ratio of the
#' data and weights products, divided by the mean weights:
#' \deqn{
#' \bar{p}_t = \frac{\bar{w p}_t}{\bar{w}_t}
#' }
#'
#' The above online recursive formulas are convenient for processing live
#' streaming data because they don't require maintaining a buffer of past
#' data.
#' The formulas are equivalent to a convolution with exponentially decaying
#' weights, but they're much faster to calculate.
#' Using exponentially decaying weights is more natural than using a sliding
#' look-back interval, because it gradually "forgets" about the past data.
#'
#' The value of the decay factor \eqn{\lambda} must be in the range between
#' \code{0} and \code{1}.
#' If \eqn{\lambda} is close to \code{1} then the decay is weak and past
#' values have a greater weight, and the trailing mean values have a stronger
#' dependence on past data. This is equivalent to a long look-back
#' interval.
#' If \eqn{\lambda} is much less than \code{1} then the decay is strong and
#' past values have a smaller weight, and the trailing mean values have a
#' weaker dependence on past data. This is equivalent to a short look-back
#' interval.
#'
#' The function \code{run_mean()} performs the same calculation as the
#' standard \code{R} function\cr\code{stats::filter(x=series, filter=lambda,
#' method="recursive")}, but it's several times faster.
#'
#' The function \code{run_mean()} returns a \emph{matrix} with the same
#' dimensions as the input argument \code{tseries}.
#'
#' @examples
#' \dontrun{
#' # Calculate historical prices
#' ohlc <- rutils::etfenv$VTI
#' closep <- quantmod::Cl(ohlc)
#' # Calculate the trailing means
#' lambdaf <- 0.9
#' meanv <- HighFreq::run_mean(closep, lambda=lambdaf)
#' # Calculate the trailing means using R code
#' pricef <- (1-lambdaf)*filter(closep,
#' filter=lambdaf, init=as.numeric(closep[1, 1])/(1-lambdaf),
#' method="recursive")
#' all.equal(drop(meanv), unclass(pricef), check.attributes=FALSE)
#'
#' # Compare the speed of RcppArmadillo with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#' Rcpp=HighFreq::run_mean(closep, lambda=lambdaf),
#' Rcode=filter(closep, filter=lambdaf, init=as.numeric(closep[1, 1])/(1-lambdaf), method="recursive"),
#' times=10))[, c(1, 4, 5)] # end microbenchmark summary
#'
#' # Calculate weights equal to the trading volumes
#' weightv <- quantmod::Vo(ohlc)
#' # Calculate the exponential moving average (EMA)
#' meanw <- HighFreq::run_mean(closep, lambda=lambdaf, weightv=weightv)
#' # Plot dygraph of the EMA
#' datav <- xts(cbind(meanv, meanw), zoo::index(ohlc))
#' colnames(datav) <- c("means trailing", "means weighted")
#' dygraphs::dygraph(datav, main="Trailing Means") %>%
#' dyOptions(colors=c("blue", "red"), strokeWidth=2) %>%
#' dyLegend(show="always", width=300)
#' }
#'
#' @export
run_mean <- function(tseries, lambda, weightv = 0L) {
    # Auto-generated Rcpp binding (compileAttributes): forwards its arguments
    # unchanged to the registered C++ routine; do not edit by hand.
    .Call(`_HighFreq_run_mean`, tseries, lambda, weightv)
}
#' Calculate the trailing maximum values of streaming \emph{time series} data
#' using an online recursive formula.
#'
#' @param \code{tseries} A \emph{time series} or a \emph{matrix}.
#'
#' @param \code{lambda} A decay factor which multiplies past estimates.
#'
#' @return A \emph{matrix} with the same dimensions as the input argument
#' \code{tseries}.
#'
#' @details
#' The function \code{run_max()} calculates the trailing maximum values of
#' streaming \emph{time series} data by recursively weighting present and past
#' values using the decay factor \eqn{\lambda}.
#'
#' It calculates the trailing maximum values \eqn{p^{max}_t} of the streaming
#' data \eqn{p_t} as follows:
#' \deqn{
#' p^{max}_t = max(p_t, \lambda p^{max}_{t-1} + (1-\lambda) p_t)
#' }
#' The first term in the sum is the maximum value multiplied by the decay
#' factor \eqn{\lambda}, so that the past maximum value is gradually
#' "forgotten". The second term pulls the maximum value to the current value
#' \eqn{p_t}.
#'
#' The value of the decay factor \eqn{\lambda} must be in the range between
#' \code{0} and \code{1}.
#' If \eqn{\lambda} is close to \code{1} then the past maximum values persist
#' for longer. This is equivalent to a long look-back interval.
#' If \eqn{\lambda} is much less than \code{1} then the past maximum values
#' decay quickly, and the trailing maximum depends on the more recent
#' streaming data. This is equivalent to a short look-back interval.
#'
#' The above formula can also be expressed as:
#' \deqn{
#' p^{max}_t = \lambda max(p_t, p^{max}_{t-1}) + (1-\lambda) p_t
#' }
#' The first term is the maximum value multiplied by the decay factor
#' \eqn{\lambda}, so that the past maximum value is gradually "forgotten".
#' The second term pulls the maximum value to the current value \eqn{p_t}.
#'
#' The above recursive formula is convenient for processing live streaming
#' data because it doesn't require maintaining a buffer of past data.
#'
#' The function \code{run_max()} returns a \emph{matrix} with the same
#' dimensions as the input argument \code{tseries}.
#'
#' @examples
#' \dontrun{
#' # Calculate historical prices
#' closep <- zoo::coredata(quantmod::Cl(rutils::etfenv$VTI))
#' # Calculate the trailing maximums
#' lambdaf <- 0.9
#' pricmax <- HighFreq::run_max(closep, lambda=lambdaf)
#' # Plot dygraph of VTI prices and trailing maximums
#' datav <- cbind(quantmod::Cl(rutils::etfenv$VTI), pricmax)
#' colnames(datav) <- c("prices", "max")
#' colnamev <- colnames(datav)
#' dygraphs::dygraph(datav, main="VTI Prices and Trailing Maximums") %>%
#' dySeries(label=colnamev[1], strokeWidth=2, col="blue") %>%
#' dySeries(label=colnamev[2], strokeWidth=2, col="red")
#' }
#'
#' @export
run_max <- function(tseries, lambda) {
    # Auto-generated Rcpp binding (compileAttributes): forwards its arguments
    # unchanged to the registered C++ routine; do not edit by hand.
    .Call(`_HighFreq_run_max`, tseries, lambda)
}
#' Calculate the trailing minimum values of streaming \emph{time series} data
#' using an online recursive formula.
#'
#' @param \code{tseries} A \emph{time series} or a \emph{matrix}.
#'
#' @param \code{lambda} A decay factor which multiplies past estimates.
#'
#' @return A \emph{matrix} with the same dimensions as the input argument
#' \code{tseries}.
#'
#' @details
#' The function \code{run_min()} calculates the trailing minimum values of
#' streaming \emph{time series} data by recursively weighting present and past
#' values using the decay factor \eqn{\lambda}.
#'
#' It calculates the trailing minimum values \eqn{p^{min}_t} of the streaming
#' data \eqn{p_t} as follows:
#' \deqn{
#' p^{min}_t = min(p_t, \lambda p^{min}_{t-1} + (1-\lambda) p_t)
#' }
#' The first term in the sum is the minimum value multiplied by the decay
#' factor \eqn{\lambda}, so that the past minimum value is gradually
#' "forgotten". The second term pulls the minimum value to the current value
#' \eqn{p_t}.
#'
#' The value of the decay factor \eqn{\lambda} must be in the range between
#' \code{0} and \code{1}.
#' If \eqn{\lambda} is close to \code{1} then the past minimum values persist
#' for longer. This is equivalent to a long look-back interval.
#' If \eqn{\lambda} is much less than \code{1} then the past minimum values
#' decay quickly, and the trailing minimum depends on the more recent
#' streaming data. This is equivalent to a short look-back interval.
#'
#' The above formula can also be expressed as:
#' \deqn{
#' p^{min}_t = \lambda min(p_t, p^{min}_{t-1}) + (1-\lambda) p_t
#' }
#' The first term is the minimum value multiplied by the decay factor
#' \eqn{\lambda}, so that the past minimum value is gradually "forgotten".
#' The second term pulls the minimum value to the current value \eqn{p_t}.
#'
#' The above recursive formula is convenient for processing live streaming
#' data because it doesn't require maintaining a buffer of past data.
#'
#' The function \code{run_min()} returns a \emph{matrix} with the same
#' dimensions as the input argument \code{tseries}.
#'
#' @examples
#' \dontrun{
#' # Calculate historical prices
#' closep <- zoo::coredata(quantmod::Cl(rutils::etfenv$VTI))
#' # Calculate the trailing minimums
#' lambdaf <- 0.9
#' pricmin <- HighFreq::run_min(closep, lambda=lambdaf)
#' # Plot dygraph of VTI prices and trailing minimums
#' datav <- cbind(quantmod::Cl(rutils::etfenv$VTI), pricmin)
#' colnames(datav) <- c("prices", "min")
#' colnamev <- colnames(datav)
#' dygraphs::dygraph(datav, main="VTI Prices and Trailing Minimums") %>%
#' dySeries(label=colnamev[1], strokeWidth=1, col="blue") %>%
#' dySeries(label=colnamev[2], strokeWidth=1, col="red")
#' }
#'
#' @export
run_min <- function(tseries, lambda) {
    # Auto-generated Rcpp binding (compileAttributes): forwards its arguments
    # unchanged to the registered C++ routine; do not edit by hand.
    .Call(`_HighFreq_run_min`, tseries, lambda)
}
#' Calculate the trailing variance of streaming \emph{time series} of data
#' using an online recursive formula.
#'
#' @param \code{tseries} A \emph{time series} or a \emph{matrix} of data.
#'
#' @param \code{lambda} A decay factor which multiplies past estimates.
#'
#' @return A \emph{matrix} with the same dimensions as the input argument
#' \code{tseries}.
#'
#' @details
#' The function \code{run_var()} calculates the trailing variance of
#' streaming \emph{time series} of data \eqn{r_t}, by recursively weighting
#' the past variance estimates \eqn{\sigma^2_{t-1}}, with the squared
#' differences of the data minus its trailing means \eqn{(r_t -
#' \bar{r}_t)^2}, using the decay factor \eqn{\lambda}:
#' \deqn{
#' \bar{r}_t = \lambda \bar{r}_{t-1} + (1-\lambda) r_t
#' }
#' \deqn{
#' \sigma^2_t = \lambda \sigma^2_{t-1} + (1-\lambda) (r_t - \bar{r}_t)^2
#' }
#' Where \eqn{r_t} are the streaming data, \eqn{\bar{r}_t} are the trailing
#' means, and \eqn{\sigma^2_t} are the trailing variance estimates.
#'
#' The above online recursive formulas are convenient for processing live
#' streaming data because they don't require maintaining a buffer of past
#' data.
#' The formulas are equivalent to a convolution with exponentially decaying
#' weights, but they're much faster to calculate.
#' Using exponentially decaying weights is more natural than using a sliding
#' look-back interval, because it gradually "forgets" about the past data.
#'
#' The value of the decay factor \eqn{\lambda} must be in the range between
#' \code{0} and \code{1}.
#' If \eqn{\lambda} is close to \code{1} then the decay is weak and past
#' values have a greater weight, and the trailing variance values have a
#' stronger dependence on past data. This is equivalent to a long
#' look-back interval.
#' If \eqn{\lambda} is much less than \code{1} then the decay is strong and
#' past values have a smaller weight, and the trailing variance values have a
#' weaker dependence on past data. This is equivalent to a short look-back
#' interval.
#'
#' The function \code{run_var()} performs a similar calculation to the
#' standard \code{R} function\cr\code{stats::filter(x=series,
#' filter=lambda, method="recursive")}, but it's several times faster.
#'
#' The function \code{run_var()} returns a \emph{matrix} with the same
#' dimensions as the input argument \code{tseries}.
#'
#' @examples
#' \dontrun{
#' # Calculate historical returns
#' retp <- zoo::coredata(na.omit(rutils::etfenv$returns$VTI))
#' # Calculate the trailing variance
#' lambdaf <- 0.9
#' vars <- HighFreq::run_var(retp, lambda=lambdaf)
#' # Calculate centered returns
#' retc <- (retp - HighFreq::run_mean(retp, lambda=lambdaf))
#' # Calculate the trailing variance using R code
#' retc2 <- (1-lambdaf)*filter(retc^2, filter=lambdaf,
#' init=as.numeric(retc[1, 1])^2/(1-lambdaf),
#' method="recursive")
#' all.equal(vars, unclass(retc2), check.attributes=FALSE)
#' # Compare the speed of RcppArmadillo with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#' Rcpp=HighFreq::run_var(retp, lambda=lambdaf),
#' Rcode=filter(retc^2, filter=lambdaf, init=as.numeric(retc[1, 1])^2/(1-lambdaf), method="recursive"),
#' times=10))[, c(1, 4, 5)] # end microbenchmark summary
#' }
#'
#' @export
run_var <- function(tseries, lambda) {
    # Auto-generated Rcpp binding (compileAttributes): forwards its arguments
    # unchanged to the registered C++ routine; do not edit by hand.
    .Call(`_HighFreq_run_var`, tseries, lambda)
}
#' Calculate the trailing means, volatilities, and z-scores of a streaming
#' \emph{time series} of data using an online recursive formula.
#'
#' @param \code{tseries} A single \emph{time series} or a single column
#' \emph{matrix} of data.
#'
#' @param \code{lambda} A decay factor which multiplies past estimates.
#'
#' @return A \emph{matrix} with three columns (means, volatilities, and
#' z-scores) and the same number of rows as the input argument
#' \code{tseries}.
#'
#' @details
#' The function \code{run_zscores()} calculates the trailing means,
#' volatilities, and z-scores of a single streaming \emph{time series} of
#' data \eqn{r_t}, by recursively weighting the past variance estimates
#' \eqn{\sigma^2_{t-1}}, with the squared differences of the data minus its
#' trailing means \eqn{(r_t - \bar{r}_t)^2}, using the decay factor
#' \eqn{\lambda}:
#' \deqn{
#' \bar{r}_t = \lambda \bar{r}_{t-1} + (1-\lambda) r_t
#' }
#' \deqn{
#' \sigma^2_t = \lambda \sigma^2_{t-1} + (1-\lambda) (r_t - \bar{r}_t)^2
#' }
#' \deqn{
#' z_t = \frac{r_t - \bar{r}_t}{\sigma_t}
#' }
#' Where \eqn{r_t} are the streaming data, \eqn{\bar{r}_t} are the trailing
#' means, \eqn{\sigma^2_t} are the trailing variance estimates, and \eqn{z_t}
#' are the z-scores.
#'
#' The above online recursive formulas are convenient for processing live
#' streaming data because they don't require maintaining a buffer of past
#' data.
#' The formulas are equivalent to a convolution with exponentially decaying
#' weights, but they're much faster to calculate.
#' Using exponentially decaying weights is more natural than using a sliding
#' look-back interval, because it gradually "forgets" about the past data.
#'
#' The value of the decay factor \eqn{\lambda} must be in the range between
#' \code{0} and \code{1}.
#' If \eqn{\lambda} is close to \code{1} then the decay is weak and past
#' values have a greater weight, and the trailing variance values have a
#' stronger dependence on past data. This is equivalent to a long
#' look-back interval.
#' If \eqn{\lambda} is much less than \code{1} then the decay is strong and
#' past values have a smaller weight, and the trailing variance values have a
#' weaker dependence on past data. This is equivalent to a short look-back
#' interval.
#'
#' The function \code{run_zscores()} returns a \emph{matrix} with three
#' columns (means, volatilities, and z-scores) and the same number of rows as
#' the input argument \code{tseries}.
#'
#' @examples
#' \dontrun{
#' # Calculate historical VTI log prices
#' pricev <- log(na.omit(rutils::etfenv$prices$VTI))
#' # Calculate the trailing variance and z-scores of prices
#' lambdaf <- 0.9 # Decay factor
#' zscores <- HighFreq::run_zscores(pricev, lambda=lambdaf)
#' datav <- cbind(pricev, zscores[, 3])
#' colnames(datav) <- c("VTI", "Zscores")
#' colnamev <- colnames(datav)
#' dygraphs::dygraph(datav, main="VTI Prices and Z-scores") %>%
#' dyAxis("y", label=colnamev[1], independentTicks=TRUE) %>%
#' dyAxis("y2", label=colnamev[2], independentTicks=TRUE) %>%
#' dySeries(axis="y", label=colnamev[1], strokeWidth=2, col="blue") %>%
#' dySeries(axis="y2", label=colnamev[2], strokeWidth=2, col="red") %>%
#' dyLegend(show="always", width=300)
#' }
#'
#' @export
run_zscores <- function(tseries, lambda) {
    # Auto-generated Rcpp binding (compileAttributes): forwards its arguments
    # unchanged to the registered C++ routine; do not edit by hand.
    .Call(`_HighFreq_run_zscores`, tseries, lambda)
}
#' Calculate the trailing variance of streaming \emph{OHLC} price data using an
#' online recursive formula.
#'
#' @param \code{ohlc} A \emph{time series} or a \emph{matrix} with \emph{OHLC}
#' price data.
#'
#' @param \code{lambda} A decay factor which multiplies past estimates.
#'
#' @return A single-column \emph{matrix} of variance estimates, with the same
#' number of rows as the input \code{ohlc} price data.
#'
#' @details
#' The function \code{run_var_ohlc()} calculates a single-column
#' \emph{matrix} of variance estimates of streaming \emph{OHLC} price data.
#'
#' The function \code{run_var_ohlc()} calculates the variance from the
#' differences between the \emph{Open}, \emph{High}, \emph{Low}, and
#' \emph{Close} prices, using the \emph{Yang-Zhang} range volatility
#' estimator:
#' \deqn{
#' \sigma^2_t = (1-\lambda) ((O_t - C_{t-1})^2 + 0.134 (C_t - O_t)^2 +
#' 0.866 ((H_t - O_t) (H_t - C_t) + (L_t - O_t) (L_t - C_t))) +
#' \lambda \sigma^2_{t-1}
#' }
#' It recursively weighs the current variance estimate with the past
#' estimates \eqn{\sigma^2_{t-1}}, using the decay factor \eqn{\lambda}.
#'
#' The above recursive formula is convenient for processing live streaming
#' data because it doesn't require maintaining a buffer of past data.
#' The formula is equivalent to a convolution with exponentially decaying
#' weights, but it's faster.
#' Using exponentially decaying weights is more natural than using a sliding
#' look-back interval, because it gradually "forgets" about the past data.
#'
#' The function \code{run_var_ohlc()} does not calculate the logarithm of
#' the prices.
#' So if the argument \code{ohlc} contains dollar prices then
#' \code{run_var_ohlc()} calculates the dollar variance.
#' If the argument \code{ohlc} contains the log prices then
#' \code{run_var_ohlc()} calculates the percentage variance.
#'
#' The function \code{run_var_ohlc()} is implemented in \code{RcppArmadillo}
#' \code{C++} code, which makes it several times faster than \code{R} code.
#'
#' @examples
#' \dontrun{
#' # Extract the log OHLC prices of VTI
#' ohlc <- log(rutils::etfenv$VTI)
#' # Calculate the trailing variance
#' vart <- HighFreq::run_var_ohlc(ohlc, lambda=0.8)
#' # Calculate the rolling variance
#' varoll <- HighFreq::roll_var_ohlc(ohlc, lookb=5, method="yang_zhang", scale=FALSE)
#' datav <- cbind(vart, varoll)
#' colnames(datav) <- c("trailing", "rolling")
#' colnamev <- colnames(datav)
#' datav <- xts::xts(datav, index(ohlc))
#' # dygraph plot of VTI trailing versus rolling volatility
#' dygraphs::dygraph(sqrt(datav[-(1:111), ]), main="Trailing and Rolling Volatility of VTI") %>%
#' dyOptions(colors=c("red", "blue"), strokeWidth=2) %>%
#' dyLegend(show="always", width=300)
#' # Compare the speed of trailing versus rolling volatility
#' library(microbenchmark)
#' summary(microbenchmark(
#' trailing=HighFreq::run_var_ohlc(ohlc, lambda=0.8),
#' rolling=HighFreq::roll_var_ohlc(ohlc, lookb=5, method="yang_zhang", scale=FALSE),
#' times=10))[, c(1, 4, 5)]
#' }
#' @export
run_var_ohlc <- function(ohlc, lambda) {
    # Auto-generated Rcpp binding (compileAttributes): forwards its arguments
    # unchanged to the registered C++ routine; do not edit by hand.
    .Call(`_HighFreq_run_var_ohlc`, ohlc, lambda)
}
#' Calculate the correlation matrix from the covariance matrix.
#'
#' @param \code{covmat} A \emph{matrix} of covariances.
#'
#' @return Void (no return value - modifies the covariance matrix in place).
#'
#' @details
#' The function \code{push_cov2cor()} calculates the correlation matrix from
#' the covariance matrix, in place, without copying the data in memory.
#'
#' The function \code{push_cov2cor()} accepts a \emph{pointer} to the
#' covariance matrix, and it overwrites it with the correlation matrix.
#'
#' The function \code{push_cov2cor()} is written in \code{RcppArmadillo}
#' \code{C++} so it's much faster than \code{R} code.
#'
#' @examples
#' \dontrun{
#' # Calculate a time series of returns
#' retp <- na.omit(rutils::etfenv$returns[, c("IEF", "VTI", "DBC")])
#' # Calculate the covariance matrix of returns
#' covmat <- cov(retp)
#' # Calculate the correlation matrix of returns
#' push_cov2cor(covmat)
#' all.equal(covmat, cor(retp))
#' }
#'
#' @export
push_cov2cor <- function(covmat) {
    # Auto-generated Rcpp binding (compileAttributes): do not edit by hand.
    # The C++ routine modifies covmat in place; invisible() suppresses
    # printing since there is no meaningful return value.
    invisible(.Call(`_HighFreq_push_cov2cor`, covmat))
}
#' Update the trailing covariance matrix of streaming asset returns,
#' with a row of new returns using an online recursive formula.
#'
#' @param \code{retsn} A \emph{vector} of new asset returns.
#'
#' @param \code{covmat} A trailing covariance \emph{matrix} of asset returns.
#'
#' @param \code{meanv} A \emph{vector} of trailing means of asset returns.
#'
#' @param \code{lambdacov} A decay factor which multiplies the past covariance.
#'
#' @return Void (no return value - modifies the trailing covariance matrix
#' and the return means in place).
#'
#' @details
#' The function \code{push_covar()} updates the trailing covariance matrix of
#' streaming asset returns, with a row of new returns. It updates the
#' covariance matrix in place, without copying the data in memory.
#'
#' The streaming asset returns \eqn{r_t} contain multiple columns and the
#' parameter \code{retsn} represents a single row of \eqn{r_t} - the asset
#' returns at time \eqn{t}. The elements of the vectors \code{retsn} and
#' \code{meanv} represent single rows of data with multiple columns.
#'
#' The function \code{push_covar()} accepts \emph{pointers} to the arguments
#' \code{covmat} and \code{meanv},
#' and it overwrites the old values with the new values. It performs the
#' calculation in place, without copying the data in memory, which can
#' significantly increase the computation speed for large matrices.
#'
#' First, the function \code{push_covar()} updates the trailing means
#' \eqn{\bar{r}_t} of the streaming asset returns \eqn{r_t} by recursively
#' weighting present and past values using the decay factor \eqn{\lambda}:
#' \deqn{
#' \bar{r}_t = \lambda \bar{r}_{t-1} + (1-\lambda) r_t
#' }
#' This recursive formula is equivalent to the exponentially weighted moving
#' average of the streaming asset returns \eqn{r_t}.
#'
#' It then calculates the demeaned returns:
#' \deqn{
#' \hat{r}_t = r_t - \bar{r}_t
#' }
#'
#' Finally, it updates the trailing covariance matrix of the returns:
#' \deqn{
#' {cov}_t = \lambda {cov}_{t-1} + (1-\lambda) \hat{r}^T_t \hat{r}_t
#' }
#'
#' The decay factor \eqn{\lambda} determines the strength of the updates,
#' with smaller \eqn{\lambda} values giving more weight to the new data. If
#' the asset returns are not stationary, then applying more weight to the new
#' returns reduces the bias of the trailing covariance matrix, but it also
#' increases its variance. Simulation can be used to find the value of the
#' \eqn{\lambda} parameter to achieve the best bias-variance tradeoff.
#'
#' The function \code{push_covar()} is written in \code{RcppArmadillo}
#' \code{C++} so it's much faster than \code{R} code.
#'
#' @examples
#' \dontrun{
#' # Calculate a time series of returns
#' retp <- na.omit(rutils::etfenv$returns[, c("IEF", "VTI", "DBC")])
#' # Calculate the returns without last row
#' nrows <- NROW(retp)
#' retss <- retp[-nrows]
#' # Calculate the covariance of returns
#' meanv <- colMeans(retss)
#' covmat <- cov(retss)
#' # Update the covariance of returns
#' HighFreq::push_covar(retsn=retp[nrows], covmat=covmat, meanv=meanv, lambdacov=0.9)
#' }
#'
#' @export
push_covar <- function(retsn, covmat, meanv, lambdacov) {
  # Hand the arguments to the compiled C++ routine, which updates the
  # trailing covariance matrix covmat and the means meanv in place;
  # the return value is void, so suppress it.
  res <- .Call(`_HighFreq_push_covar`, retsn, covmat, meanv, lambdacov)
  invisible(res)
}
#' Update the trailing eigen values and eigen vectors of streaming asset return
#' data, with a row of new returns.
#'
#' @param \code{retsn} A \emph{vector} of new asset returns.
#'
#' @param \code{covmat} A trailing covariance \emph{matrix} of asset returns.
#'
#' @param \code{eigenval} A \emph{vector} of eigen values.
#'
#' @param \code{eigenvec} A \emph{matrix} of eigen vectors.
#'
#' @param \code{eigenret} A \emph{vector} of eigen portfolio returns.
#'
#' @param \code{meanv} A \emph{vector} of trailing means of asset returns.
#'
#' @param \code{lambdacov} A decay factor which multiplies the past covariance.
#'
#' @return Void (no return value - modifies the trailing eigen values, eigen
#' vectors, the eigen portfolio returns, and the return means in place).
#'
#' @details
#' The function \code{push_eigen()} updates the trailing eigen values, eigen
#' vectors, and the eigen portfolio returns of streaming asset returns, with
#' a row of new data. It updates the eigenelements in place, without copying
#' the data in memory.
#'
#' The streaming asset returns \eqn{r_t} contain multiple columns and the
#' parameter \code{retsn} represents a single row of \eqn{r_t} - the asset
#' returns at time \eqn{t}. The elements of the vectors \code{retsn},
#' \code{eigenret}, and \code{meanv} represent single rows of data with
#' multiple columns.
#'
#' The function \code{push_eigen()} accepts \emph{pointers} to the arguments
#' \code{eigenval}, \code{eigenvec}, \code{meanv}, and
#' \code{eigenret}, and it overwrites the old values with the new values. It
#' performs the calculation in place, without copying the data in memory,
#' which can significantly increase the computation speed for large matrices.
#'
#' First, the function \code{push_eigen()} calls the function
#' \code{HighFreq::push_covar()} to update the trailing covariance matrix of
#' streaming asset returns, with a row of new returns. It updates the
#' covariance matrix in place, without copying the data in memory.
#'
#' It then calls the \code{Armadillo} function \code{arma::eig_sym} to
#' calculate the eigen decomposition of the trailing covariance matrix.
#'
#' The function \code{push_eigen()} calculates the eigen portfolio returns by
#' multiplying the scaled asset returns times the eigen vectors
#' \eqn{\strong{v}_{t-1}}:
#' \deqn{
#' r^{eigen}_t = \strong{v}_{t-1} \frac{r_t}{\sigma_{t-1}}
#' }
#' Where \eqn{\strong{v}_{t-1}} is the matrix of previous eigen vectors that
#' are passed by reference through the parameter \code{eigenvec}. The eigen
#' returns \eqn{r^{eigen}_t} are the returns of the eigen portfolios, with
#' weights equal to the eigen vectors \eqn{\strong{v}_{t-1}}. The eigen
#' weights are applied to the asset returns scaled by their volatilities.
#' The eigen returns \eqn{r^{eigen}_t} are passed by reference through the
#' parameter \code{eigenret}.
#'
#' The decay factor \eqn{\lambda} determines the strength of the updates,
#' with smaller \eqn{\lambda} values giving more weight to the new data. If
#' the asset returns are not stationary, then applying more weight to the new
#' returns reduces the bias of the trailing covariance matrix, but it also
#' increases its variance. Simulation can be used to find the value of the
#' \eqn{\lambda} parameter to achieve the best bias-variance tradeoff.
#'
#' The function \code{push_eigen()} is written in \code{RcppArmadillo}
#' \code{C++} so it's much faster than \code{R} code.
#'
#' @examples
#' \dontrun{
#' # Calculate a time series of returns
#' retp <- na.omit(rutils::etfenv$returns[, c("IEF", "VTI", "DBC")])
#' # Calculate the returns without the last row
#' nrows <- NROW(retp)
#' retss <- retp[-nrows]
#' # Calculate the previous covariance of returns
#' meanv <- colMeans(retss)
#' covmat <- cov(retss)
#' # Calculate the eigen decomposition of the covariance matrix
#' ncols <- NCOL(retp)
#' eigenval <- numeric(ncols) # Allocate eigen values
#' eigenvec <- matrix(numeric(ncols^2), nc=ncols) # Allocate eigen vectors
#' HighFreq::calc_eigen(covmat, eigenval, eigenvec)
#' # Update the covariance and the eigen decomposition of returns
#' eigenret <- numeric(ncols)
#' HighFreq::push_eigen(retsn=retp[nrows], covmat=covmat,
#' eigenval=eigenval, eigenvec=eigenvec,
#' eigenret=eigenret, meanv=meanv, lambdacov=0.9)
#' }
#'
#' @export
push_eigen <- function(retsn, covmat, eigenval, eigenvec, eigenret, meanv, lambdacov) {
  # Forward to the compiled C++ routine, which updates the covariance
  # matrix, eigenelements, eigen portfolio returns, and means in place;
  # suppress the void return value.
  res <- .Call(`_HighFreq_push_eigen`, retsn, covmat, eigenval,
               eigenvec, eigenret, meanv, lambdacov)
  invisible(res)
}
#' Update the trailing eigen values and eigen vectors of streaming asset return
#' data, with a row of new returns, using the \emph{SGA} algorithm.
#'
#' @param \code{retsn} A \emph{vector} of new asset returns.
#'
#' @param \code{eigenval} A \emph{vector} of eigen values.
#'
#' @param \code{eigenvec} A \emph{matrix} of eigen vectors.
#'
#' @param \code{eigenret} A \emph{vector} of eigen portfolio returns.
#'
#' @param \code{meanv} A \emph{vector} of trailing means of asset returns.
#'
#' @param \code{varv} A \emph{vector} of the trailing asset variances.
#'
#' @param \code{lambda} A decay factor which multiplies the past mean and
#' variance.
#'
#' @param \code{gamma} A \emph{numeric} gain factor which multiplies the past
#' eigenelements.
#'
#' @return Void (no return value - modifies the trailing eigen values, eigen
#' vectors, the return means, and the return variances in place).
#'
#' @details
#' The function \code{push_sga()} updates the trailing eigen values, eigen
#' vectors, and the eigen portfolio returns of streaming asset returns, with
#' a row of new data, using the \emph{SGA} algorithm. It updates the
#' eigenelements in place, without copying the data in memory.
#'
#' The streaming asset returns \eqn{r_t} contain multiple columns and the
#' parameter \code{retsn} represents a single row of \eqn{r_t} - the asset
#' returns at time \eqn{t}. The elements of the vectors \code{retsn},
#' \code{meanv}, and \code{varv} represent single rows of data with multiple
#' columns.
#'
#' The function \code{push_sga()} accepts \emph{pointers} to the arguments
#' \code{eigenval}, \code{eigenvec}, \code{meanv}, and \code{varv},
#' and it overwrites the old values with the new values. It performs the
#' calculation in place, without copying the data in memory, which can
#' significantly increase the computation speed for large matrices.
#'
#' First, the function \code{push_sga()} updates the trailing means
#' \eqn{\bar{r}_t} and variances \eqn{\sigma^2_t} of the streaming asset
#' returns \eqn{r_t} by recursively weighting present and past values
#' using the decay factor \eqn{\lambda}:
#' \deqn{
#' \bar{r}_t = \lambda \bar{r}_{t-1} + (1-\lambda) r_t
#' }
#' \deqn{
#' \sigma^2_t = \lambda \sigma^2_{t-1} + (1-\lambda) (r_t - \bar{r}_t)^2
#' }
#' The past values \eqn{\bar{r}_{t-1}} and \eqn{\sigma^2_{t-1}} are passed in
#' by reference through the variables \code{meanv} and \code{varv}. The
#' updated values are then passed out by reference.
#'
#' These recursive formulas are equivalent to the exponentially weighted
#' moving averages of the streaming asset returns \eqn{r_t}.
#'
#' It then calculates a vector of the eigen portfolio returns:
#' \deqn{
#' r^{eigen}_t = \strong{v}_{t-1} \frac{r_t}{\sigma_{t-1}}
#' }
#' Where \eqn{\strong{v}_{t-1}} is the matrix of previous eigen vectors that
#' are passed by reference through the parameter \code{eigenvec}. The eigen
#' returns \eqn{r^{eigen}_t} are the returns of the eigen portfolios, with
#' weights equal to the eigen vectors \eqn{\strong{v}_{t-1}}. The eigen
#' weights are applied to the asset returns scaled by their volatilities.
#' The eigen returns \eqn{r^{eigen}_t} are passed by reference through the
#' parameter \code{eigenret}.
#'
#' The function \code{push_sga()} then standardizes the columns of the new
#' returns:
#' \deqn{
#' \hat{r}_t = \frac{r_t - \bar{r}_t}{\sigma_t}
#' }
#'
#' Finally, the vector of eigen values \eqn{\Lambda_{j, t}} and the matrix of
#' eigen vectors \eqn{\strong{v}_{j, t}} (\eqn{j} is the column index) are
#' then updated using the \emph{SGA} algorithm:
#' \deqn{
#' \Lambda_{j, t} = (1-\gamma) \Lambda_{j, t-1} + \gamma \phi_{j, t-1}
#' }
#' \deqn{
#' \strong{v}_{j, t} = \strong{v}_{j, t-1} + \gamma \phi_{j, t-1} (\hat{r}_{t} - \phi_{j, t-1} \strong{v}_{j, t-1} - 2 \sum_{i=1}^{j-1} \phi_{i, t-1} \strong{v}_{i, t-1})
#' }
#' Where \eqn{\phi_{j, t-1} = \hat{r}_{t} \strong{v}_{j, t-1}} are the matrix
#' products of the new data times the previous eigen vectors.
#'
#' The gain factor \eqn{\gamma} determines the strength of the updates, with
#' larger \eqn{\gamma} values giving more weight to the new data. If the
#' asset returns are not stationary, then applying more weight to the new
#' returns reduces the bias of the trailing eigen vectors, but it also
#' increases their variance. Simulation can be used to find the value of the
#' \eqn{\gamma} parameter to achieve the best bias-variance tradeoff.
#'
#' A description of the \emph{SGA} algorithm can be found in the package
#' \href{https://cran.r-project.org/web/packages/onlinePCA/index.html}{onlinePCA} and in the
#' \href{https://paperswithcode.com/paper/online-principal-component-analysis-in-high}{Online PCA paper}.
#'
#' The function \code{push_sga()} is written in \code{RcppArmadillo}
#' \code{C++} code and it calls the \code{Armadillo} function
#' \code{arma::qr_econ()} to perform the QR decomposition, to calculate the
#' eigen vectors.
#'
#' @examples
#' \dontrun{
#' # Calculate a time series of returns
#' retp <- na.omit(rutils::etfenv$returns[, c("IEF", "VTI", "DBC")])
#' # Calculate the covariance of returns without the last row
#' nrows <- NROW(retp)
#' retss <- retp[-nrows]
#' HighFreq::calc_scale(retss)
#' meanv <- colMeans(retss)
#' varv <- sapply(retss, var)
#' covmat <- cov(retss)
#' ncols <- NCOL(retss)
#' # Calculate the eigen decomposition using RcppArmadillo
#' eigenval <- numeric(ncols) # Allocate eigen values
#' eigenvec <- matrix(numeric(ncols^2), nc=ncols) # Allocate eigen vectors
#' HighFreq::calc_eigen(covmat, eigenval, eigenvec)
#' # Update the eigen decomposition using SGA
#' eigenret <- numeric(NCOL(retp))
#' HighFreq::push_sga(retsn=retp[nrows],
#' eigenval=eigenval, eigenvec=eigenvec,
#' eigenret=eigenret, meanv=meanv, varv=varv, lambda=0.9, gamma=0.1)
#' }
#'
#' @export
push_sga <- function(retsn, eigenval, eigenvec, eigenret, meanv, varv, lambda, gamma) {
  # Forward to the compiled C++ SGA routine, which updates the eigen
  # values, eigen vectors, eigen returns, means, and variances in place;
  # suppress the void return value.
  res <- .Call(`_HighFreq_push_sga`, retsn, eigenval, eigenvec,
               eigenret, meanv, varv, lambda, gamma)
  invisible(res)
}
#' Calculate the trailing covariances of two streaming \emph{time series} of
#' returns using an online recursive formula.
#'
#' @param \code{tseries} A \emph{time series} or a \emph{matrix} with two
#' columns of returns data.
#'
#' @param \code{lambda} A decay factor which multiplies past estimates.
#'
#' @return A \emph{matrix} with five columns of data: the trailing covariances,
#' the variances, and the mean values of the two columns of the argument
#' \code{tseries}.
#'
#' @details
#' The function \code{run_covar()} calculates the trailing covariances of two
#' streaming \emph{time series} of returns, by recursively weighting the past
#' covariance estimates \eqn{{cov}_{t-1}}, with the products of their
#' returns minus their means, using the decay factor \eqn{\lambda}:
#' \deqn{
#' \bar{x}_t = \lambda \bar{x}_{t-1} + (1-\lambda) x_t
#' }
#' \deqn{
#' \bar{y}_t = \lambda \bar{y}_{t-1} + (1-\lambda) y_t
#' }
#' \deqn{
#' \sigma^2_{x t} = \lambda \sigma^2_{x t-1} + (1-\lambda) (x_t - \bar{x}_t)^2
#' }
#' \deqn{
#' \sigma^2_{y t} = \lambda \sigma^2_{y t-1} + (1-\lambda) (y_t - \bar{y}_t)^2
#' }
#' \deqn{
#' {cov}_t = \lambda {cov}_{t-1} + (1-\lambda) (x_t - \bar{x}_t) (y_t - \bar{y}_t)
#' }
#' Where \eqn{{cov}_t} is the trailing covariance estimate at time \eqn{t},
#' \eqn{\sigma^2_{x t}}, \eqn{\sigma^2_{y t}}, \eqn{\bar{x}_t} and
#' \eqn{\bar{y}_t} are the trailing variances and means of the returns, and
#' \eqn{x_t} and \eqn{y_t} are the two streaming returns data.
#'
#' The above online recursive formulas are convenient for processing live
#' streaming data because they don't require maintaining a buffer of past
#' data. The formulas are equivalent to a convolution with exponentially
#' decaying weights, but they're much faster to calculate.
#' Using exponentially decaying weights is more natural than using a sliding
#' look-back interval, because it gradually "forgets" about the past data.
#'
#' The value of the decay factor \eqn{\lambda} must be in the range between
#' \code{0} and \code{1}.
#' If \eqn{\lambda} is close to \code{1} then the decay is weak and past
#' values have a greater weight, and the trailing covariance values have a
#' stronger dependence on past data. This is equivalent to a long
#' look-back interval.
#' If \eqn{\lambda} is much less than \code{1} then the decay is strong and
#' past values have a smaller weight, and the trailing covariance values have
#' a weaker dependence on past data. This is equivalent to a short
#' look-back interval.
#'
#' The function \code{run_covar()} returns five columns of data: the trailing
#' covariances, the variances, and the mean values of the two columns of the
#' argument \code{tseries}. This allows calculating the trailing
#' correlations, betas, and alphas.
#'
#' @examples
#' \dontrun{
#' # Calculate historical returns
#' retp <- zoo::coredata(na.omit(rutils::etfenv$returns[, c("IEF", "VTI")]))
#' # Calculate the trailing covariance
#' lambdaf <- 0.9
#' covars <- HighFreq::run_covar(retp, lambda=lambdaf)
#' # Calculate the trailing correlation
#' correl <- covars[, 1]/sqrt(covars[, 2]*covars[, 3])
#' # Calculate the trailing covariance using R code
#' nrows <- NROW(retp)
#' retm <- matrix(numeric(2*nrows), nc=2)
#' retm[1, ] <- retp[1, ]
#' retd <- matrix(numeric(2*nrows), nc=2)
#' covarr <- numeric(nrows)
#' covarr[1] <- retp[1, 1]*retp[1, 2]
#' for (it in 2:nrows) {
#' retm[it, ] <- lambdaf*retm[it-1, ] + (1-lambdaf)*(retp[it, ])
#' retd[it, ] <- retp[it, ] - retm[it, ]
#' covarr[it] <- lambdaf*covarr[it-1] + (1-lambdaf)*retd[it, 1]*retd[it, 2]
#' } # end for
#' all.equal(covars[, 1], covarr, check.attributes=FALSE)
#' }
#'
#' @export
run_covar <- function(tseries, lambda) {
  # Invoke the compiled C++ implementation and return its result: a
  # matrix with the trailing covariances, variances, and means.
  result <- .Call(`_HighFreq_run_covar`, tseries, lambda)
  result
}
#' Calculate the trailing autocovariance of a \emph{time series} of returns
#' using an online recursive formula.
#'
#' @param \code{tseries} A \emph{time series} or a \emph{matrix} with a single
#' column of returns data.
#'
#' @param \code{lambda} A decay factor which multiplies past estimates.
#'
#' @param \code{lagg} An \emph{integer} equal to the number of periods to lag.
#' (The default is \code{lagg = 1}.)
#'
#' @return A \emph{matrix} with three columns of data: the trailing
#' autocovariances, the variances, and the mean values of the argument
#' \code{tseries}.
#'
#' @details
#' The function \code{run_autocovar()} calculates the trailing
#' autocovariance of a streaming \emph{time series} of returns, by
#' recursively weighting the past autocovariance estimates \eqn{{cov}_{t-1}},
#' with the products of their returns minus their means, using the decay
#' factor \eqn{\lambda}:
#' \deqn{
#' \bar{x}_t = \lambda \bar{x}_{t-1} + (1-\lambda) x_t
#' }
#' \deqn{
#' \sigma^2_{t} = \lambda \sigma^2_{t-1} + (1-\lambda) (x_t - \bar{x}_t)^2
#' }
#' \deqn{
#' {cov}_t = \lambda {cov}_{t-1} + (1-\lambda) (x_t - \bar{x}_t) (x_{t-l} - \bar{x}_{t-l})
#' }
#' Where \eqn{{cov}_t} is the trailing autocovariance estimate at time
#' \eqn{t}, with \code{lagg=l}.
#' And \eqn{\sigma^2_{t}} and \eqn{\bar{x}_t} are the trailing variances and
#' means of the streaming data.
#'
#' The above online recursive formulas are convenient for processing live
#' streaming data because they don't require maintaining a buffer of past
#' data. The formulas are equivalent to a convolution with exponentially
#' decaying weights, but they're much faster to calculate.
#' Using exponentially decaying weights is more natural than using a sliding
#' look-back interval, because it gradually "forgets" about the past data.
#'
#' The value of the decay factor \eqn{\lambda} must be in the range between
#' \code{0} and \code{1}.
#' If \eqn{\lambda} is close to \code{1} then the decay is weak and past
#' values have a greater weight, and the trailing covariance values have a
#' stronger dependence on past data. This is equivalent to a long
#' look-back interval.
#' If \eqn{\lambda} is much less than \code{1} then the decay is strong and
#' past values have a smaller weight, and the trailing covariance values have
#' a weaker dependence on past data. This is equivalent to a short
#' look-back interval.
#'
#' The function \code{run_autocovar()} returns three columns of data: the
#' trailing autocovariances, the variances, and the mean values of the
#' argument \code{tseries}. This allows calculating the trailing
#' autocorrelations.
#'
#' @examples
#' \dontrun{
#' # Calculate historical returns
#' retp <- zoo::coredata(na.omit(rutils::etfenv$returns$VTI))
#' # Calculate the trailing autocovariance
#' lambdaf <- 0.9
#' lagg <- 3
#' covars <- HighFreq::run_autocovar(retp, lambda=lambdaf, lagg=lagg)
#' # Calculate the trailing autocorrelation
#' correl <- covars[, 1]/covars[, 2]
#' # Calculate the trailing autocovariance using R code
#' nrows <- NROW(retp)
#' retm <- numeric(nrows)
#' retm[1] <- retp[1, ]
#' retd <- numeric(nrows)
#' covarr <- numeric(nrows)
#' covarr[1] <- retp[1, ]^2
#' for (it in 2:nrows) {
#' retm[it] <- lambdaf*retm[it-1] + (1-lambdaf)*(retp[it])
#' retd[it] <- retp[it] - retm[it]
#' covarr[it] <- lambdaf*covarr[it-1] + (1-lambdaf)*retd[it]*retd[max(it-lagg, 1)]
#' } # end for
#' all.equal(covarr, covars[, 1])
#' }
#'
#' @export
run_autocovar <- function(tseries, lambda, lagg = 1L) {
  # Invoke the compiled C++ implementation and return its result: a
  # matrix with the trailing autocovariances, variances, and means.
  result <- .Call(`_HighFreq_run_autocovar`, tseries, lambda, lagg)
  result
}
#' Perform regressions on the streaming \emph{time series} of response and
#' predictor data, and calculate the regression coefficients, the residuals,
#' and the forecasts, using online recursive formulas.
#'
#' @param \code{respv} A single-column \emph{time series} or a single-column
#' \emph{matrix} of response data.
#'
#' @param \code{predm} A \emph{time series} or a \emph{matrix} of predictor
#' data.
#'
#' @param \code{lambda} A decay factor which multiplies past estimates.
#'
#' @param \code{controlv} A \emph{list} of model parameters (see Details).
#'
#'
#' @return A \emph{matrix} with the regression coefficients, the residuals, and
#' the forecasts (in that order - see details), with the same number of rows
#' as the predictor argument \code{predm}.
#'
#' @details
#' The function \code{run_reg()} performs regressions on the streaming \emph{time
#' series} of response \eqn{r_t} and predictor \eqn{p_t} data:
#' \deqn{
#' r_t = \beta_t p_t + \epsilon_t
#' }
#' Where \eqn{\beta_t} are the trailing regression coefficients and
#' \eqn{\epsilon_t} are the residuals.
#'
#' It recursively updates the covariance matrix \eqn{{cov}_t} between the
#' response and the predictor data, and the covariance matrix
#' \eqn{{cov}_{pt}} between the predictors, using the decay factor
#' \eqn{\lambda}:
#' \deqn{
#' {cov}_t = \lambda {cov}_{t-1} + (1-\lambda) r^T_t p_t
#' }
#' \deqn{
#' {cov}_{p t} = \lambda {cov}_{p (t-1)} + (1-\lambda) p^T_t p_t
#' }
#'
#' It calculates the regression coefficients \eqn{\beta_t} as equal to the
#' covariance matrix between the response and the predictor data
#' \eqn{{cov}_t}, divided by the covariance matrix between the predictors
#' \eqn{{cov}_{pt}}:
#' \deqn{
#' \beta_t = {cov}_t \, {cov}^{-1}_{p t}
#' }
#'
#' It calculates the residuals \eqn{\epsilon_t} as the difference between the
#' response \eqn{r_t} minus the fitted values \eqn{\beta_t p_t}:
#' \deqn{
#' \epsilon_t = r_t - \beta_t p_t
#' }
#'
#' And the residual variance \eqn{\sigma^2_t} as:
#' \deqn{
#' \bar{\epsilon}_t = \lambda \bar{\epsilon}_{t-1} + (1-\lambda) \epsilon_t
#' }
#' \deqn{
#' \sigma^2_t = \lambda \sigma^2_{t-1} + (1-\lambda) (\epsilon_t - \bar{\epsilon}_t)^2
#' }
#'
#' It then calculates the regression forecasts \eqn{f_t}, as equal to the
#' past regression coefficients \eqn{\beta_{t-1}} times the current predictor
#' data \eqn{p_t}:
#' \deqn{
#' f_t = \beta_{t-1} p_t
#' }
#'
#' It finally calculates the forecast errors as the difference between the
#' response minus the regression forecasts: \eqn{r_t - f_t}.
#'
#' The coefficient matrix \eqn{\beta} and the residuals \eqn{\epsilon} have
#' the same number of rows as the predictor argument \code{predm}.
#'
#' The function \code{run_reg()} accepts a list of regression model
#' parameters through the argument \code{controlv}.
#' The argument \code{controlv} contains the parameters \code{regmod} and
#' \code{residscale}.
#' Below is a description of how these parameters work.
#' The list of model parameters can be created using the function
#' \code{param_reg()}.
#'
#' The number of regression coefficients is equal to the number of columns of
#' the predictor matrix \code{n}.
#' If the predictor matrix contains a unit intercept column then the first
#' regression coefficient is equal to the alpha value \eqn{\alpha}.
#'
#' If \code{regmod = "least_squares"} (the default) then it performs the
#' standard least squares regression. This is currently the only option.
#'
#' The \emph{residuals} and the the \emph{forecast errors} may be scaled by
#' their volatilities to obtain the \emph{z-scores}.
#' The default is \code{residscale = "none"} - no scaling.
#' If the argument \code{residscale = "scale"} then the \emph{residuals}
#' \eqn{\epsilon_t} are divided by their volatilities \eqn{\sigma_t}
#' without subtracting their means:
#' \deqn{
#' \epsilon_t = \frac{\epsilon_t}{\sigma_t}
#' }
#' If the argument \code{residscale = "standardize"} then the residual means
#' \eqn{\bar{\epsilon}} are subtracted from the \emph{residuals}, and then
#' they are divided by their volatilities \eqn{\sigma_t}:
#' \deqn{
#' \epsilon_t = \frac{\epsilon_t - \bar{\epsilon}}{\sigma_t}
#' }
#' Which are equal to the \emph{z-scores}.
#'
#' The \emph{forecast errors} are also scaled in the same way as the
#' \emph{residuals}, according to the argument \code{residscale}.
#'
#' The above online recursive formulas are convenient for processing live
#' streaming data because they don't require maintaining a buffer of past
#' data.
#' The above recursive formulas are equivalent to a convolution with
#' exponentially decaying weights, but they're much faster to calculate.
#' Using exponentially decaying weights is more natural than using a sliding
#' look-back interval, because it gradually "forgets" about the past data.
#'
#' The value of the decay factor \eqn{\lambda} must be in the range between
#' \code{0} and \code{1}.
#' If \eqn{\lambda} is close to \code{1} then the decay is weak and past
#' values have a greater weight, so the trailing values have a greater
#' dependence on past data. This is equivalent to a long look-back
#' interval.
#' If \eqn{\lambda} is much less than \code{1} then the decay is strong and
#' past values have a smaller weight, so the trailing values have a weaker
#' dependence on past data. This is equivalent to a short look-back
#' interval.
#'
#' The function \code{run_reg()} returns multiple columns of data, with the
#' same number of rows as the predictor matrix \code{predm}. If the predictor
#' matrix \code{predm} has \code{n} columns then \code{run_reg()} returns a
#' matrix with \code{n+2} columns.
#' The first \code{n} columns contain the regression coefficients (with the
#' first column equal to the alpha value \eqn{\alpha}).
#' The last \code{2} columns are the regression residuals and the forecast
#' errors.
#'
#' @examples
#' \dontrun{
#' # Calculate historical returns
#' retp <- na.omit(rutils::etfenv$returns[, c("XLF", "VTI", "IEF")])
#' # Response equals XLF returns
#' respv <- retp[, 1]
#' # Predictor matrix equals VTI and IEF returns
#' predm <- retp[, -1]
#' # Add unit intercept column to the predictor matrix
#' predm <- cbind(rep(1, NROW(predm)), predm)
#' # Calculate the trailing regressions
#' lambdaf <- 0.9
#' # Create a list of regression parameters
#' controlv <- HighFreq::param_reg(residscale="standardize")
#' regs <- HighFreq::run_reg(respv=respv, predm=predm, lambda=lambdaf, controlv=controlv)
#' # Plot the trailing residuals
#' datav <- cbind(cumsum(respv), regs[, NCOL(regs)])
#' colnames(datav) <- c("XLF", "residuals")
#' colnamev <- colnames(datav)
#' dygraphs::dygraph(datav["2008/2009"], main="Residuals of XLF Versus VTI and IEF") %>%
#' dyAxis("y", label=colnamev[1], independentTicks=TRUE) %>%
#' dyAxis("y2", label=colnamev[2], independentTicks=TRUE) %>%
#' dySeries(axis="y", strokeWidth=2, col="blue") %>%
#' dySeries(axis="y2", strokeWidth=2, col="red") %>%
#' dyLegend(show="always", width=300)
#'
#' # Calculate the trailing regressions using R code
#' lambda1 <- (1-lambdaf)
#' respv <- zoo::coredata(respv)
#' predm <- zoo::coredata(predm)
#' nrows <- NROW(predm)
#' ncols <- NCOL(predm)
#' covrespred <- respv[1, ]*predm[1, ]
#' covpred <- outer(predm[1, ], predm[1, ])
#' betas <- matrix(numeric(nrows*ncols), nc=ncols)
#' betas[1, ] <- covrespred %*% MASS::ginv(covpred)
#' resids <- numeric(nrows)
#' residm <- 0
#' residv <- 0
#' for (it in 2:nrows) {
#' covrespred <- lambdaf*covrespred + lambda1*respv[it, ]*predm[it, ]
#' covpred <- lambdaf*covpred + lambda1*outer(predm[it, ], predm[it, ])
#' betas[it, ] <- covrespred %*% MASS::ginv(covpred)
#' resids[it] <- respv[it, ] - (betas[it, ] %*% predm[it, ])
#' residm <- lambdaf*residm + lambda1*resids[it]
#' residv <- lambdaf*residv + lambda1*(resids[it] - residm)^2
#' resids[it] <- (resids[it] - residm)/sqrt(residv)
#' } # end for
#' # Compare values, excluding warmup period
#' all.equal(regs[-(1:1e3), ], cbind(betas, resids)[-(1:1e3), ], check.attributes=FALSE)
#'
#' }
#'
#' @export
run_reg <- function(respv, predm, lambda, controlv) {
  # Invoke the compiled C++ regression routine and return its result: a
  # matrix of coefficients, residuals, and forecast errors.
  result <- .Call(`_HighFreq_run_reg`, respv, predm, lambda, controlv)
  result
}
#' Calculate the mean (location) of the columns of a \emph{time series} or a
#' \emph{matrix} using \code{RcppArmadillo}.
#'
#' @param \code{tseries} A \emph{time series} or a \emph{matrix} of data.
#'
#' @param \code{method} A \emph{character string} specifying the type of the
#' mean (location) model (the default is \code{method = "moment"} - see
#' Details).
#'
#' @param \code{confl} The confidence level for calculating the quantiles of
#' returns (the default is \code{confl = 0.75}).
#'
#' @return A single-row matrix with the mean (location) of the columns of
#' \code{tseries}.
#'
#' @details
#' The function \code{calc_mean()} calculates the mean (location) values of
#' the columns of the \emph{time series} \code{tseries} using \code{C++}
#' \code{RcppArmadillo} code.
#'
#' If \code{method = "moment"} (the default) then \code{calc_mean()}
#' calculates the location as the mean - the first moment of the data.
#'
#' If \code{method = "quantile"} then it calculates the location
#' \eqn{\bar{r}} as the average of the quantiles as follows:
#' \deqn{
#' \bar{r} = \frac{q_{\alpha} + q_{1-\alpha}}{2}
#' }
#' Where \eqn{\alpha} is the confidence level for calculating the quantiles
#' (argument \code{confl}).
#'
#' If \code{method = "nonparametric"} then it calculates the location as the
#' median.
#'
#' The code examples below compare the function \code{calc_mean()} with the
#' mean (location) calculated using \code{R} code.
#'
#' @examples
#' \dontrun{
#' # Calculate historical returns
#' retp <- na.omit(rutils::etfenv$returns[, c("XLP", "VTI")])
#' # Calculate the column means in RcppArmadillo
#' HighFreq::calc_mean(retp)
#' # Calculate the column means in R
#' sapply(retp, mean)
#' # Compare the values
#' all.equal(drop(HighFreq::calc_mean(retp)),
#' sapply(retp, mean), check.attributes=FALSE)
#' # Compare the speed of RcppArmadillo with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#' Rcpp=HighFreq::calc_mean(retp),
#' Rcode=sapply(retp, mean),
#' times=10))[, c(1, 4, 5)] # end microbenchmark summary
#' # Calculate the quantile mean (location)
#' HighFreq::calc_mean(retp, method="quantile", confl=0.9)
#' # Calculate the quantile mean (location) in R
#' colSums(sapply(retp, quantile, c(0.9, 0.1), type=5))
#' # Compare the values
#' all.equal(drop(HighFreq::calc_mean(retp, method="quantile", confl=0.9)),
#' colSums(sapply(retp, quantile, c(0.9, 0.1), type=5)),
#' check.attributes=FALSE)
#' # Compare the speed of RcppArmadillo with R code
#' summary(microbenchmark(
#' Rcpp=HighFreq::calc_mean(retp, method="quantile", confl=0.9),
#' Rcode=colSums(sapply(retp, quantile, c(0.9, 0.1), type=5)),
#' times=10))[, c(1, 4, 5)] # end microbenchmark summary
#' # Calculate the column medians in RcppArmadillo
#' HighFreq::calc_mean(retp, method="nonparametric")
#' # Calculate the column medians in R
#' sapply(retp, median)
#' # Compare the values
#' all.equal(drop(HighFreq::calc_mean(retp, method="nonparametric")),
#' sapply(retp, median), check.attributes=FALSE)
#' # Compare the speed of RcppArmadillo with R code
#' summary(microbenchmark(
#' Rcpp=HighFreq::calc_mean(retp, method="nonparametric"),
#' Rcode=sapply(retp, median),
#' times=10))[, c(1, 4, 5)] # end microbenchmark summary
#' }
#'
#' @export
calc_mean <- function(tseries, method = "moment", confl = 0.75) {
  # Invoke the compiled C++ implementation and return its result: a
  # single-row matrix of column means (locations).
  result <- .Call(`_HighFreq_calc_mean`, tseries, method, confl)
  result
}
#' Calculate the variance of a single-column \emph{time series} or a
#' \emph{vector} using \code{RcppArmadillo}.
#'
#' @param \code{tseries} A single-column \emph{time series} or a \emph{vector}.
#'
#' @return A \emph{numeric} value equal to the variance of the \emph{vector}.
#'
#' @details
#' The function \code{calc_varvec()} calculates the variance of a
#' \emph{vector} using \code{RcppArmadillo} \code{C++} code, so it's
#' significantly faster than the \code{R} function \code{var()}.
#'
#' @examples
#' \dontrun{
#' # Create a vector of random returns
#' retp <- rnorm(1e6)
#' # Compare calc_varvec() with standard var()
#' all.equal(HighFreq::calc_varvec(retp), var(retp))
#' # Compare the speed of RcppArmadillo with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#' Rcpp=HighFreq::calc_varvec(retp),
#' Rcode=var(retp),
#' times=10))[, c(1, 4, 5)] # end microbenchmark summary
#' }
#'
#' @export
calc_varvec <- function(tseries) {
  # Invoke the compiled C++ implementation and return the variance of
  # the input vector as a numeric scalar.
  result <- .Call(`_HighFreq_calc_varvec`, tseries)
  result
}
#' Calculate the dispersion (variance) of the columns of a \emph{time series}
#' or a \emph{matrix} using \code{RcppArmadillo}.
#'
#' @param \code{tseries} A \emph{time series} or a \emph{matrix} of data.
#'
#' @param \code{method} A \emph{character string} specifying the type of the
#' dispersion model (the default is \code{method = "moment"} - see Details).
#'
#' @param \code{confl} The confidence level for calculating the quantiles of
#' returns (the default is \code{confl = 0.75}).
#'
#' @return A row vector equal to the dispersion of the columns of the matrix
#' \code{tseries}.
#'
#' @details
#' The function \code{calc_var()} calculates the dispersion of the
#' columns of a \emph{time series} or a \emph{matrix} of data using
#' \code{RcppArmadillo} \code{C++} code.
#'
#' The dispersion is a measure of the variability of the data. Examples of
#' dispersion are the variance and the Median Absolute Deviation (\emph{MAD}).
#'
#' If \code{method = "moment"} (the default) then \code{calc_var()}
#' calculates the dispersion as the second moment of the data (the variance).
#' Then \code{calc_var()} performs the same calculation as the function
#' \code{colVars()} from package
#' \href{https://cran.r-project.org/web/packages/matrixStats/index.html}{matrixStats},
#' but it's much faster because it uses \code{RcppArmadillo} \code{C++} code.
#'
#' If \code{method = "quantile"} then it calculates the dispersion as the
#' difference between the quantiles as follows:
#' \deqn{
#' \sigma = q_{\alpha} - q_{1-\alpha}
#' }
#' Where \eqn{\alpha} is the confidence level for calculating the quantiles.
#'
#' If \code{method = "nonparametric"} then it calculates the dispersion as the
#' Median Absolute Deviation (\emph{MAD}):
#' \deqn{
#' MAD = median(abs(x - median(x)))
#' }
#' It also multiplies the \emph{MAD} by a factor of \code{1.4826}, to make it
#' comparable to the standard deviation.
#'
#' If \code{method = "nonparametric"} then \code{calc_var()} performs the
#' same calculation as the function \code{stats::mad()}, but it's much faster
#' because it uses \code{RcppArmadillo} \code{C++} code.
#'
#' If the number of rows of \code{tseries} is less than \code{3} then it
#' returns zeros.
#'
#' @examples
#' \dontrun{
#' # Calculate VTI and XLF returns
#' retp <- na.omit(rutils::etfenv$returns[, c("VTI", "XLF")])
#' # Compare HighFreq::calc_var() with standard var()
#' all.equal(drop(HighFreq::calc_var(retp)),
#' apply(retp, 2, var), check.attributes=FALSE)
#' # Compare HighFreq::calc_var() with matrixStats
#' all.equal(drop(HighFreq::calc_var(retp)),
#' matrixStats::colVars(retp), check.attributes=FALSE)
#' # Compare the speed of RcppArmadillo with matrixStats and with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#' Rcpp=HighFreq::calc_var(retp),
#' matrixStats=matrixStats::colVars(retp),
#' Rcode=apply(retp, 2, var),
#' times=10))[, c(1, 4, 5)] # end microbenchmark summary
#' # Compare HighFreq::calc_var() with stats::mad()
#' all.equal(drop(HighFreq::calc_var(retp, method="nonparametric")),
#' sapply(retp, mad), check.attributes=FALSE)
#' # Compare the speed of RcppArmadillo with stats::mad()
#' summary(microbenchmark(
#' Rcpp=HighFreq::calc_var(retp, method="nonparametric"),
#' Rcode=sapply(retp, mad),
#' times=10))[, c(1, 4, 5)] # end microbenchmark summary
#' }
#'
#' @export
calc_var <- function(tseries, method = "moment", confl = 0.75) {
# Auto-generated binding: forwards to the native column-dispersion routine.
# method selects "moment", "quantile", or "nonparametric" (see Details above);
# confl is only used by the "quantile" method.
.Call(`_HighFreq_calc_var`, tseries, method, confl)
}
#' Calculate the covariance matrix of the columns of a \emph{time series}
#' using \code{RcppArmadillo}.
#'
#' @param \code{tseries} A \emph{time series} or a \emph{matrix} of data.
#'
#' @param \code{method} A \emph{character string} specifying the type of the
#' covariance model (the default is \code{method = "moment"} - see Details).
#'
#' @param \code{confl} The confidence level for calculating the quantiles of
#' returns (the default is \code{confl = 0.75}).
#'
#' @return A square matrix with the covariance coefficients of the columns of
#' the \emph{time series} \code{tseries}.
#'
#' @details
#' The function \code{calc_covar()} calculates the covariance matrix of the
#' columns of a \emph{time series} or a \emph{matrix} of data using
#' \code{RcppArmadillo} \code{C++} code.
#' The covariance is a measure of the codependency of the data.
#'
#' If \code{method = "moment"} (the default) then \code{calc_covar()}
#' calculates the covariance as the second co-moment:
#' \deqn{
#' \sigma_{xy} = \frac{1}{n-1} \sum_{i=1}^n (x_i - \bar{x}) (y_i - \bar{y})
#' }
#' Then \code{calc_covar()} performs the same calculation as the \code{R}
#' function \code{stats::cov()}.
#'
#' If \code{method = "quantile"} then it calculates the covariance as the
#' difference between the quantiles as follows:
#' \deqn{
#' \mu = q_{\alpha} - q_{1-\alpha}
#' }
#' Where \eqn{\alpha} is the confidence level for calculating the quantiles.
#'
#' If \code{method = "nonparametric"} then it calculates the covariance as the
#' Median Absolute Deviation (\emph{MAD}):
#' \deqn{
#' MAD = median(abs(x - median(x)))
#' }
#' It also multiplies the \emph{MAD} by a factor of \code{1.4826}, to make it
#' comparable to the standard deviation.
#'
#' If \code{method = "nonparametric"} then \code{calc_covar()} performs the
#' same calculation as the function \code{stats::mad()}, but it's much faster
#' because it uses \code{RcppArmadillo} \code{C++} code.
#'
#' If the number of rows of \code{tseries} is less than \code{3} then it
#' returns zeros.
#'
#' @examples
#' \dontrun{
#' # Calculate VTI and XLF returns
#' retp <- na.omit(rutils::etfenv$returns[, c("VTI", "XLF")])
#' # Compare HighFreq::calc_covar() with standard cov()
#' all.equal(drop(HighFreq::calc_covar(retp)),
#' cov(retp), check.attributes=FALSE)
#' # Compare the speed of RcppArmadillo with matrixStats and with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#' Rcpp=HighFreq::calc_covar(retp),
#' Rcode=cov(retp),
#' times=10))[, c(1, 4, 5)] # end microbenchmark summary
#' # Compare HighFreq::calc_covar() with stats::mad()
#' all.equal(drop(HighFreq::calc_covar(retp, method="nonparametric")),
#' sapply(retp, mad), check.attributes=FALSE)
#' # Compare the speed of RcppArmadillo with stats::mad()
#' summary(microbenchmark(
#' Rcpp=HighFreq::calc_covar(retp, method="nonparametric"),
#' Rcode=sapply(retp, mad),
#' times=10))[, c(1, 4, 5)] # end microbenchmark summary
#' }
#'
#' @export
calc_covar <- function(tseries, method = "moment", confl = 0.75) {
# Auto-generated binding: forwards to the native covariance-matrix routine.
# The method/confl semantics mirror calc_var() (see the roxygen block above).
.Call(`_HighFreq_calc_covar`, tseries, method, confl)
}
#' Calculate the variance of returns aggregated over the end points.
#'
#' @param \code{pricev} A \emph{time series} or a \emph{matrix} of prices.
#'
#' @param \code{step} The number of time periods in each interval between
#' neighboring end points (the default is \code{step = 1}).
#'
#' @return The variance of aggregated returns.
#'
#' @details
#' The function \code{calc_var_ag()} calculates the variance of returns
#' aggregated over the end points.
#'
#' It first calculates the end points spaced apart by the number of periods
#' equal to the argument \code{step}. Then it calculates the aggregated
#' returns by differencing the prices \code{pricev} calculated at the end
#' points. Finally it calculates the variance of the returns.
#'
#' The choice of the first end point is arbitrary, so \code{calc_var_ag()}
#' calculates the different end points for all the possible starting points.
#' It then calculates the variance values for all the different end points
#' and averages them.
#'
#' The aggregated volatility \eqn{\sigma_t} increases with the length of the
#' aggregation interval \eqn{\Delta t}.
#' The aggregated volatility increases as the length of the aggregation
#' interval \eqn{\Delta t} raised to the power of the \emph{Hurst exponent}
#' \eqn{H}:
#' \deqn{
#' \sigma_t = \sigma {\Delta t}^H
#' }
#' Where \eqn{\sigma} is the daily return volatility.
#'
#' The function \code{calc_var_ag()} can therefore be used to calculate the
#' \emph{Hurst exponent} from the variance ratio.
#'
#' @examples
#' \dontrun{
#' # Calculate the prices
#' closep <- na.omit(rutils::etfenv$prices[, c("XLP", "VTI")])
#' closep <- log(closep)
#' # Calculate the variance of daily returns
#' calc_var_ag(closep, step=1)
#' # Calculate the variance using R
#' sapply(rutils::diffit(closep), var)
#' # Calculate the variance of returns aggregated over 21 days
#' calc_var_ag(closep, step=21)
#' # The variance over 21 days is approximately 21 times the daily variance
#' 21*calc_var_ag(closep, step=1)
#' }
#'
#' @export
calc_var_ag <- function(pricev, step = 1L) {
# Auto-generated binding: computes the variance of returns aggregated over
# end points spaced `step` periods apart, averaged over all possible
# starting points (native implementation; see Details above).
.Call(`_HighFreq_calc_var_ag`, pricev, step)
}
#' Calculate the variance of returns from \emph{OHLC} prices using different
#' price range estimators.
#'
#' @param \code{ohlc} A \emph{time series} or a \emph{matrix} of \emph{OHLC}
#' prices.
#'
#' @param \code{method} A \emph{character string} representing the price range
#' estimator for calculating the variance. The estimators include:
#' \itemize{
#' \item "close" close-to-close estimator,
#' \item "rogers_satchell" Rogers-Satchell estimator,
#' \item "garman_klass" Garman-Klass estimator,
#' \item "garman_klass_yz" Garman-Klass with account for close-to-open price jumps,
#' \item "yang_zhang" Yang-Zhang estimator,
#' }
#' (The default is the \code{method = "yang_zhang"}.)
#'
#' @param \code{closel} A \emph{vector} with the lagged \emph{close} prices
#' of the \emph{OHLC time series}. This is an optional argument. (The
#' default is \code{closel = 0}).
#'
#' @param \code{scale} \emph{Boolean} argument: Should the returns be divided
#' by the time index, the number of seconds in each period? (The default is
#' \code{scale = TRUE}).
#'
#' @param \code{index} A \emph{vector} with the time index of the \emph{time
#' series}. This is an optional argument (the default is \code{index = 0}).
#'
#' @return A single \emph{numeric} value equal to the variance of the
#' \emph{OHLC time series}.
#'
#' @details
#' The function \code{calc_var_ohlc()} calculates the variance from all the
#' different intra-day and day-over-day returns (defined as the differences
#' of \emph{OHLC} prices), using several different variance estimation
#' methods.
#'
#' The function \code{calc_var_ohlc()} does not calculate the logarithm of
#' the prices.
#' So if the argument \code{ohlc} contains dollar prices then
#' \code{calc_var_ohlc()} calculates the dollar variance.
#' If the argument \code{ohlc} contains the log prices then
#' \code{calc_var_ohlc()} calculates the percentage variance.
#'
#' The default \code{method} is \emph{"yang_zhang"}, which theoretically
#' has the lowest standard error among unbiased estimators.
#' The methods \emph{"close"}, \emph{"garman_klass_yz"}, and
#' \emph{"yang_zhang"} do account for \emph{close-to-open} price jumps, while
#' the methods \emph{"garman_klass"} and \emph{"rogers_satchell"} do not
#' account for \emph{close-to-open} price jumps.
#'
#' If \code{scale} is \code{TRUE} (the default), then the returns are
#' divided by the differences of the time index (which scales the variance to
#' the units of variance per second squared). This is useful when calculating
#' the variance from minutes bar data, because dividing returns by the
#' number of seconds decreases the effect of overnight price jumps. If the
#' time index is in days, then the variance is equal to the variance per day
#' squared.
#'
#' If the number of rows of \code{ohlc} is less than \code{3} then it
#' returns zero.
#'
#' The optional argument \code{index} is the time index of the \emph{time
#' series} \code{ohlc}. If the time index is in seconds, then the
#' differences of the index are equal to the number of seconds in each time
#' period. If the time index is in days, then the differences are equal to
#' the number of days in each time period.
#'
#' The optional argument \code{closel} are the lagged \emph{close} prices
#' of the \emph{OHLC time series}. Passing in the lagged \emph{close} prices
#' speeds up the calculation, so it's useful for rolling calculations.
#'
#' The function \code{calc_var_ohlc()} is implemented in \code{RcppArmadillo}
#' \code{C++} code, and it's over \code{10} times faster than
#' \code{calc_var_ohlc_r()}, which is implemented in \code{R} code.
#'
#' @examples
#' \dontrun{
#' # Extract the log OHLC prices of SPY
#' ohlc <- log(HighFreq::SPY)
#' # Extract the time index of SPY prices
#' indeks <- c(1, diff(xts::.index(ohlc)))
#' # Calculate the variance of SPY returns, with scaling of the returns
#' HighFreq::calc_var_ohlc(ohlc,
#' method="yang_zhang", scale=TRUE, index=indeks)
#' # Calculate variance without accounting for overnight jumps
#' HighFreq::calc_var_ohlc(ohlc,
#' method="rogers_satchell", scale=TRUE, index=indeks)
#' # Calculate the variance without scaling the returns
#' HighFreq::calc_var_ohlc(ohlc, scale=FALSE)
#' # Calculate the variance by passing in the lagged close prices
#' closel <- HighFreq::lagit(ohlc[, 4])
#' all.equal(HighFreq::calc_var_ohlc(ohlc),
#' HighFreq::calc_var_ohlc(ohlc, closel=closel))
#' # Compare with HighFreq::calc_var_ohlc_r()
#' all.equal(HighFreq::calc_var_ohlc(ohlc, index=indeks),
#' HighFreq::calc_var_ohlc_r(ohlc))
#' # Compare the speed of Rcpp with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#' Rcpp=HighFreq::calc_var_ohlc(ohlc),
#' Rcode=HighFreq::calc_var_ohlc_r(ohlc),
#' times=10))[, c(1, 4, 5)] # end microbenchmark summary
#' }
#' @export
calc_var_ohlc <- function(ohlc, method = "yang_zhang", closel = 0L, scale = TRUE, index = 0L) {
# Auto-generated binding: forwards to the native OHLC variance estimator.
# closel (lagged close prices) and index (time index) are optional; their
# 0L defaults signal "not supplied" to the C++ code (see Details above).
.Call(`_HighFreq_calc_var_ohlc`, ohlc, method, closel, scale, index)
}
#' Calculate the variance of aggregated \emph{OHLC} prices using different
#' price range estimators.
#'
#' @param \code{ohlc} A \emph{time series} or a \emph{matrix} of \emph{OHLC}
#' prices.
#'
#' @param \code{step} The number of time periods in each interval between
#' neighboring end points.
#'
#' @param \code{method} A \emph{character string} representing the price range
#' estimator for calculating the variance. The estimators include:
#' \itemize{
#' \item "close" close-to-close estimator,
#' \item "rogers_satchell" Rogers-Satchell estimator,
#' \item "garman_klass" Garman-Klass estimator,
#' \item "garman_klass_yz" Garman-Klass with account for close-to-open price jumps,
#' \item "yang_zhang" Yang-Zhang estimator,
#' }
#' (The default is the \code{method = "yang_zhang"}.)
#'
#' @param \code{closel} A \emph{vector} with the lagged \emph{close} prices
#' of the \emph{OHLC time series}. This is an optional argument. (The
#' default is \code{closel = 0}).
#'
#' @param \code{scale} \emph{Boolean} argument: Should the returns be divided
#' by the time index, the number of seconds in each period? (The default is
#' \code{scale = TRUE}).
#'
#' @param \code{index} A \emph{vector} with the time index of the \emph{time
#' series}. This is an optional argument (the default is \code{index = 0}).
#'
#' @return The variance of aggregated \emph{OHLC} prices.
#'
#' @details
#' The function \code{calc_var_ohlc_ag()} calculates the variance of
#' \emph{OHLC} prices aggregated over the end points.
#'
#' It first calculates the end points spaced apart by the number of periods
#' equal to the argument \code{step}. Then it aggregates the \emph{OHLC}
#' prices to the end points. Finally it calculates the variance of the
#' aggregated \emph{OHLC} prices.
#'
#' The choice of the first end point is arbitrary, so \code{calc_var_ohlc_ag()}
#' calculates the different end points for all the possible starting points.
#' It then calculates the variance values for all the different end points
#' and averages them.
#'
#' The aggregated volatility \eqn{\sigma_t} increases with the length of the
#' aggregation interval \eqn{\Delta t}.
#' The aggregated volatility increases as the length of the aggregation
#' interval \eqn{\Delta t} raised to the power of the \emph{Hurst exponent}
#' \eqn{H}:
#' \deqn{
#' \sigma_t = \sigma {\Delta t}^H
#' }
#' Where \eqn{\sigma} is the daily return volatility.
#'
#' The function \code{calc_var_ohlc_ag()} can therefore be used to calculate
#' the \emph{Hurst exponent} from the variance ratio.
#'
#' @examples
#' \dontrun{
#' # Calculate the log ohlc prices
#' ohlc <- log(rutils::etfenv$VTI)
#' # Calculate the daily variance of percentage returns
#' calc_var_ohlc_ag(ohlc, step=1)
#' # Calculate the variance of returns aggregated over 21 days
#' calc_var_ohlc_ag(ohlc, step=21)
#' # The variance over 21 days is approximately 21 times the daily variance
#' 21*calc_var_ohlc_ag(ohlc, step=1)
#' }
#'
#' @export
calc_var_ohlc_ag <- function(ohlc, step, method = "yang_zhang", closel = 0L, scale = TRUE, index = 0L) {
# Auto-generated binding: variance of OHLC prices aggregated over end points
# spaced `step` periods apart; remaining arguments mirror calc_var_ohlc().
.Call(`_HighFreq_calc_var_ohlc_ag`, ohlc, step, method, closel, scale, index)
}
#' Calculate the skewness of the columns of a \emph{time series} or a
#' \emph{matrix} using \code{RcppArmadillo}.
#'
#' @param \code{tseries} A \emph{time series} or a \emph{matrix} of data.
#'
#' @param \code{method} A \emph{character string} specifying the type of the
#' skewness model (the default is \code{method = "moment"} - see Details).
#'
#' @param \code{confl} The confidence level for calculating the quantiles of
#' returns (the default is \code{confl = 0.75}).
#'
#' @return A single-row matrix with the skewness of the columns of
#' \code{tseries}.
#'
#' @details
#' The function \code{calc_skew()} calculates the skewness of the columns of
#' a \emph{time series} or a \emph{matrix} of data using \code{C++}
#' \code{RcppArmadillo} code.
#'
#' If \code{method = "moment"} (the default) then \code{calc_skew()}
#' calculates the skewness as the third moment of the data.
#'
#' If \code{method = "quantile"} then it calculates the skewness
#' \eqn{\varsigma} from the differences between the quantiles of the data as
#' follows:
#' \deqn{
#' \varsigma = \frac{q_{\alpha} + q_{1-\alpha} - 2 q_{0.5}}{q_{\alpha} - q_{1-\alpha}}
#' }
#' Where \eqn{\alpha} is the confidence level for calculating the quantiles.
#'
#' If \code{method = "nonparametric"} then it calculates the skewness as the
#' difference between the mean of the data minus its median, divided by the
#' standard deviation.
#'
#' If the number of rows of \code{tseries} is less than \code{3} then it
#' returns zeros.
#'
#' The code examples below compare the function \code{calc_skew()} with the
#' skewness calculated using \code{R} code.
#'
#' @examples
#' \dontrun{
#' # Define a single-column time series of returns
#' retp <- na.omit(rutils::etfenv$returns$VTI)
#' # Calculate the moment skewness
#' HighFreq::calc_skew(retp)
#' # Calculate the moment skewness in R
#' calc_skewr <- function(x) {
#' x <- (x-mean(x))
#' sum(x^3)/var(x)^1.5/NROW(x)
#' } # end calc_skewr
#' all.equal(HighFreq::calc_skew(retp),
#' calc_skewr(retp), check.attributes=FALSE)
#' # Compare the speed of RcppArmadillo with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#' Rcpp=HighFreq::calc_skew(retp),
#' Rcode=calc_skewr(retp),
#' times=10))[, c(1, 4, 5)] # end microbenchmark summary
#' # Calculate the quantile skewness
#' HighFreq::calc_skew(retp, method="quantile", confl=0.9)
#' # Calculate the quantile skewness in R
#' calc_skewq <- function(x, a = 0.75) {
#' quantiles <- quantile(x, c(1-a, 0.5, a), type=5)
#' (quantiles[3] + quantiles[1] - 2*quantiles[2])/(quantiles[3] - quantiles[1])
#' } # end calc_skewq
#' all.equal(drop(HighFreq::calc_skew(retp, method="quantile", confl=0.9)),
#' calc_skewq(retp, a=0.9), check.attributes=FALSE)
#' # Compare the speed of RcppArmadillo with R code
#' summary(microbenchmark(
#' Rcpp=HighFreq::calc_skew(retp, method="quantile"),
#' Rcode=calc_skewq(retp),
#' times=10))[, c(1, 4, 5)] # end microbenchmark summary
#' # Calculate the nonparametric skewness
#' HighFreq::calc_skew(retp, method="nonparametric")
#' # Compare HighFreq::calc_skew() with R nonparametric skewness
#' all.equal(drop(HighFreq::calc_skew(retp, method="nonparametric")),
#' (mean(retp)-median(retp))/sd(retp),
#' check.attributes=FALSE)
#' # Compare the speed of RcppArmadillo with R code
#' summary(microbenchmark(
#' Rcpp=HighFreq::calc_skew(retp, method="nonparametric"),
#' Rcode=(mean(retp)-median(retp))/sd(retp),
#' times=10))[, c(1, 4, 5)] # end microbenchmark summary
#' }
#'
#' @export
calc_skew <- function(tseries, method = "moment", confl = 0.75) {
# Auto-generated binding: forwards to the native column-skewness routine.
# method selects "moment", "quantile", or "nonparametric" (see Details above).
.Call(`_HighFreq_calc_skew`, tseries, method, confl)
}
#' Calculate the kurtosis of the columns of a \emph{time series} or a
#' \emph{matrix} using \code{RcppArmadillo}.
#'
#' @param \code{tseries} A \emph{time series} or a \emph{matrix} of data.
#'
#' @param \code{method} A \emph{character string} specifying the type of the
#' kurtosis model (the default is \code{method = "moment"} - see Details).
#'
#' @param \code{confl} The confidence level for calculating the quantiles of
#' returns (the default is \code{confl = 0.75}).
#'
#' @return A single-row matrix with the kurtosis of the columns of
#' \code{tseries}.
#'
#' @details
#' The function \code{calc_kurtosis()} calculates the kurtosis of the columns
#' of the \emph{matrix} \code{tseries} using \code{RcppArmadillo} \code{C++}
#' code.
#'
#' If \code{method = "moment"} (the default) then \code{calc_kurtosis()}
#' calculates the fourth moment of the data.
#' But it doesn't center the columns of \code{tseries} because that requires
#' copying the matrix \code{tseries} in memory, so it's time-consuming.
#'
#' If \code{method = "quantile"} then it calculates the kurtosis
#' \eqn{\kappa} from the differences between the quantiles of the data as
#' follows:
#' \deqn{
#' \kappa = \frac{q_{\alpha} - q_{1-\alpha}}{q_{0.75} - q_{0.25}}
#' }
#' Where \eqn{\alpha} is the confidence level for calculating the quantiles.
#'
#' If \code{method = "nonparametric"} then it calculates the kurtosis as the
#' difference between the mean of the data minus its median, divided by the
#' standard deviation.
#'
#' If the number of rows of \code{tseries} is less than \code{3} then it
#' returns zeros.
#'
#' The code examples below compare the function \code{calc_kurtosis()} with the
#' kurtosis calculated using \code{R} code.
#'
#' @examples
#' \dontrun{
#' # Define a single-column time series of returns
#' retp <- na.omit(rutils::etfenv$returns$VTI)
#' # Calculate the moment kurtosis
#' HighFreq::calc_kurtosis(retp)
#' # Calculate the moment kurtosis in R
#' calc_kurtr <- function(x) {
#' x <- (x-mean(x))
#' sum(x^4)/var(x)^2/NROW(x)
#' } # end calc_kurtr
#' all.equal(HighFreq::calc_kurtosis(retp),
#' calc_kurtr(retp), check.attributes=FALSE)
#' # Compare the speed of RcppArmadillo with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#' Rcpp=HighFreq::calc_kurtosis(retp),
#' Rcode=calc_kurtr(retp),
#' times=10))[, c(1, 4, 5)] # end microbenchmark summary
#' # Calculate the quantile kurtosis
#' HighFreq::calc_kurtosis(retp, method="quantile", confl=0.9)
#' # Calculate the quantile kurtosis in R
#' calc_kurtq <- function(x, a=0.9) {
#' quantiles <- quantile(x, c(1-a, 0.25, 0.75, a), type=5)
#' (quantiles[4] - quantiles[1])/(quantiles[3] - quantiles[2])
#' } # end calc_kurtq
#' all.equal(drop(HighFreq::calc_kurtosis(retp, method="quantile", confl=0.9)),
#' calc_kurtq(retp, a=0.9), check.attributes=FALSE)
#' # Compare the speed of RcppArmadillo with R code
#' summary(microbenchmark(
#' Rcpp=HighFreq::calc_kurtosis(retp, method="quantile"),
#' Rcode=calc_kurtq(retp),
#' times=10))[, c(1, 4, 5)] # end microbenchmark summary
#' # Calculate the nonparametric kurtosis
#' HighFreq::calc_kurtosis(retp, method="nonparametric")
#' # Compare HighFreq::calc_kurtosis() with R nonparametric kurtosis
#' all.equal(drop(HighFreq::calc_kurtosis(retp, method="nonparametric")),
#' (mean(retp)-median(retp))/sd(retp),
#' check.attributes=FALSE)
#' # Compare the speed of RcppArmadillo with R code
#' summary(microbenchmark(
#' Rcpp=HighFreq::calc_kurtosis(retp, method="nonparametric"),
#' Rcode=(mean(retp)-median(retp))/sd(retp),
#' times=10))[, c(1, 4, 5)] # end microbenchmark summary
#' }
#'
#' @export
calc_kurtosis <- function(tseries, method = "moment", confl = 0.75) {
# Auto-generated binding: forwards to the native column-kurtosis routine.
# method selects "moment", "quantile", or "nonparametric" (see Details above).
.Call(`_HighFreq_calc_kurtosis`, tseries, method, confl)
}
#' Calculate the Hurst exponent from the volatility ratio of aggregated returns.
#'
#' @param \code{tseries} A \emph{time series} or a \emph{matrix} of log prices.
#'
#' @param \code{aggv} A \emph{vector} of aggregation intervals.
#'
#' @return The Hurst exponent calculated from the volatility ratio of
#' aggregated returns. If \code{tseries} contains multiple columns, then the
#' function \code{calc_hurst()} returns a single-row matrix of Hurst
#' exponents.
#'
#' @details
#' The function \code{calc_hurst()} calculates the Hurst exponent from the
#' ratios of the volatilities of aggregated returns.
#'
#' An aggregation interval is equal to the number of time periods between the
#' neighboring aggregation end points.
#'
#' The aggregated volatility \eqn{\sigma_t} increases with the length of the
#' aggregation interval \eqn{\Delta t}.
#' The aggregated volatility increases as the length of the aggregation
#' interval \eqn{\Delta t} raised to the power of the \emph{Hurst exponent}
#' \eqn{H}:
#' \deqn{
#' \sigma_t = \sigma {\Delta t}^H
#' }
#' Where \eqn{\sigma} is the daily return volatility.
#'
#' For a single aggregation interval \eqn{\Delta t}, the \emph{Hurst
#' exponent} \eqn{H} is equal to the logarithm of the ratio of the
#' volatilities divided by the logarithm of the aggregation interval
#' \eqn{\Delta t}:
#' \deqn{
#' H = \frac{\log{\sigma_t} - \log{\sigma}}{\log{\Delta t}}
#' }
#'
#' For a \emph{vector} of aggregation intervals \eqn{\Delta t_i}, the
#' \emph{Hurst exponent} \eqn{H} is equal to the regression slope between the
#' logarithms of the aggregated volatilities \eqn{\sigma_i} versus the
#' logarithms of the aggregation intervals \eqn{\Delta t_i}:
#' \deqn{
#' H = \frac{\code{cov}(\log{\sigma_i}, \log{\Delta t_i})}{\code{var}(\log{\Delta t_i})}
#' }
#'
#' The function \code{calc_hurst()} calls the function \code{calc_var_ag()}
#' to calculate the variance of aggregated returns \eqn{\sigma^2_t}.
#'
#' @examples
#' \dontrun{
#' # Calculate the log prices
#' closep <- na.omit(rutils::etfenv$prices[, c("XLP", "VTI")])
#' closep <- log(closep)
#' # Calculate the Hurst exponents for a 21 day aggregation interval
#' HighFreq::calc_hurst(closep, aggv=21)
#' # Calculate the Hurst exponents for a vector of aggregation intervals
#' aggv <- seq.int(from=3, to=35, length.out=9)^2
#' HighFreq::calc_hurst(closep, aggv=aggv)
#' }
#'
#' @export
calc_hurst <- function(tseries, aggv) {
# Auto-generated binding: estimates the Hurst exponent from the volatility
# ratio of returns aggregated over the intervals in aggv (calls the native
# code, which in turn uses calc_var_ag() — see Details above).
.Call(`_HighFreq_calc_hurst`, tseries, aggv)
}
#' Calculate the Hurst exponent from the volatility ratio of aggregated
#' \emph{OHLC} prices.
#'
#' @param \code{ohlc} A \emph{time series} or a \emph{matrix} of \emph{OHLC}
#' prices.
#'
#' @param \code{step} The number of time periods in each interval between
#' neighboring end points.
#'
#' @param \code{method} A \emph{character string} representing the price range
#' estimator for calculating the variance. The estimators include:
#' \itemize{
#' \item "close" close-to-close estimator,
#' \item "rogers_satchell" Rogers-Satchell estimator,
#' \item "garman_klass" Garman-Klass estimator,
#' \item "garman_klass_yz" Garman-Klass with account for close-to-open price jumps,
#' \item "yang_zhang" Yang-Zhang estimator,
#' }
#' (The default is the \code{method = "yang_zhang"}.)
#'
#' @param \code{closel} A \emph{vector} with the lagged \emph{close} prices
#' of the \emph{OHLC time series}. This is an optional argument. (The
#' default is \code{closel = 0}).
#'
#' @param \code{scale} \emph{Boolean} argument: Should the returns be divided
#' by the time index, the number of seconds in each period? (The default is
#' \code{scale = TRUE}).
#'
#' @param \code{index} A \emph{vector} with the time index of the \emph{time
#' series}. This is an optional argument (the default is \code{index = 0}).
#'
#' @return The Hurst exponent calculated from the volatility ratio of
#' aggregated \emph{OHLC} prices.
#'
#' @details
#' The function \code{calc_hurst_ohlc()} calculates the Hurst exponent from the
#' ratios of the variances of aggregated \emph{OHLC} prices.
#'
#' The aggregated volatility \eqn{\sigma_t} increases with the length of the
#' aggregation interval \eqn{\Delta t}.
#' The aggregated volatility increases as the length of the aggregation
#' interval \eqn{\Delta t} raised to the power of the \emph{Hurst exponent}
#' \eqn{H}:
#' \deqn{
#' \sigma_t = \sigma {\Delta t}^H
#' }
#' Where \eqn{\sigma} is the daily return volatility.
#'
#' The \emph{Hurst exponent} \eqn{H} is equal to the logarithm of the ratio
#' of the volatilities divided by the logarithm of the time interval
#' \eqn{\Delta t}:
#' \deqn{
#' H = \frac{\log{\sigma_t} - \log{\sigma}}{\log{\Delta t}}
#' }
#'
#' The function \code{calc_hurst_ohlc()} calls the function
#' \code{calc_var_ohlc_ag()} to calculate the aggregated variance
#' \eqn{\sigma^2_t}.
#'
#' @examples
#' \dontrun{
#' # Calculate the log ohlc prices
#' ohlc <- log(rutils::etfenv$VTI)
#' # Calculate the Hurst exponent from 21 day aggregations
#' calc_hurst_ohlc(ohlc, step=21)
#' }
#'
#' @export
calc_hurst_ohlc <- function(ohlc, step, method = "yang_zhang", closel = 0L, scale = TRUE, index = 0L) {
# Auto-generated binding: Hurst exponent from the variance ratio of OHLC
# prices aggregated over `step` periods; arguments mirror calc_var_ohlc_ag().
.Call(`_HighFreq_calc_hurst_ohlc`, ohlc, step, method, closel, scale, index)
}
#' Perform multivariate linear regression using least squares and return a
#' named list of regression coefficients, their t-values, and p-values.
#'
#' @param \code{respv} A single-column \emph{time series} or a \emph{vector}
#' of response data.
#'
#' @param \code{predm} A \emph{time series} or a \emph{matrix} of predictor
#' data.
#'
#' @return A named list with three elements: a \emph{matrix} of coefficients
#' (named \emph{"coefficients"}), the \emph{z-score} of the last residual
#' (named \emph{"zscore"}), and a \emph{vector} with the R-squared and
#' F-statistic (named \emph{"stats"}). The numeric \emph{matrix} of
#' coefficients named \emph{"coefficients"} contains the alpha and beta
#' coefficients, and their \emph{t-values} and \emph{p-values}.
#'
#' @details
#' The function \code{calc_lm()} performs the same calculations as the
#' function \code{lm()} from package \emph{stats}.
#' It uses \code{RcppArmadillo} \code{C++} code so it's several times faster
#' than \code{lm()}. The code was inspired by this article (but it's not
#' identical to it):
#' http://gallery.rcpp.org/articles/fast-linear-model-with-armadillo/
#'
#' @examples
#' \dontrun{
#' # Calculate historical returns
#' retp <- na.omit(rutils::etfenv$returns[, c("XLF", "VTI", "IEF")])
#' # Response equals XLF returns
#' respv <- retp[, 1]
#' # Predictor matrix equals VTI and IEF returns
#' predm <- retp[, -1]
#' # Perform multivariate regression using lm()
#' regmod <- lm(respv ~ predm)
#' regsum <- summary(regmod)
#' # Add unit intercept column to the predictor matrix
#' predm <- cbind(rep(1, NROW(predm)), predm)
#' # Perform multivariate regression using calc_lm()
#' regarma <- HighFreq::calc_lm(respv=respv, predm=predm)
#' # Compare the outputs of both functions
#' all.equal(regarma$coefficients[, "coeff"], unname(coef(regmod)))
#' all.equal(unname(regarma$coefficients), unname(regsum$coefficients))
#' all.equal(unname(regarma$stats), c(regsum$r.squared, unname(regsum$fstatistic[1])))
#' # Compare the speed of RcppArmadillo with R code
#' summary(microbenchmark(
#' Rcpp=HighFreq::calc_lm(respv=respv, predm=predm),
#' Rcode=lm(respv ~ predm),
#' times=10))[, c(1, 4, 5)] # end microbenchmark summary
#' }
#'
#' @export
calc_lm <- function(respv, predm) {
# Auto-generated binding: least-squares regression of respv on predm in
# native code. Note: unlike stats::lm(), no intercept column is added
# automatically — the caller must prepend one to predm (see the examples).
.Call(`_HighFreq_calc_lm`, respv, predm)
}
#' Perform multivariate regression using different methods, and return a vector
#' of regression coefficients, their t-values, and the last residual z-score.
#'
#' @param \code{respv} A single-column \emph{time series} or a \emph{vector}
#' of response data.
#'
#' @param \code{predm} A \emph{time series} or a \emph{matrix} of predictor
#' data.
#'
#' @param \code{controlv} A \emph{list} of model parameters (see Details).
#'
#' @return A single-row matrix with the regression coefficients, their
#' t-values, and the last residual z-score.
#'
#' @details
#' The function \code{calc_reg()} performs multivariate regression using
#' different methods, and returns a vector of regression coefficients, their
#' t-values, and the last residual z-score.
#'
#' The function \code{calc_reg()} accepts a list of regression model
#' parameters through the argument \code{controlv}.
#' The list of model parameters can be created using the function
#' \code{param_reg()}. Below is a description of the model parameters.
#'
#' If \code{regmod = "least_squares"} (the default) then it performs the
#' standard least squares regression, the same as the function
#' \code{calc_lm()}, and the function \code{lm()} from the \code{R} package
#' \emph{stats}.
#' But it uses \code{RcppArmadillo} \code{C++} code so it's several times
#' faster than \code{lm()}.
#'
#' If \code{regmod = "regular"} then it performs shrinkage regression. It
#' calculates the \emph{reduced inverse} of the predictor matrix from its
#' singular value decomposition. It performs regularization by selecting
#' only the largest \emph{singular values} equal in number to \code{dimax}.
#'
#' If \code{regmod = "quantile"} then it performs quantile regression (not
#' implemented yet).
#'
#' The length of the return vector depends on the number of columns of the
#' predictor matrix (including the intercept column, if it's been added in
#' \code{R}).
#' The number of regression coefficients is equal to the number of columns of
#' the predictor matrix.
#' The length of the return vector is equal to the number of regression
#' coefficients, plus their t-values, plus the z-score.
#' The number of t-values is equal to the number of coefficients.
#'
#' For example, if the number of columns of the predictor matrix is equal to
#' \code{n}, then \code{calc_reg()} returns a vector with \code{2n+1}
#' elements: \code{n} regression coefficients, \code{n} corresponding
#' t-values, and \code{1} z-score value.
#'
#' @examples
#' \dontrun{
#' # Calculate historical returns
#' retp <- na.omit(rutils::etfenv$returns[, c("XLF", "VTI", "IEF")])
#' # Response equals XLF returns
#' respv <- retp[, 1]
#' # Predictor matrix equals VTI and IEF returns
#' predm <- retp[, -1]
#' # Perform multivariate regression using lm()
#' regmod <- lm(respv ~ predm)
#' regsum <- summary(regmod)
#' coeff <- regsum$coefficients
#' # Create a default list of regression parameters
#' controlv <- HighFreq::param_reg()
#' # Add unit intercept column to the predictor matrix
#' predm <- cbind(rep(1, NROW(predm)), predm)
#' # Perform multivariate regression using calc_reg()
#' regarma <- drop(HighFreq::calc_reg(respv=respv, predm=predm, controlv=controlv))
#' # Compare the outputs of both functions
#' all.equal(regarma[1:(2*NCOL(predm))],
#' c(coeff[, "Estimate"], coeff[, "t value"]), check.attributes=FALSE)
#' # Compare the speed of RcppArmadillo with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#' Rcpp=HighFreq::calc_reg(respv=respv, predm=predm, controlv=controlv),
#' Rcode=lm(respv ~ predm),
#' times=10))[, c(1, 4, 5)] # end microbenchmark summary
#' }
#'
#' @export
calc_reg <- function(respv, predm, controlv) {
# Auto-generated binding (Rcpp::compileAttributes): forwards all arguments,
# including the controlv parameter list (see param_reg()), to the C++ routine.
.Call(`_HighFreq_calc_reg`, respv, predm, controlv)
}
#' Calculate a \emph{matrix} of mean (location) estimates over a rolling
#' look-back interval attached at the end points of a \emph{time series} or a
#' \emph{matrix}.
#'
#' @param \code{lookb} The number of end points in the look-back interval
#' (the default is \code{lookb = 1}).
#'
#' @param \code{tseries} A \emph{time series} or a \emph{matrix} of data.
#'
#' @param \code{startp} An \emph{integer} vector of start points (the default
#' is \code{startp = 0}).
#'
#' @param \code{endd} An \emph{integer} vector of end points (the default is
#' \code{endd = 0}).
#'
#' @param \code{step} The number of time periods between the end points (the
#' default is \code{step = 1}).
#'
#' @param \code{stub} An \emph{integer} value equal to the first end point for
#' calculating the end points (the default is \code{stub = 0}).
#'
#' @param \code{method} A \emph{character string} representing the type of the
#'   mean measure (the default is \code{method = "moment"}).
#'
#' @param \code{confl} The confidence level for calculating the quantiles of
#'   returns (the default is \code{confl = 0.75}).
#'
#' @return A \emph{matrix} of mean (location) estimates with the same number of
#' columns as the input time series \code{tseries}, and the number of rows
#' equal to the number of end points.
#'
#' @details
#' The function \code{roll_mean()} calculates a \emph{matrix} of mean
#' (location) estimates over rolling look-back intervals attached at the end
#' points of the \emph{time series} \code{tseries}.
#'
#' The function \code{roll_mean()} performs a loop over the end points, and at
#' each end point it subsets the time series \code{tseries} over a look-back
#' interval equal to \code{lookb} number of end points.
#'
#' It passes the subset time series to the function \code{calc_mean()}, which
#' calculates the mean (location).
#' See the function \code{calc_mean()} for a description of the mean methods.
#'
#' If the arguments \code{endd} and \code{startp} are not given then it
#' first calculates a vector of end points separated by \code{step} time
#' periods. It calculates the end points along the rows of \code{tseries}
#' using the function \code{calc_endpoints()}, with the number of time
#' periods between the end points equal to \code{step} time periods.
#'
#' For example, the rolling mean at \code{25} day end points, with a
#' \code{75} day look-back, can be calculated using the parameters
#' \code{step = 25} and \code{lookb = 3}.
#'
#' The function \code{roll_mean()} with the parameter \code{step = 1}
#' performs the same calculation as the function \code{roll_mean()} from
#' package
#' \href{https://cran.r-project.org/web/packages/RcppRoll/index.html}{RcppRoll},
#' but it's several times faster because it uses \code{C++}
#' \code{RcppArmadillo} code.
#'
#' The function \code{roll_mean()} is implemented in \code{RcppArmadillo}
#' \code{C++} code, which makes it several times faster than \code{R} code.
#'
#' If only a simple rolling mean is required (not the median) then other
#' functions like \code{roll_sum()} or \code{roll_vec()} may be even faster.
#'
#' @examples
#' \dontrun{
#' # Define time series of returns using package rutils
#' retp <- na.omit(rutils::etfenv$returns$VTI)
#' # Calculate the rolling means at 25 day end points, with a 75 day look-back
#' meanv <- HighFreq::roll_mean(retp, lookb=3, step=25)
#' # Compare the mean estimates over 11-period look-back intervals
#' all.equal(HighFreq::roll_mean(retp, lookb=11)[-(1:10), ],
#' drop(RcppRoll::roll_mean(retp, n=11)), check.attributes=FALSE)
#' # Define end points and start points
#' endd <- HighFreq::calc_endpoints(NROW(retp), step=25)
#' startp <- HighFreq::calc_startpoints(endd, lookb=3)
#' # Calculate the rolling means using RcppArmadillo
#' meanv <- HighFreq::roll_mean(retp, startp=startp, endd=endd)
#' # Calculate the rolling medians using RcppArmadillo
#' medianscpp <- HighFreq::roll_mean(retp, startp=startp, endd=endd, method="nonparametric")
#' # Calculate the rolling medians using R
#' medians <- sapply(1:NROW(endd), function(i) {
#' median(retp[startp[i]:endd[i] + 1])
#' }) # end sapply
#' all.equal(medians, drop(medianscpp))
#' # Compare the speed of RcppArmadillo with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#' Rcpp=HighFreq::roll_mean(retp, startp=startp, endd=endd, method="nonparametric"),
#' Rcode=sapply(1:NROW(endd), function(i) {median(retp[startp[i]:endd[i] + 1])}),
#' times=10))[, c(1, 4, 5)]
#' }
#' @export
roll_mean <- function(tseries, lookb = 1L, startp = 0L, endd = 0L, step = 1L, stub = 0L, method = "moment", confl = 0.75) {
# Auto-generated binding (Rcpp::compileAttributes): forwards all arguments to
# the registered C++ routine. The defaults here mirror the C++ declaration;
# confl is the quantile confidence level used by the nonparametric methods.
.Call(`_HighFreq_roll_mean`, tseries, lookb, startp, endd, step, stub, method, confl)
}
#' Calculate a \emph{vector} of variance estimates over a rolling look-back
#' interval for a single-column \emph{time series} or a single-column
#' \emph{matrix}, using \code{RcppArmadillo}.
#'
#' @param \code{tseries} A single-column \emph{time series} or a single-column
#' \emph{matrix}.
#'
#' @param \code{lookb} The length of the look-back interval, equal to the
#' number of \emph{vector} elements used for calculating a single variance
#' estimate (the default is \code{lookb = 1}).
#'
#' @return A single-column \emph{matrix} with the same number of elements as
#' the input argument \code{tseries}.
#'
#' @details
#' The function \code{roll_varvec()} calculates a \emph{vector} of variance
#' estimates over a rolling look-back interval for a single-column \emph{time
#' series} or a single-column \emph{matrix}, using \code{RcppArmadillo} \code{C++}
#' code.
#'
#' The function \code{roll_varvec()} uses an expanding look-back interval in
#' the initial warmup period, to calculate the same number of elements as the
#' input argument \code{tseries}.
#'
#' The function \code{roll_varvec()} performs the same calculation as the
#' function \code{roll_var()} from package
#' \href{https://cran.r-project.org/web/packages/RcppRoll/index.html}{RcppRoll},
#' but it's several times faster because it uses \code{RcppArmadillo} \code{C++}
#' code.
#'
#' @examples
#' \dontrun{
#' # Create a vector of random returns
#' retp <- rnorm(1e6)
#' # Compare the variance estimates over 11-period look-back intervals
#' all.equal(drop(HighFreq::roll_varvec(retp, lookb=11))[-(1:10)],
#' RcppRoll::roll_var(retp, n=11))
#' # Compare the speed of RcppArmadillo with RcppRoll
#' library(microbenchmark)
#' summary(microbenchmark(
#' Rcpp=HighFreq::roll_varvec(retp, lookb=11),
#' RcppRoll=RcppRoll::roll_var(retp, n=11),
#' times=10))[, c(1, 4, 5)] # end microbenchmark summary
#' }
#' @export
roll_varvec <- function(tseries, lookb = 1L) {
# Auto-generated binding (Rcpp::compileAttributes): forwards the single-column
# series and look-back length to the registered native C++ routine.
.Call(`_HighFreq_roll_varvec`, tseries, lookb)
}
#' Calculate a \emph{matrix} of dispersion (variance) estimates over a rolling
#' look-back interval attached at the end points of a \emph{time series} or a
#' \emph{matrix}.
#'
#' @param \code{tseries} A \emph{time series} or a \emph{matrix} of data.
#'
#' @param \code{lookb} The number of end points in the look-back interval
#' (the default is \code{lookb = 1}).
#'
#' @param \code{startp} An \emph{integer} vector of start points (the default
#' is \code{startp = 0}).
#'
#' @param \code{endd} An \emph{integer} vector of end points (the default is
#' \code{endd = 0}).
#'
#' @param \code{step} The number of time periods between the end points (the
#' default is \code{step = 1}).
#'
#' @param \code{stub} An \emph{integer} value equal to the first end point for
#' calculating the end points (the default is \code{stub = 0}).
#'
#' @param \code{method} A \emph{character string} representing the type of the
#'   measure of dispersion (the default is \code{method = "moment"}).
#'
#' @param \code{confl} The confidence level for calculating the quantiles of
#'   returns (the default is \code{confl = 0.75}).
#'
#' @return A \emph{matrix} of dispersion (variance) estimates with the same number
#' of columns as the input time series \code{tseries}, and the number of rows
#' equal to the number of end points.
#'
#' @details
#' The function \code{roll_var()} calculates a \emph{matrix} of dispersion
#' (variance) estimates over rolling look-back intervals attached at the end
#' points of the \emph{time series} \code{tseries}.
#'
#' The function \code{roll_var()} performs a loop over the end points, and at
#' each end point it subsets the time series \code{tseries} over a look-back
#' interval equal to \code{lookb} number of end points.
#'
#' It passes the subset time series to the function \code{calc_var()}, which
#' calculates the dispersion.
#' See the function \code{calc_var()} for a description of the dispersion
#' methods.
#'
#' If the arguments \code{endd} and \code{startp} are not given then it
#' first calculates a vector of end points separated by \code{step} time
#' periods. It calculates the end points along the rows of \code{tseries}
#' using the function \code{calc_endpoints()}, with the number of time
#' periods between the end points equal to \code{step} time periods.
#'
#' For example, the rolling variance at \code{25} day end points, with a
#' \code{75} day look-back, can be calculated using the parameters
#' \code{step = 25} and \code{lookb = 3}.
#'
#' The function \code{roll_var()} with the parameter \code{step = 1}
#' performs the same calculation as the function \code{roll_var()} from
#' package
#' \href{https://cran.r-project.org/web/packages/RcppRoll/index.html}{RcppRoll},
#' but it's several times faster because it uses \code{RcppArmadillo} \code{C++}
#' code.
#'
#' The function \code{roll_var()} is implemented in \code{RcppArmadillo}
#' \code{C++} code, which makes it several times faster than \code{R} code.
#'
#' @examples
#' \dontrun{
#' # Define time series of returns using package rutils
#' retp <- na.omit(rutils::etfenv$returns$VTI)
#' # Calculate the rolling variance at 25 day end points, with a 75 day look-back
#' varv <- HighFreq::roll_var(retp, lookb=3, step=25)
#' # Compare the variance estimates over 11-period look-back intervals
#' all.equal(HighFreq::roll_var(retp, lookb=11)[-(1:10), ],
#' drop(RcppRoll::roll_var(retp, n=11)), check.attributes=FALSE)
#' # Compare the speed of HighFreq::roll_var() with RcppRoll::roll_var()
#' library(microbenchmark)
#' summary(microbenchmark(
#' Rcpp=HighFreq::roll_var(retp, lookb=11),
#' RcppRoll=RcppRoll::roll_var(retp, n=11),
#' times=10))[, c(1, 4, 5)] # end microbenchmark summary
#' # Compare the speed of HighFreq::roll_var() with TTR::runMAD()
#' summary(microbenchmark(
#' Rcpp=HighFreq::roll_var(retp, lookb=11, method="quantile"),
#' TTR=TTR::runMAD(retp, n = 11),
#' times=10))[, c(1, 4, 5)] # end microbenchmark summary
#' }
#' @export
roll_var <- function(tseries, lookb = 1L, startp = 0L, endd = 0L, step = 1L, stub = 0L, method = "moment", confl = 0.75) {
# Auto-generated binding (Rcpp::compileAttributes): forwards all arguments to
# the registered C++ routine. The defaults here mirror the C++ declaration;
# confl is the quantile confidence level used by the "quantile" method.
.Call(`_HighFreq_roll_var`, tseries, lookb, startp, endd, step, stub, method, confl)
}
#' Calculate a \emph{vector} of variance estimates over a rolling look-back
#' interval attached at the end points of a \emph{time series} or a
#' \emph{matrix} with \emph{OHLC} price data.
#'
#' @param \code{ohlc} A \emph{time series} or a \emph{matrix} with \emph{OHLC}
#' price data.
#'
#' @param \code{startp} An \emph{integer} vector of start points (the default
#' is \code{startp = 0}).
#'
#' @param \code{endd} An \emph{integer} vector of end points (the default is
#' \code{endd = 0}).
#'
#' @param \code{step} The number of time periods between the end points (the
#' default is \code{step = 1}).
#'
#' @param \code{lookb} The number of end points in the look-back interval
#' (the default is \code{lookb = 1}).
#'
#' @param \code{stub} An \emph{integer} value equal to the first end point for
#' calculating the end points (the default is \code{stub = 0}).
#'
#' @param \code{method} A \emph{character string} representing the price range
#' estimator for calculating the variance. The estimators include:
#' \itemize{
#' \item "close" close-to-close estimator,
#' \item "rogers_satchell" Rogers-Satchell estimator,
#' \item "garman_klass" Garman-Klass estimator,
#' \item "garman_klass_yz" Garman-Klass with account for close-to-open price jumps,
#' \item "yang_zhang" Yang-Zhang estimator,
#' }
#' (The default is the \emph{"yang_zhang"} estimator.)
#'
#' @param \code{scale} \emph{Boolean} argument: Should the returns be divided
#' by the time index, the number of seconds in each period? (The default is
#' \code{scale = TRUE}.)
#'
#' @param \code{index} A \emph{vector} with the time index of the \emph{time
#' series}. This is an optional argument (the default is \code{index=0}).
#'
#' @return A column \emph{vector} of variance estimates, with the number of
#' rows equal to the number of end points.
#'
#' @details
#' The function \code{roll_var_ohlc()} calculates a \emph{vector} of variance
#' estimates over a rolling look-back interval attached at the end points of
#' the \emph{time series} \code{ohlc}.
#'
#' The input \emph{OHLC time series} \code{ohlc} is assumed to contain the
#' log prices.
#'
#' The function \code{roll_var_ohlc()} performs a loop over the end points,
#' subsets the previous (past) rows of \code{ohlc}, and passes them into the
#' function \code{calc_var_ohlc()}.
#'
#' At each end point, the variance is calculated over a look-back interval
#' equal to \code{lookb} number of end points.
#' In the initial warmup period, the variance is calculated over an expanding
#' look-back interval.
#'
#' If the arguments \code{endd} and \code{startp} are not given then it
#' first calculates a vector of end points separated by \code{step} time
#' periods. It calculates the end points along the rows of \code{ohlc}
#' using the function \code{calc_endpoints()}, with the number of time
#' periods between the end points equal to \code{step} time periods.
#'
#' For example, the rolling variance at daily end points with an \code{11}
#' day look-back, can be calculated using the parameters \code{step = 1} and
#' \code{lookb = 11} (assuming the \code{ohlc} data has daily
#' frequency.)
#'
#' Similarly, the rolling variance at \code{25} day end points with a
#' \code{75} day look-back, can be calculated using the parameters
#' \code{step = 25} and \code{lookb = 3} (because \code{3*25 = 75}).
#'
#' The function \code{roll_var_ohlc()} calculates the variance from all the
#' different intra-day and day-over-day returns (defined as the differences
#' between \emph{OHLC} prices), using several different variance estimation
#' methods.
#'
#' The default \code{method} is \emph{"yang_zhang"}, which theoretically
#' has the lowest standard error among unbiased estimators.
#' The methods \emph{"close"}, \emph{"garman_klass_yz"}, and
#' \emph{"yang_zhang"} do account for \emph{close-to-open} price jumps, while
#' the methods \emph{"garman_klass"} and \emph{"rogers_satchell"} do not
#' account for \emph{close-to-open} price jumps.
#'
#' If \code{scale} is \code{TRUE} (the default), then the returns are
#' divided by the differences of the time index (which scales the variance to
#' the units of variance per second squared.) This is useful when calculating
#' the variance from minutes bar data, because dividing returns by the
#' number of seconds decreases the effect of overnight price jumps. If the
#' time index is in days, then the variance is equal to the variance per day
#' squared.
#'
#' The optional argument \code{index} is the time index of the \emph{time
#' series} \code{ohlc}. If the time index is in seconds, then the
#' differences of the index are equal to the number of seconds in each time
#' period. If the time index is in days, then the differences are equal to
#' the number of days in each time period.
#'
#' The function \code{roll_var_ohlc()} is implemented in \code{RcppArmadillo}
#' \code{C++} code, which makes it several times faster than \code{R} code.
#'
#' @examples
#' \dontrun{
#' # Extract the log OHLC prices of SPY
#' ohlc <- log(HighFreq::SPY)
#' # Extract the time index of SPY prices
#' indeks <- c(1, diff(xts::.index(ohlc)))
#' # Rolling variance at minutes end points, with a 21 minute look-back
#' varoll <- HighFreq::roll_var_ohlc(ohlc,
#' step=1, lookb=21,
#' method="yang_zhang",
#' index=indeks, scale=TRUE)
#' # Daily OHLC prices
#' ohlc <- rutils::etfenv$VTI
#' indeks <- c(1, diff(xts::.index(ohlc)))
#' # Rolling variance at 5 day end points, with a 20 day look-back (20=4*5)
#' varoll <- HighFreq::roll_var_ohlc(ohlc,
#' step=5, lookb=4,
#' method="yang_zhang",
#' index=indeks, scale=TRUE)
#' # Same calculation in R
#' nrows <- NROW(ohlc)
#' closel <- HighFreq::lagit(ohlc[, 4])
#' endd <- drop(HighFreq::calc_endpoints(nrows, 3)) + 1
#' startp <- drop(HighFreq::calc_startpoints(endd, 2))
#' npts <- NROW(endd)
#' varollr <- sapply(2:npts, function(it) {
#' rangev <- startp[it]:endd[it]
#' sub_ohlc <- ohlc[rangev, ]
#' sub_close <- closel[rangev]
#' sub_index <- indeks[rangev]
#' HighFreq::calc_var_ohlc(sub_ohlc, closel=sub_close, scale=TRUE, index=sub_index)
#' }) # end sapply
#' varollr <- c(0, varollr)
#' all.equal(drop(varoll), varollr)
#' }
#' @export
roll_var_ohlc <- function(ohlc, startp = 0L, endd = 0L, step = 1L, lookb = 1L, stub = 0L, method = "yang_zhang", scale = TRUE, index = 0L) {
# Auto-generated binding (Rcpp::compileAttributes): forwards the OHLC series,
# end-point controls, range-estimator method, scaling flag, and optional time
# index to the registered native C++ routine.
.Call(`_HighFreq_roll_var_ohlc`, ohlc, startp, endd, step, lookb, stub, method, scale, index)
}
#' Calculate a \emph{matrix} of skewness estimates over a rolling look-back
#' interval attached at the end points of a \emph{time series} or a
#' \emph{matrix}.
#'
#' @param \code{tseries} A \emph{time series} or a \emph{matrix} of data.
#'
#' @param \code{startp} An \emph{integer} vector of start points (the default
#' is \code{startp = 0}).
#'
#' @param \code{endd} An \emph{integer} vector of end points (the default is
#' \code{endd = 0}).
#'
#' @param \code{step} The number of time periods between the end points (the
#' default is \code{step = 1}).
#'
#' @param \code{lookb} The number of end points in the look-back interval
#' (the default is \code{lookb = 1}).
#'
#' @param \code{stub} An \emph{integer} value equal to the first end point for
#' calculating the end points (the default is \code{stub = 0}).
#'
#' @param \code{method} A \emph{character string} specifying the type of the
#' skewness model (the default is \code{method = "moment"} - see Details).
#'
#' @param \code{confl} The confidence level for calculating the quantiles of
#' returns (the default is \code{confl = 0.75}).
#'
#' @return A \emph{matrix} of skewness estimates with the same number of
#' columns as the input time series \code{tseries}, and the number of rows
#' equal to the number of end points.
#'
#' @details
#' The function \code{roll_skew()} calculates a \emph{matrix} of skewness
#' estimates over rolling look-back intervals attached at the end points of
#' the \emph{time series} \code{tseries}.
#'
#' The function \code{roll_skew()} performs a loop over the end points, and
#' at each end point it subsets the time series \code{tseries} over a
#' look-back interval equal to \code{lookb} number of end points.
#'
#' It passes the subset time series to the function \code{calc_skew()}, which
#' calculates the skewness.
#' See the function \code{calc_skew()} for a description of the skewness
#' methods.
#'
#' If the arguments \code{endd} and \code{startp} are not given then it
#' first calculates a vector of end points separated by \code{step} time
#' periods. It calculates the end points along the rows of \code{tseries}
#' using the function \code{calc_endpoints()}, with the number of time
#' periods between the end points equal to \code{step} time periods.
#'
#' For example, the rolling skewness at \code{25} day end points, with a
#' \code{75} day look-back, can be calculated using the parameters
#' \code{step = 25} and \code{lookb = 3}.
#'
#' The function \code{roll_skew()} is implemented in \code{RcppArmadillo}
#' \code{C++} code, which makes it several times faster than \code{R} code.
#'
#' @examples
#' \dontrun{
#' # Define time series of returns using package rutils
#' retp <- na.omit(rutils::etfenv$returns$VTI)
#' # Define end points and start points
#' endd <- 1 + HighFreq::calc_endpoints(NROW(retp), step=25)
#' startp <- HighFreq::calc_startpoints(endd, lookb=3)
#' # Calculate the rolling skewness at 25 day end points, with a 75 day look-back
#' skewv <- HighFreq::roll_skew(retp, step=25, lookb=3)
#' # Calculate the rolling skewness using R code
#' skewr <- sapply(1:NROW(endd), function(it) {
#' HighFreq::calc_skew(retp[startp[it]:endd[it], ])
#' }) # end sapply
#' # Compare the skewness estimates
#' all.equal(drop(skewv), skewr, check.attributes=FALSE)
#' # Compare the speed of RcppArmadillo with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#' Rcpp=HighFreq::roll_skew(retp, step=25, lookb=3),
#' Rcode=sapply(1:NROW(endd), function(it) {
#' HighFreq::calc_skew(retp[startp[it]:endd[it], ])
#' }),
#' times=10))[, c(1, 4, 5)] # end microbenchmark summary
#' }
#' @export
roll_skew <- function(tseries, startp = 0L, endd = 0L, step = 1L, lookb = 1L, stub = 0L, method = "moment", confl = 0.75) {
# Auto-generated binding (Rcpp::compileAttributes): forwards all arguments to
# the registered C++ routine; defaults mirror the C++ declaration.
.Call(`_HighFreq_roll_skew`, tseries, startp, endd, step, lookb, stub, method, confl)
}
#' Calculate a \emph{matrix} of kurtosis estimates over a rolling look-back
#' interval attached at the end points of a \emph{time series} or a
#' \emph{matrix}.
#'
#' @param \code{tseries} A \emph{time series} or a \emph{matrix} of data.
#'
#' @param \code{startp} An \emph{integer} vector of start points (the default
#' is \code{startp = 0}).
#'
#' @param \code{endd} An \emph{integer} vector of end points (the default is
#' \code{endd = 0}).
#'
#' @param \code{step} The number of time periods between the end points (the
#' default is \code{step = 1}).
#'
#' @param \code{lookb} The number of end points in the look-back interval
#' (the default is \code{lookb = 1}).
#'
#' @param \code{stub} An \emph{integer} value equal to the first end point for
#' calculating the end points (the default is \code{stub = 0}).
#'
#' @param \code{method} A \emph{character string} specifying the type of the
#' kurtosis model (the default is \code{method = "moment"} - see Details).
#'
#' @param \code{confl} The confidence level for calculating the quantiles of
#' returns (the default is \code{confl = 0.75}).
#'
#' @return A \emph{matrix} of kurtosis estimates with the same number of
#' columns as the input time series \code{tseries}, and the number of rows
#' equal to the number of end points.
#'
#' @details
#' The function \code{roll_kurtosis()} calculates a \emph{matrix} of kurtosis
#' estimates over rolling look-back intervals attached at the end points of
#' the \emph{time series} \code{tseries}.
#'
#' The function \code{roll_kurtosis()} performs a loop over the end points,
#' and at each end point it subsets the time series \code{tseries} over a
#' look-back interval equal to \code{lookb} number of end points.
#'
#' It passes the subset time series to the function \code{calc_kurtosis()},
#' which calculates the kurtosis. See the function \code{calc_kurtosis()} for
#' a description of the kurtosis methods.
#'
#' If the arguments \code{endd} and \code{startp} are not given then it
#' first calculates a vector of end points separated by \code{step} time
#' periods. It calculates the end points along the rows of \code{tseries}
#' using the function \code{calc_endpoints()}, with the number of time
#' periods between the end points equal to \code{step} time periods.
#'
#' For example, the rolling kurtosis at \code{25} day end points, with a
#' \code{75} day look-back, can be calculated using the parameters
#' \code{step = 25} and \code{lookb = 3}.
#'
#' The function \code{roll_kurtosis()} is implemented in \code{RcppArmadillo}
#' \code{C++} code, which makes it several times faster than \code{R} code.
#'
#' @examples
#' \dontrun{
#' # Define time series of returns using package rutils
#' retp <- na.omit(rutils::etfenv$returns$VTI)
#' # Define end points and start points
#' endd <- 1 + HighFreq::calc_endpoints(NROW(retp), step=25)
#' startp <- HighFreq::calc_startpoints(endd, lookb=3)
#' # Calculate the rolling kurtosis at 25 day end points, with a 75 day look-back
#' kurtosisv <- HighFreq::roll_kurtosis(retp, step=25, lookb=3)
#' # Calculate the rolling kurtosis using R code
#' kurt_r <- sapply(1:NROW(endd), function(it) {
#' HighFreq::calc_kurtosis(retp[startp[it]:endd[it], ])
#' }) # end sapply
#' # Compare the kurtosis estimates
#' all.equal(drop(kurtosisv), kurt_r, check.attributes=FALSE)
#' # Compare the speed of RcppArmadillo with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#' Rcpp=HighFreq::roll_kurtosis(retp, step=25, lookb=3),
#' Rcode=sapply(1:NROW(endd), function(it) {
#' HighFreq::calc_kurtosis(retp[startp[it]:endd[it], ])
#' }),
#' times=10))[, c(1, 4, 5)] # end microbenchmark summary
#' }
#' @export
roll_kurtosis <- function(tseries, startp = 0L, endd = 0L, step = 1L, lookb = 1L, stub = 0L, method = "moment", confl = 0.75) {
# Auto-generated binding (Rcpp::compileAttributes): forwards all arguments to
# the registered C++ routine; defaults mirror the C++ declaration.
.Call(`_HighFreq_roll_kurtosis`, tseries, startp, endd, step, lookb, stub, method, confl)
}
#' Perform a rolling regression and calculate a matrix of regression
#' coefficients, their t-values, and z-scores.
#'
#' @param \code{respv} A single-column \emph{time series} or a \emph{vector}
#' of response data.
#'
#' @param \code{predm} A \emph{time series} or a \emph{matrix} of predictor
#' data.
#'
#' @param \code{controlv} A \emph{list} of model parameters (see Details).
#'
#' @param \code{startp} An \emph{integer} vector of start points (the default
#' is \code{startp = 0}).
#'
#' @param \code{endd} An \emph{integer} vector of end points (the default is
#' \code{endd = 0}).
#'
#' @param \code{step} The number of time periods between the end points (the
#' default is \code{step = 1}).
#'
#' @param \code{lookb} The number of end points in the look-back interval
#' (the default is \code{lookb = 1}).
#'
#' @param \code{stub} An \emph{integer} value equal to the first end point for
#' calculating the end points (the default is \code{stub = 0}).
#'
#' @return A \emph{matrix} with the regression coefficients, their t-values,
#'   and z-scores, with the same number of rows as \code{predm} and a
#'   number of columns equal to \code{2n+1}, where \code{n} is the number of
#'   columns of \code{predm}.
#'
#' @details
#' The function \code{roll_reg()} performs a rolling regression over the end
#' points of the predictor matrix, and calculates a \emph{matrix} of
#' regression coefficients, their t-values, and z-scores.
#'
#' The function \code{roll_reg()} performs a loop over the end points, and at
#' each end point it subsets the time series \code{predm} over a look-back
#' interval equal to \code{lookb} number of end points.
#'
#' If the arguments \code{endd} and \code{startp} are not given then it
#' first calculates a vector of end points separated by \code{step} time
#' periods. It calculates the end points along the rows of \code{predm}
#' using the function \code{calc_endpoints()}, with the number of time
#' periods between the end points equal to \code{step} time periods.
#'
#' For example, the rolling regression at \code{25} day end points, with a
#' \code{75} day look-back, can be calculated using the parameters
#' \code{step = 25} and \code{lookb = 3}.
#'
#' It passes the subset time series to the function \code{calc_reg()}, which
#' calculates the regression coefficients, their t-values, and the z-score.
#' The function \code{roll_reg()} accepts a list of model parameters
#' through the argument \code{controlv}, and passes it to the function
#' \code{calc_reg()}.
#' The list of model parameters can be created using the function
#' \code{param_reg()}. See the function \code{param_reg()} for a
#' description of the model parameters.
#'
#' The number of columns of the return matrix depends on the number of
#' columns of the predictor matrix (including the intercept column, if it's
#' been added in \code{R}).
#' The number of regression coefficients is equal to the number of columns of
#' the predictor matrix.
#' If the predictor matrix contains an intercept column then the first
#' regression coefficient is equal to the intercept value \eqn{\alpha}.
#'
#' The number of columns of the return matrix is equal to the number of
#' regression coefficients, plus their t-values, plus the z-score column.
#' The number of t-values is equal to the number of coefficients.
#' If the number of columns of the predictor matrix is equal to \code{n},
#' then \code{roll_reg()} returns a matrix with \code{2n+1} columns: \code{n}
#' regression coefficients, \code{n} corresponding t-values, and \code{1}
#' z-score column.
#'
#' @examples
#' \dontrun{
#' # Calculate historical returns
#' predm <- na.omit(rutils::etfenv$returns[, c("XLP", "VTI")])
#' # Add unit intercept column to the predictor matrix
#' predm <- cbind(rep(1, NROW(predm)), predm)
#' # Define monthly end points and start points
#' endd <- xts::endpoints(predm, on="months")[-1]
#' lookb <- 12
#' startp <- c(rep(1, lookb), endd[1:(NROW(endd)-lookb)])
#' # Create a default list of regression parameters
#' controlv <- HighFreq::param_reg()
#' # Calculate rolling betas using RcppArmadillo
#' regroll <- HighFreq::roll_reg(respv=predm[, 2], predm=predm[, -2], endd=(endd-1), startp=(startp-1), controlv=controlv)
#' betas <- regroll[, 2]
#' # Calculate rolling betas in R
#' betar <- sapply(1:NROW(endd), FUN=function(ep) {
#' datav <- predm[startp[ep]:endd[ep], ]
#' # HighFreq::calc_reg(datav[, 2], datav[, -2], controlv)
#' drop(cov(datav[, 2], datav[, 3])/var(datav[, 3]))
#' }) # end sapply
#' # Compare the outputs of both functions
#' all.equal(betas, betar, check.attributes=FALSE)
#' }
#'
#' @export
roll_reg <- function(respv, predm, controlv, startp = 0L, endd = 0L, step = 1L, lookb = 1L, stub = 0L) {
# Auto-generated Rcpp binding: forwards the arguments unchanged to the
# compiled C++ routine registered as `_HighFreq_roll_reg`.
# Returns a matrix of regression coefficients, t-values, and z-scores
# (2n+1 columns for an n-column predictor -- see roxygen details above).
.Call(`_HighFreq_roll_reg`, respv, predm, controlv, startp, endd, step, lookb, stub)
}
#' Perform a rolling standardization (centering and scaling) of the columns of
#' a \emph{time series} of data using \code{RcppArmadillo}.
#'
#' @param \code{matrix} A \emph{time series} or \emph{matrix} of data.
#'
#' @param \code{lookb} The length of the look-back interval, equal to the
#' number of rows of data used in the scaling.
#'
#' @param \code{center} A \emph{Boolean} argument: if \code{TRUE} then center
#' the columns so that they have zero mean or median (the default is
#' \code{TRUE}).
#'
#' @param \code{scale} A \emph{Boolean} argument: if \code{TRUE} then scale the
#' columns so that they have unit standard deviation or MAD (the default is
#' \code{TRUE}).
#'
#' @param \code{use_median} A \emph{Boolean} argument: if \code{TRUE} then the
#' centrality (central tendency) is calculated as the \emph{median} and the
#' dispersion is calculated as the \emph{median absolute deviation}
#' (\emph{MAD}) (the default is \code{FALSE}).
#' If \code{use_median = FALSE} then the centrality is calculated as the
#' \emph{mean} and the dispersion is calculated as the \emph{standard
#' deviation}.
#'
#' @return A \emph{matrix} with the same dimensions as the input argument
#' \code{tseries}.
#'
#' @details
#' The function \code{roll_scale()} performs a rolling standardization
#' (centering and scaling) of the columns of the \code{tseries} argument
#' using \code{RcppArmadillo}.
#' The function \code{roll_scale()} performs a loop over the rows of
#' \code{tseries}, subsets a number of previous (past) rows equal to
#' \code{lookb}, and standardizes the subset matrix by calling the
#' function \code{calc_scale()}. It assigns the last row of the standardized
#' subset \emph{matrix} to the return matrix.
#'
#' If the arguments \code{center} and \code{scale} are both \code{TRUE} and
#' \code{use_median} is \code{FALSE} (the defaults), then
#' \code{roll_scale()} performs the same calculation as the function
#' \code{roll::roll_scale()}.
#'
#' If the arguments \code{center} and \code{scale} are both \code{TRUE} (the
#' defaults), then \code{calc_scale()} standardizes the data.
#' If the argument \code{center} is \code{FALSE} then \code{calc_scale()}
#' only scales the data (divides it by the standard deviations).
#' If the argument \code{scale} is \code{FALSE} then \code{calc_scale()}
#' only demeans the data (subtracts the means).
#'
#' If the argument \code{use_median} is \code{TRUE}, then it calculates the
#' centrality as the \emph{median} and the dispersion as the \emph{median
#' absolute deviation} (\emph{MAD}).
#'
#' @examples
#' \dontrun{
#' # Calculate a time series of returns
#' retp <- zoo::coredata(na.omit(rutils::etfenv$returns[, c("IEF", "VTI")]))
#' lookb <- 11
#' rolled_scaled <- roll::roll_scale(retp, width=lookb, min_obs=1)
#' rolled_scaled2 <- HighFreq::roll_scale(retp, lookb=lookb)
#' all.equal(rolled_scaled[-(1:2), ], rolled_scaled2[-(1:2), ],
#' check.attributes=FALSE)
#' }
#'
#' @export
roll_scale <- function(matrix, lookb, center = TRUE, scale = TRUE, use_median = FALSE) {
# Auto-generated Rcpp binding: forwards the arguments unchanged to the
# compiled C++ routine registered as `_HighFreq_roll_scale`.
# NOTE(review): the first argument is named `matrix` (shadowing base::matrix)
# while the roxygen docs describe it as `tseries` -- confirm against the C++
# signature; renaming here would break callers that pass `matrix=` by name.
.Call(`_HighFreq_roll_scale`, matrix, lookb, center, scale, use_median)
}
#' Standardize (center and scale) the columns of a \emph{time series} of data
#' over time and in place, without copying the data in memory, using
#' \code{RcppArmadillo}.
#'
#' @param \code{tseries} A \emph{time series} or \emph{matrix} of data.
#'
#' @param \code{lambda} A decay factor which multiplies past estimates.
#'
#' @param \code{center} A \emph{Boolean} argument: if \code{TRUE} then center
#' the columns so that they have zero mean or median (the default is
#' \code{TRUE}).
#'
#' @param \code{scale} A \emph{Boolean} argument: if \code{TRUE} then scale the
#' columns so that they have unit standard deviation or MAD (the default is
#' \code{TRUE}).
#'
#' @return Void (no return value - modifies the data in place).
#'
#' @details
#' The function \code{run_scale()} performs a trailing standardization
#' (centering and scaling) of the columns of the \code{tseries} argument
#' using \code{RcppArmadillo}.
#'
#' The function \code{run_scale()} accepts a \emph{pointer} to the argument
#' \code{tseries}, and it overwrites the old data with the standardized
#' data. It performs the calculation in place, without copying the data in
#' memory, which can significantly increase the computation speed for large
#' time series.
#'
#' The function \code{run_scale()} performs a loop over the rows of
#' \code{tseries}, and standardizes the data using its trailing means and
#' standard deviations.
#'
#' The function \code{run_scale()} calculates the trailing mean and variance
#' of streaming \emph{time series} data \eqn{r_t}, by recursively weighting
#' the past estimates with the new data, using the decay factor \eqn{\lambda}:
#' \deqn{
#' \bar{r}_t = \lambda \bar{r}_{t-1} + (1-\lambda) r_t
#' }
#' \deqn{
#' \sigma^2_t = \lambda \sigma^2_{t-1} + (1-\lambda) (r_t - \bar{r}_t)^2
#' }
#' Where \eqn{\bar{r}_t} is the trailing mean and \eqn{\sigma^2_t} is the
#' trailing variance.
#'
#' It then calculates the standardized data as follows:
#' \deqn{
#' r^{\prime}_t = \frac{r_t - \bar{r}_t}{\sigma_t}
#' }
#'
#' If the arguments \code{center} and \code{scale} are both \code{TRUE} (the
#' defaults), then \code{run_scale()} standardizes the data.
#' If the argument \code{center} is \code{FALSE} then \code{run_scale()}
#' only scales the data (divides it by the standard deviations).
#' If the argument \code{scale} is \code{FALSE} then \code{run_scale()}
#' only demeans the data (subtracts the means).
#'
#' The value of the decay factor \eqn{\lambda} must be in the range between
#' \code{0} and \code{1}.
#' If \eqn{\lambda} is close to \code{1} then the decay is weak and past
#' values have a greater weight, and the trailing variance values have a
#' stronger dependence on past data. This is equivalent to a long
#' look-back interval.
#' If \eqn{\lambda} is much less than \code{1} then the decay is strong and
#' past values have a smaller weight, and the trailing variance values have a
#' weaker dependence on past data. This is equivalent to a short look-back
#' interval.
#'
#' The above online recursive formulas are convenient for processing live
#' streaming data because they don't require maintaining a buffer of past
#' data.
#' The formulas are equivalent to a convolution with exponentially decaying
#' weights, but they're much faster to calculate.
#' Using exponentially decaying weights is more natural than using a sliding
#' look-back interval, because it gradually "forgets" about the past data.
#'
#' The function \code{run_scale()} uses \code{RcppArmadillo} \code{C++} code,
#' so it can be over \code{100} times faster than the equivalent \code{R}
#' code.
#'
#' @examples
#' \dontrun{
#' # Calculate historical returns
#' retp <- na.omit(rutils::etfenv$returns[, c("XLF", "VTI")])
#' # Calculate the trailing standardized returns using R code
#' lambdaf <- 0.9
#' lambda1 <- 1 - lambdaf
#' scaled <- zoo::coredata(retp)
#' meanm <- scaled[1, ];
#' vars <- scaled[1, ]^2;
#' for (it in 2:NROW(retp)) {
#' meanm <- lambdaf*meanm + lambda1*scaled[it, ];
#' vars <- lambdaf*vars + lambda1*(scaled[it, ] - meanm)^2;
#' scaled[it, ] <- (scaled[it, ] - meanm)/sqrt(vars)
#' } # end for
#' # Calculate the trailing standardized returns using C++ code
#' HighFreq::run_scale(retp, lambda=lambdaf)
#' all.equal(zoo::coredata(retp), scaled, check.attributes=FALSE)
#' # Compare the speed of RcppArmadillo with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#' Rcpp=HighFreq::run_scale(retp, lambda=lambdaf),
#' Rcode={for (it in 2:NROW(retp)) {
#' meanm <- lambdaf*meanm + lambda1*scaled[it, ];
#' vars <- lambdaf*vars + lambda1*(scaled[it, ] - meanm)^2;
#' scaled[it, ] <- (scaled[it, ] - meanm)/sqrt(vars)
#' }}, # end for
#' times=10))[, c(1, 4, 5)] # end microbenchmark summary
#' }
#'
#' @export
run_scale <- function(tseries, lambda, center = TRUE, scale = TRUE) {
# Auto-generated Rcpp binding for the compiled routine `_HighFreq_run_scale`.
# Standardizes `tseries` in place (the C++ code overwrites the caller's data
# without copying), so the result is wrapped in invisible() -- the function
# is called for its side effect, not its return value.
invisible(.Call(`_HighFreq_run_scale`, tseries, lambda, center, scale))
}
#' Calculate a \emph{vector} of z-scores of the residuals of rolling
#' regressions at the end points of the predictor matrix.
#'
#' @param \code{respv} A single-column \emph{time series} or a \emph{vector}
#' of response data.
#'
#' @param \code{predm} A \emph{time series} or a \emph{matrix} of predictor
#' data.
#'
#' @param \code{startp} An \emph{integer} vector of start points (the default
#' is \code{startp = 0}).
#'
#' @param \code{endd} An \emph{integer} vector of end points (the default is
#' \code{endd = 0}).
#'
#' @param \code{step} The number of time periods between the end points (the
#' default is \code{step = 1}).
#'
#' @param \code{lookb} The number of end points in the look-back interval
#' (the default is \code{lookb = 1}).
#'
#' @param \code{stub} An \emph{integer} value equal to the first end point for
#' calculating the end points (the default is \code{stub = 0}).
#'
#' @return A column \emph{vector} of the same length as the number of rows of
#' \code{predm}.
#'
#' @details
#' The function \code{roll_zscores()} calculates a \emph{vector} of z-scores
#' of the residuals of rolling regressions at the end points of the
#' \emph{time series} \code{predm}.
#'
#' The function \code{roll_zscores()} performs a loop over the end points,
#' and at each end point it subsets the time series \code{predm} over a
#' look-back interval equal to \code{lookb} number of end points.
#'
#' It passes the subset time series to the function \code{calc_lm()}, which
#' calculates the regression data.
#'
#' If the arguments \code{endd} and \code{startp} are not given then it
#' first calculates a vector of end points separated by \code{step} time
#' periods. It calculates the end points along the rows of \code{predm}
#' using the function \code{calc_endpoints()}, with the number of time
#' periods between the end points equal to \code{step} time periods.
#'
#' For example, the rolling variance at \code{25} day end points, with a
#' \code{75} day look-back, can be calculated using the parameters
#' \code{step = 25} and \code{lookb = 3}.
#'
#' @examples
#' \dontrun{
#' # Calculate historical returns
#' retp <- na.omit(rutils::etfenv$returns[, c("XLF", "VTI", "IEF")])
#' # Response equals XLF returns
#' respv <- retp[, 1]
#' # Predictor matrix equals VTI and IEF returns
#' predm <- retp[, -1]
#' # Calculate Z-scores from rolling time series regression using RcppArmadillo
#' lookb <- 11
#' zscores <- HighFreq::roll_zscores(respv=respv, predm=predm, lookb=lookb)
#' # Calculate z-scores in R from rolling multivariate regression using lm()
#' zscoresr <- sapply(1:NROW(predm), function(ro_w) {
#' if (ro_w == 1) return(0)
#' startpoint <- max(1, ro_w-lookb+1)
#' responsi <- respv[startpoint:ro_w]
#' predicti <- predm[startpoint:ro_w, ]
#' regmod <- lm(responsi ~ predicti)
#' residuals <- regmod$residuals
#' residuals[NROW(residuals)]/sd(residuals)
#' }) # end sapply
#' # Compare the outputs of both functions
#' all.equal(zscores[-(1:lookb)], zscoresr[-(1:lookb)],
#' check.attributes=FALSE)
#' }
#'
#' @export
roll_zscores <- function(respv, predm, startp = 0L, endd = 0L, step = 1L, lookb = 1L, stub = 0L) {
# Auto-generated Rcpp binding: forwards the arguments unchanged to the
# compiled C++ routine registered as `_HighFreq_roll_zscores`.
# Note `lookb` is the sixth parameter -- callers must pass it by name
# (positional use would land in the `startp` slot).
.Call(`_HighFreq_roll_zscores`, respv, predm, startp, endd, step, lookb, stub)
}
#' Calculate a \emph{matrix} of moment values over a rolling look-back
#' interval attached at the end points of a \emph{time series} or a
#' \emph{matrix}.
#'
#' @param \code{tseries} A \emph{time series} or a \emph{matrix} of data.
#'
#' @param \code{funame} A \emph{character string} specifying the moment
#' function (the default is \code{funame = "calc_mean"}).
#'
#' @param \code{method} A \emph{character string} specifying the type of the
#' model for the moment (the default is \code{method = "moment"}).
#'
#' @param \code{confl} The confidence level for calculating the quantiles of
#' returns (the default is \code{confl = 0.75}).
#'
#' @param \code{startp} An \emph{integer} vector of start points (the default
#' is \code{startp = 0}).
#'
#' @param \code{endd} An \emph{integer} vector of end points (the default is
#' \code{endd = 0}).
#'
#' @param \code{step} The number of time periods between the end points (the
#' default is \code{step = 1}).
#'
#' @param \code{lookb} The number of end points in the look-back interval
#' (the default is \code{lookb = 1}).
#'
#' @param \code{stub} An \emph{integer} value equal to the first end point for
#' calculating the end points (the default is \code{stub = 0}).
#'
#' @return A \emph{matrix} with the same number of columns as the input time
#' series \code{tseries}, and the number of rows equal to the number of end
#' points.
#'
#' @details
#' The function \code{roll_moment()} calculates a \emph{matrix} of moment
#' values, over rolling look-back intervals attached at the end points of the
#' \emph{time series} \code{tseries}.
#'
#' The function \code{roll_moment()} performs a loop over the end points, and
#' at each end point it subsets the time series \code{tseries} over a
#' look-back interval equal to \code{lookb} number of end points.
#'
#' It passes the subset time series to the function specified by the argument
#' \code{funame}, which calculates the statistic.
#' See the functions \code{calc_*()} for a description of the different
#' moments.
#' The function name must be one of the following:
#' \itemize{
#' \item "calc_mean" for the estimator of the mean (location),
#' \item "calc_var" for the estimator of the dispersion (variance),
#' \item "calc_skew" for the estimator of the skewness,
#' \item "calc_kurtosis" for the estimator of the kurtosis.
#' }
#' (The default is the \code{funame = "calc_mean"}).
#'
#' If the arguments \code{endd} and \code{startp} are not given then it
#' first calculates a vector of end points separated by \code{step} time
#' periods. It calculates the end points along the rows of \code{tseries}
#' using the function \code{calc_endpoints()}, with the number of time
#' periods between the end points equal to \code{step} time periods.
#'
#' For example, the rolling variance at \code{25} day end points, with a
#' \code{75} day look-back, can be calculated using the parameters
#' \code{step = 25} and \code{lookb = 3}.
#'
#' The function \code{roll_moment()} calls the function \code{calc_momptr()}
#' to calculate a pointer to a moment function from the function name
#' \code{funame} (string). The function pointer is used internally in the
#' \code{C++} code, but the function \code{calc_momptr()} is not exported to
#' \code{R}.
#'
#' The function \code{roll_moment()} is implemented in \code{RcppArmadillo}
#' \code{C++} code, which makes it several times faster than \code{R} code.
#'
#' @examples
#' \dontrun{
#' # Define time series of returns using package rutils
#' retp <- na.omit(rutils::etfenv$returns$VTI)
#' # Calculate the rolling variance at 25 day end points, with a 75 day look-back
#' var_rollfun <- HighFreq::roll_moment(retp, funame="calc_var", step=25, lookb=3)
#' # Calculate the rolling variance using roll_var()
#' var_roll <- HighFreq::roll_var(retp, step=25, lookb=3)
#' # Compare the two methods
#' all.equal(var_rollfun, var_roll, check.attributes=FALSE)
#' # Define end points and start points
#' endd <- HighFreq::calc_endpoints(NROW(retp), step=25)
#' startp <- HighFreq::calc_startpoints(endd, lookb=3)
#' # Calculate the rolling variance using RcppArmadillo
#' var_rollfun <- HighFreq::roll_moment(retp, funame="calc_var", startp=startp, endd=endd)
#' # Calculate the rolling variance using R code
#' var_roll <- sapply(1:NROW(endd), function(it) {
#' var(retp[startp[it]:endd[it]+1, ])
#' }) # end sapply
#' var_roll[1] <- 0
#' # Compare the two methods
#' all.equal(drop(var_rollfun), var_roll, check.attributes=FALSE)
#' # Compare the speed of RcppArmadillo with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#' Rcpp=HighFreq::roll_moment(retp, funame="calc_var", startp=startp, endd=endd),
#' Rcode=sapply(1:NROW(endd), function(it) {
#' var(retp[startp[it]:endd[it]+1, ])
#' }),
#' times=10))[, c(1, 4, 5)] # end microbenchmark summary
#' }
#' @export
roll_moment <- function(tseries, funame = "calc_mean", method = "moment", confl = 0.75, startp = 0L, endd = 0L, step = 1L, lookb = 1L, stub = 0L) {
# Auto-generated Rcpp binding: forwards the arguments unchanged to the
# compiled C++ routine registered as `_HighFreq_roll_moment`.
# `funame` selects the moment estimator ("calc_mean", "calc_var",
# "calc_skew", or "calc_kurtosis") resolved to a function pointer in C++.
.Call(`_HighFreq_roll_moment`, tseries, funame, method, confl, startp, endd, step, lookb, stub)
}
#' Simulate or estimate the rolling variance under a \emph{GARCH(1,1)} process
#' using \emph{Rcpp}.
#'
#' @param \code{omega} Parameter proportional to the long-term average level
#' of variance.
#'
#' @param \code{alpha} The weight associated with recent realized variance
#' updates.
#'
#' @param \code{beta} The weight associated with the past variance estimates.
#'
#' @param \code{innov} A single-column \emph{matrix} of innovations.
#'
#' @param \code{is_random} \emph{Boolean} argument: Are the innovations random
#' numbers or historical returns? (The default is \code{is_random = TRUE}.)
#'
#' @return A \emph{matrix} with two columns and with the same number of rows as
#' the argument \code{innov}. The first column are the simulated returns and
#' the second column is the variance.
#'
#' @details
#' The function \code{sim_garch()} simulates or estimates the rolling variance
#' under a \emph{GARCH(1,1)} process using \emph{Rcpp}.
#'
#' If \code{is_random = TRUE} (the default) then the innovations \code{innov}
#' are treated as random numbers \eqn{\xi_i} and the \emph{GARCH(1,1)}
#' process is given by:
#' \deqn{
#' r_i = \sigma_{i-1} \xi_i
#' }
#' \deqn{
#' \sigma^2_i = \omega + \alpha r^2_i + \beta \sigma_{i-1}^2
#' }
#' Where \eqn{r_i} and \eqn{\sigma^2_i} are the simulated returns and
#' variance, and \eqn{\omega}, \eqn{\alpha}, and \eqn{\beta} are the
#' \emph{GARCH} parameters, and \eqn{\xi_i} are standard normal
#' \emph{innovations}.
#'
#' The long-term equilibrium level of the simulated variance is proportional
#' to the parameter \eqn{\omega}:
#' \deqn{
#' \sigma^2 = \frac{\omega}{1 - \alpha - \beta}
#' }
#' So the sum of \eqn{\alpha} plus \eqn{\beta} should be less than \eqn{1},
#' otherwise the volatility becomes explosive.
#'
#' If \code{is_random = FALSE} then the function \code{sim_garch()}
#' \emph{estimates} the rolling variance from the historical returns. The
#' innovations \code{innov} are equal to the historical returns \eqn{r_i} and
#' the \emph{GARCH(1,1)} process is simply:
#' \deqn{
#' \sigma^2_i = \omega + \alpha r^2_i + \beta \sigma_{i-1}^2
#' }
#' Where \eqn{\sigma^2_i} is the rolling variance.
#'
#' The above should be viewed as a formula for \emph{estimating} the rolling
#' variance from the historical returns, rather than simulating them. It
#' represents exponential smoothing of the squared returns with a decay
#' factor equal to \eqn{\beta}.
#'
#' The function \code{sim_garch()} simulates the \emph{GARCH} process using
#' fast \emph{Rcpp} \code{C++} code.
#'
#' @examples
#' \dontrun{
#' # Define the GARCH model parameters
#' alpha <- 0.79
#' betav <- 0.2
#' om_ega <- 1e-4*(1-alpha-betav)
#' # Calculate matrix of standard normal innovations
#' innov <- matrix(rnorm(1e3))
#' # Simulate the GARCH process using Rcpp
#' garch_data <- HighFreq::sim_garch(omega=om_ega, alpha=alpha, beta=betav, innov=innov)
#' # Plot the GARCH rolling volatility and cumulative returns
#' plot(sqrt(garch_data[, 2]), t="l", main="Simulated GARCH Volatility", ylab="volatility")
#' plot(cumsum(garch_data[, 1]), t="l", main="Simulated GARCH Cumulative Returns", ylab="cumulative returns")
#' # Calculate historical VTI returns
#' retp <- na.omit(rutils::etfenv$returns$VTI)
#' # Estimate the GARCH volatility of VTI returns
#' garch_data <- HighFreq::sim_garch(omega=om_ega, alpha=alpha, beta=betav,
#' innov=retp, is_random=FALSE)
#' # Plot dygraph of the estimated GARCH volatility
#' dygraphs::dygraph(xts::xts(sqrt(garch_data[, 2]), index(retp)),
#' main="Estimated GARCH Volatility of VTI")
#' }
#'
#' @export
sim_garch <- function(omega, alpha, beta, innov, is_random = TRUE) {
# Auto-generated Rcpp binding: forwards the arguments unchanged to the
# compiled C++ routine registered as `_HighFreq_sim_garch`.
# With is_random=TRUE it simulates a GARCH(1,1) path from the innovations;
# with is_random=FALSE it treats `innov` as historical returns and
# estimates the rolling variance instead.
.Call(`_HighFreq_sim_garch`, omega, alpha, beta, innov, is_random)
}
#' Simulate an \emph{Ornstein-Uhlenbeck} process using \emph{Rcpp}.
#'
#' @param \code{init_price} The initial price.
#'
#' @param \code{eq_price} The equilibrium price.
#'
#' @param \code{theta} The strength of mean reversion.
#'
#' @param \code{innov} A single-column \emph{matrix} of innovations (random
#' numbers).
#'
#' @return A single-column \emph{matrix} of simulated prices, with the same
#' number of rows as the argument \code{innov}.
#'
#' @details
#' The function \code{sim_ou()} simulates the following
#' \emph{Ornstein-Uhlenbeck} process:
#' \deqn{
#' r_i = p_i - p_{i-1} = \theta \, (\mu - p_{i-1}) + \xi_i
#' }
#' \deqn{
#' p_i = p_{i-1} + r_i
#' }
#' Where \eqn{r_i} and \eqn{p_i} are the simulated returns and prices,
#' \eqn{\theta} and \eqn{\mu} are the
#' \emph{Ornstein-Uhlenbeck} parameters, and \eqn{\xi_i} are the standard
#' \emph{innovations}.
#' The recursion starts with: \eqn{r_1 = \xi_1} and \eqn{p_1 = init\_price}.
#'
#' The function \code{sim_ou()} simulates the percentage returns as equal to
#' the difference between the equilibrium price \eqn{\mu} minus the latest
#' price \eqn{p_{i-1}}, times the mean reversion parameter \eqn{\theta}, plus
#' a random normal innovation. The log prices are calculated as the sum of
#' returns (not compounded), so they can become negative.
#'
#' The function \code{sim_ou()} simulates the \emph{Ornstein-Uhlenbeck}
#' process using fast \emph{Rcpp} \code{C++} code.
#'
#' The function \code{sim_ou()} returns a single-column \emph{matrix}
#' representing the \emph{time series} of simulated prices.
#'
#' @examples
#' \dontrun{
#' # Define the Ornstein-Uhlenbeck model parameters
#' init_price <- 0.0
#' eq_price <- 1.0
#' sigmav <- 0.01
#' thetav <- 0.01
#' innov <- matrix(rnorm(1e3))
#' # Simulate Ornstein-Uhlenbeck process using Rcpp
#' prices <- HighFreq::sim_ou(init_price=init_price, eq_price=eq_price, theta=thetav, innov=innov)
#' plot(prices, t="l", main="Simulated Ornstein-Uhlenbeck Prices", ylab="prices")
#' }
#'
#' @export
sim_ou <- function(init_price, eq_price, theta, innov) {
# Auto-generated Rcpp binding: forwards the arguments unchanged to the
# compiled C++ routine registered as `_HighFreq_sim_ou`.
# Simulates an Ornstein-Uhlenbeck price path; note there is no volatility
# argument -- the scale comes entirely from the `innov` matrix.
.Call(`_HighFreq_sim_ou`, init_price, eq_price, theta, innov)
}
#' Simulate a \emph{Schwartz} process using \emph{Rcpp}.
#'
#' @param \code{init_price} The initial price.
#'
#' @param \code{eq_price} The equilibrium price.
#'
#' @param \code{theta} The strength of mean reversion.
#'
#' @param \code{innov} A single-column \emph{matrix} of innovations (random
#' numbers).
#'
#' @return A single-column \emph{matrix} of simulated prices, with the same
#' number of rows as the argument \code{innov}.
#'
#' @details
#' The function \code{sim_schwartz()} simulates a \emph{Schwartz} process
#' using fast \emph{Rcpp} \code{C++} code.
#'
#' The \emph{Schwartz} process is the exponential of the
#' \emph{Ornstein-Uhlenbeck} process, and similar comments apply to it.
#' The prices are calculated as the exponentially compounded returns, so they
#' are never negative. The log prices can be obtained by taking the logarithm
#' of the prices.
#'
#' The function \code{sim_schwartz()} simulates the percentage returns as
#' equal to the difference between the equilibrium price \eqn{\mu} minus the
#' latest price \eqn{p_{i-1}}, times the mean reversion parameter
#' \eqn{\theta}, plus a random normal innovation.
#'
#' The function \code{sim_schwartz()} returns a single-column \emph{matrix}
#' representing the \emph{time series} of simulated prices.
#'
#' @examples
#' \dontrun{
#' # Define the Schwartz model parameters
#' init_price <- 1.0
#' eq_price <- 2.0
#' thetav <- 0.01
#' innov <- matrix(rnorm(1e3, sd=0.01))
#' # Simulate Schwartz process using Rcpp
#' prices <- HighFreq::sim_schwartz(init_price=init_price, eq_price=eq_price, theta=thetav, innov=innov)
#' plot(prices, t="l", main="Simulated Schwartz Prices", ylab="prices")
#' }
#'
#' @export
sim_schwartz <- function(init_price, eq_price, theta, innov) {
# Auto-generated Rcpp binding: forwards the arguments unchanged to the
# compiled C++ routine registered as `_HighFreq_sim_schwartz`.
# Simulates a Schwartz process (exponentiated Ornstein-Uhlenbeck), so the
# returned prices are always positive.
.Call(`_HighFreq_sim_schwartz`, init_price, eq_price, theta, innov)
}
#' Simulate \emph{autoregressive} returns by recursively filtering a
#' \emph{matrix} of innovations through a \emph{matrix} of
#' \emph{autoregressive} coefficients.
#'
#' @param \code{innov} A single-column \emph{matrix} of innovations.
#'
#' @param \code{coeff} A single-column \emph{matrix} of \emph{autoregressive}
#' coefficients.
#'
#' @return A single-column \emph{matrix} of simulated returns, with the same
#' number of rows as the argument \code{innov}.
#'
#' @details
#' The function \code{sim_ar()} recursively filters the \emph{matrix} of
#' innovations \code{innov} through the \emph{matrix} of
#' \emph{autoregressive} coefficients \code{coeff}, using fast
#' \code{RcppArmadillo} \code{C++} code.
#'
#' The function \code{sim_ar()} simulates an \emph{autoregressive} process
#' \eqn{AR(n)} of order \eqn{n}:
#' \deqn{
#' r_i = \varphi_1 r_{i-1} + \varphi_2 r_{i-2} + \ldots + \varphi_n r_{i-n} + \xi_i
#' }
#' Where \eqn{r_i} is the simulated output time series, \eqn{\varphi_i} are
#' the \emph{autoregressive} coefficients, and \eqn{\xi_i} are the standard
#' normal \emph{innovations}.
#'
#' The order \eqn{n} of the \emph{autoregressive} process \eqn{AR(n)}, is
#' equal to the number of rows of the \emph{autoregressive} coefficients
#' \code{coeff}.
#'
#' The function \code{sim_ar()} performs the same calculation as the standard
#' \code{R} function \cr\code{filter(x=innov, filter=coeff,
#' method="recursive")}, but it's several times faster.
#'
#' @examples
#' \dontrun{
#' # Define AR coefficients
#' coeff <- matrix(c(0.1, 0.3, 0.5))
#' # Calculate matrix of innovations
#' innov <- matrix(rnorm(1e4, sd=0.01))
#' # Calculate recursive filter using filter()
#' innof <- filter(innov, filter=coeff, method="recursive")
#' # Calculate recursive filter using RcppArmadillo
#' retp <- HighFreq::sim_ar(coeff, innov)
#' # Compare the two methods
#' all.equal(as.numeric(retp), as.numeric(innof))
#' # Compare the speed of RcppArmadillo with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#' Rcpp=HighFreq::sim_ar(coeff, innov),
#' Rcode=filter(innov, filter=coeff, method="recursive"),
#' times=10))[, c(1, 4, 5)] # end microbenchmark summary
#' }
#'
#' @export
sim_ar <- function(coeff, innov) {
# Auto-generated Rcpp binding: forwards the arguments unchanged to the
# compiled C++ routine registered as `_HighFreq_sim_ar`.
# Recursive AR(n) filter; equivalent to
# filter(x=innov, filter=coeff, method="recursive") but faster.
.Call(`_HighFreq_sim_ar`, coeff, innov)
}
#' Simulate a \emph{Dickey-Fuller} process using \emph{Rcpp}.
#'
#' @param \code{init_price} The initial price.
#'
#' @param \code{eq_price} The equilibrium price.
#'
#' @param \code{theta} The strength of mean reversion.
#'
#' @param \code{coeff} A single-column \emph{matrix} of \emph{autoregressive}
#' coefficients.
#'
#' @param \code{innov} A single-column \emph{matrix} of innovations (random
#' numbers).
#'
#' @return A single-column \emph{matrix} of simulated prices, with the same
#' number of rows as the argument \code{innov}.
#'
#' @details
#' The function \code{sim_df()} simulates the following \emph{Dickey-Fuller}
#' process:
#' \deqn{
#' r_i = \theta \, (\mu - p_{i-1}) + \varphi_1 r_{i-1} + \ldots + \varphi_n r_{i-n} + \xi_i
#' }
#' \deqn{
#' p_i = p_{i-1} + r_i
#' }
#' Where \eqn{r_i} and \eqn{p_i} are the simulated returns and prices,
#' \eqn{\theta} and \eqn{\mu} are the \emph{Ornstein-Uhlenbeck} parameters,
#' \eqn{\varphi_i} are the \emph{autoregressive} coefficients, and
#' \eqn{\xi_i} are the normal \emph{innovations}.
#' The recursion starts with: \eqn{r_1 = \xi_1} and \eqn{p_1 = init\_price}.
#'
#' The \emph{Dickey-Fuller} process is a combination of an
#' \emph{Ornstein-Uhlenbeck} process and an \emph{autoregressive} process.
#' The order \eqn{n} of the \emph{autoregressive} process \eqn{AR(n)}, is
#' equal to the number of rows of the \emph{autoregressive} coefficients
#' \code{coeff}.
#'
#' The function \code{sim_df()} simulates the \emph{Dickey-Fuller}
#' process using fast \emph{Rcpp} \code{C++} code.
#'
#' The function \code{sim_df()} returns a single-column \emph{matrix}
#' representing the \emph{time series} of prices.
#'
#' @examples
#' \dontrun{
#' # Define the Ornstein-Uhlenbeck model parameters
#' init_price <- 1.0
#' eq_price <- 2.0
#' thetav <- 0.01
#' # Define AR coefficients
#' coeff <- matrix(c(0.1, 0.3, 0.5))
#' # Calculate matrix of standard normal innovations
#' innov <- matrix(rnorm(1e3, sd=0.01))
#' # Simulate Dickey-Fuller process using Rcpp
#' prices <- HighFreq::sim_df(init_price=init_price, eq_price=eq_price, theta=thetav, coeff=coeff, innov=innov)
#' plot(prices, t="l", main="Simulated Dickey-Fuller Prices")
#' }
#'
#' @export
sim_df <- function(init_price, eq_price, theta, coeff, innov) {
# Auto-generated Rcpp binding: forwards the arguments unchanged to the
# compiled C++ routine registered as `_HighFreq_sim_df`.
# Simulates a Dickey-Fuller process: an Ornstein-Uhlenbeck mean reversion
# combined with an AR(n) filter of order nrow(coeff).
.Call(`_HighFreq_sim_df`, init_price, eq_price, theta, coeff, innov)
}
#' Calculate the log-likelihood of a time series of returns assuming a
#' \emph{GARCH(1,1)} process.
#'
#' @param \code{omega} Parameter proportional to the long-term average level
#' of variance.
#'
#' @param \code{alpha} The weight associated with recent realized variance
#' updates.
#'
#' @param \code{beta} The weight associated with the past variance estimates.
#'
#' @param \code{returns} A single-column \emph{matrix} of returns.
#'
#' @param \code{minval} The floor value applied to the variance, to avoid zero
#' values. (The default is \code{minval = 0.000001}.)
#'
#' @return The log-likelihood value.
#'
#' @details
#' The function \code{lik_garch()} calculates the log-likelihood of a time
#' series of returns assuming a \emph{GARCH(1,1)} process.
#'
#' It first estimates the rolling variance of the \code{returns} argument
#' using function \code{sim_garch()}:
#' \deqn{
#' \sigma^2_i = \omega + \alpha r^2_i + \beta \sigma_{i-1}^2
#' }
#' Where \eqn{r_i} is the time series of returns, and \eqn{\sigma^2_i} is the
#' estimated rolling variance.
#' And \eqn{\omega}, \eqn{\alpha}, and \eqn{\beta} are the \emph{GARCH}
#' parameters.
#' It applies the floor value \code{minval} to the variance, to avoid zero
#' values. So the minimum value of the variance is equal to \code{minval}.
#'
#' The function \code{lik_garch()} calculates the log-likelihood assuming a
#' normal distribution of returns conditional on the variance
#' \eqn{\sigma^2_{i-1}} in the previous period, as follows:
#' \deqn{
#' likelihood = - \sum_{i=1}^n (\frac{r^2_i}{\sigma^2_{i-1}} + \log(\sigma^2_{i-1}))
#' }
#'
#' @examples
#' \dontrun{
#' # Define the GARCH model parameters
#' alpha <- 0.79
#' betav <- 0.2
#' om_ega <- 1e-4*(1-alpha-betav)
#' # Calculate historical VTI returns
#' retp <- na.omit(rutils::etfenv$returns$VTI)
#' # Calculate the log-likelihood of VTI returns assuming GARCH(1,1)
#' HighFreq::lik_garch(omega=om_ega, alpha=alpha, beta=betav, returns=retp)
#' }
#'
#' @export
lik_garch <- function(omega, alpha, beta, returns, minval = 0.000001) {
# Auto-generated Rcpp binding: forwards the arguments unchanged to the
# compiled C++ routine registered as `_HighFreq_lik_garch`.
# Computes the GARCH(1,1) log-likelihood of `returns`; `minval` floors the
# estimated rolling variance to avoid division by zero / log(0).
.Call(`_HighFreq_lik_garch`, omega, alpha, beta, returns, minval)
}
#' Simulate a portfolio optimization strategy using online (recursive) updating
#' of the covariance matrix.
#'
#' @param \code{rets} A \emph{time series} or \emph{matrix} of asset returns.
#'
#' @param \code{dimax} An \emph{integer} equal to the number of \emph{eigen
#' values} used for calculating the reduced inverse of the covariance
#' matrix (the default is \code{dimax = 0} - standard matrix inverse using
#' all the \emph{eigen values}).
#'
#' @param \code{lambda} A decay factor which multiplies the past asset returns.
#'
#' @param \code{lambdacov} A decay factor which multiplies the past covariance.
#'
#' @param \code{lambdaw} A decay factor which multiplies the past portfolio
#' weights.
#'
#' @return A \emph{matrix} of strategy returns and the portfolio weights, with
#' the same number of rows as the argument \code{rets}.
#'
#' @details
#' The function \code{sim_portfoptim()} simulates a portfolio optimization
#' strategy. The strategy calculates the maximum Sharpe portfolio weights
#' \emph{in-sample} at every point in time, and applies them in the
#' \emph{out-of-sample} time interval. It updates the trailing covariance
#' matrix recursively, instead of using past batches of data. The function
#' \code{sim_portfoptim()} uses three different decay factors for averaging past
#' values, to reduce the variance of its forecasts.
#'
#' The function \code{sim_portfoptim()} first scales the returns by their
#' trailing volatilities:
#' \deqn{
#' r^s_t = \frac{r_t}{\sigma_{t-1}}
#' }
#' Returns scaled by their volatility are more stationary so they're easier
#' to model.
#'
#' Then at every point in time, the function \code{sim_portfoptim()} calls
#' the function \code{HighFreq::push_covar()} to update the trailing
#' covariance matrix of the returns:
#' \deqn{
#' \bar{r}_t = \lambda_c \bar{r}_{t-1} + (1-\lambda_c) r^s_t
#' }
#' \deqn{
#' \hat{r}_t = r^s_t - \bar{r}_t
#' }
#' \deqn{
#' {cov}_t = \lambda_c {cov}_{t-1} + (1-\lambda_c) \hat{r}^T_t \hat{r}_t
#' }
#' Where \eqn{\lambda_c} is the decay factor which multiplies the past mean
#' and covariance.
#'
#' It then calls the function \code{HighFreq::calc_inv()} to calculate the
#' \emph{reduced inverse} of the covariance matrix using its eigen
#' decomposition:
#' \deqn{
#' \strong{C}^{-1} = \strong{O}_{dimax} \, \Sigma^{-1}_{dimax} \, \strong{O}^T_{dimax}
#' }
#' See the function \code{HighFreq::calc_inv()} for details.
#'
#' It then calculates the \emph{in-sample} weights of the maximum Sharpe
#' portfolio, by multiplying the inverse covariance matrix times the trailing
#' means of the asset returns:
#' \deqn{
#' \bar{r}_t = \lambda \bar{r}_{t-1} + (1-\lambda) r^s_t
#' }
#' \deqn{
#' \strong{w}_t = \strong{C}^{-1} \bar{r}_t
#' }
#' Note that the decay factor \eqn{\lambda} is different from the decay
#' factor \eqn{\lambda_c} used for updating the trailing covariance
#' matrix.
#'
#' It then scales the weights so their sum of squares is equal to one:
#' \deqn{
#' \strong{w}_t = \frac{\strong{w}_t}{\sqrt{\sum{\strong{w}^2_t}}}
#' }
#'
#' It then calculates the trailing mean of the weights:
#' \deqn{
#' \bar{\strong{w}}_t = \lambda_w \bar{\strong{w}}_{t-1} + (1-\lambda_w) \strong{w}_t
#' }
#' Note that the decay factor \eqn{\lambda_w} is different from the decay
#' factor \eqn{\lambda} used for updating the trailing means.
#'
#' It finally calculates the \emph{out-of-sample} portfolio returns by
#' multiplying the trailing mean weights times the scaled asset returns:
#' \deqn{
#' r^p_t = \bar{\strong{w}}_{t-1} r^s_t
#' }
#' Applying weights to scaled returns means trading stock amounts with unit
#' dollar volatility. So if the weight is equal to \code{2} then we should
#' purchase an amount of stock with dollar volatility equal to \code{2}
#' dollars. Trading stock amounts with unit dollar volatility improves
#' portfolio diversification.
#'
#' The function \code{sim_portfoptim()} uses three different decay factors
#' for averaging past values, to reduce the variance of its forecasts. The
#' value of the decay factor \eqn{\lambda} must be in the range between
#' \code{0} and \code{1}.
#' If \eqn{\lambda} is close to \code{1} then the decay is weak and past
#' values have a greater weight, so the trailing values have a greater
#' dependence on past data. This is equivalent to a long look-back
#' interval.
#' If \eqn{\lambda} is much less than \code{1} then the decay is strong and
#' past values have a smaller weight, so the trailing values have a weaker
#' dependence on past data. This is equivalent to a short look-back
#' interval.
#'
#' The function \code{sim_portfoptim()} returns multiple columns of data,
#' with the same number of rows as the input argument \code{rets}. The first
#' column contains the strategy returns and the remaining columns contain the
#' portfolio weights.
#'
#' @examples
#' \dontrun{
#' # Load ETF returns
#' retp <- rutils::etfenv$returns[, c("VTI", "TLT", "DBC", "USO", "XLF", "XLK")]
#' retp <- na.omit(retp)
#' datev <- zoo::index(retp) # dates
#' # Simulate a portfolio optimization strategy
#' dimax <- 6
#' lambdaf <- 0.978
#' lambdacov <- 0.995
#' lambdaw <- 0.9
#' pnls <- HighFreq::sim_portfoptim(retp, dimax, lambdaf, lambdacov, lambdaw)
#' colnames(pnls) <- c("pnls", "VTI", "TLT", "DBC", "USO", "XLF", "XLK")
#' pnls <- xts::xts(pnls, order.by=datev)
#' # Plot dygraph of strategy
#' wealthv <- cbind(retp$VTI, pnls$pnls*sd(retp$VTI)/sd(pnls$pnls))
#' colnames(wealthv) <- c("VTI", "Strategy")
#' endd <- rutils::calc_endpoints(wealthv, interval="weeks")
#' dygraphs::dygraph(cumsum(wealthv)[endd], main="Portfolio Optimization Strategy Returns") %>%
#' dyOptions(colors=c("blue", "red"), strokeWidth=2) %>%
#' dyLegend(width=300)
#' # Plot dygraph of weights
#' symbolv <- "VTI"
#' stockweights <- cbind(cumsum(get(symbolv, retp)), get(symbolv, pnls))
#' colnames(stockweights)[2] <- "Weight"
#' colnamev <- colnames(stockweights)
#' endd <- rutils::calc_endpoints(pnls, interval="weeks")
#' dygraphs::dygraph(stockweights[endd], main="Returns and Weight") %>%
#' dyAxis("y", label=colnamev[1], independentTicks=TRUE) %>%
#' dyAxis("y2", label=colnamev[2], independentTicks=TRUE) %>%
#' dySeries(axis="y", label=colnamev[1], strokeWidth=2, col="blue") %>%
#' dySeries(axis="y2", label=colnamev[2], strokeWidth=2, col="red")
#' }
#'
#' @export
# Thin auto-generated wrapper (Rcpp::compileAttributes); delegates to the
# registered C++ routine `_HighFreq_sim_portfoptim`, which simulates the
# online (recursive) max-Sharpe portfolio strategy described above.
# Returns a matrix with the strategy returns in the first column and the
# portfolio weights in the remaining columns, one row per row of `rets`.
sim_portfoptim <- function(rets, dimax, lambda, lambdacov, lambdaw) {
    .Call(`_HighFreq_sim_portfoptim`, rets, dimax, lambda, lambdacov, lambdaw)
}
#' Calculate the optimal portfolio weights using a variety of different
#' objective functions.
#'
#' @param \code{returns} A \emph{time series} or a \emph{matrix} of returns
#' data (the returns in excess of the risk-free rate).
#'
#' @param \code{controlv} A \emph{list} of portfolio optimization model
#' parameters (see Details).
#'
#'
#' @return A column \emph{vector} of the same length as the number of columns
#' of \code{returns}.
#'
#' @details
#' The function \code{calc_weights()} calculates the optimal portfolio
#' weights using a variety of different objective functions.
#'
#' The function \code{calc_weights()} accepts a list of portfolio
#' optimization parameters through the argument \code{controlv}.
#' The list of portfolio optimization parameters can be created using
#' the function \code{param_portf()}. Below is a description of the
#' parameters.
#'
#' If \code{method = "maxsharpe"} (the default) then \code{calc_weights()}
#' calculates the weights of the maximum Sharpe portfolio, by multiplying the
#' \emph{reduced inverse} of the \emph{covariance matrix}
#' \eqn{\strong{C}^{-1}} times the mean column returns \eqn{\bar{r}}:
#' \deqn{
#' \strong{w} = \strong{C}^{-1} \bar{r}
#' }
#'
#' If \code{method = "maxsharpemed"} then \code{calc_weights()} uses the
#' medians instead of the means.
#'
#' If \code{method = "minvarlin"} then it calculates the weights of the
#' minimum variance portfolio under linear constraint, by multiplying the
#' \emph{reduced inverse} of the \emph{covariance matrix} times the unit
#' vector:
#' \deqn{
#' \strong{w} = \strong{C}^{-1} \strong{1}
#' }
#'
#' If \code{method = "minvarquad"} then it calculates the weights of the
#' minimum variance portfolio under quadratic constraint (which is the
#' highest order principal component).
#'
#' If \code{method = "sharpem"} then it calculates the momentum weights equal
#' to the Sharpe ratios (the \code{returns} divided by their standard
#' deviations):
#' \deqn{
#' \strong{w} = \frac{\bar{r}}{\sigma}
#' }
#'
#' If \code{method = "kellym"} then it calculates the momentum weights equal
#' to the Kelly ratios (the \code{returns} divided by their variance):
#' \deqn{
#' \strong{w} = \frac{\bar{r}}{\sigma^2}
#' }
#'
#' \code{calc_weights()} calls the function \code{calc_inv()} to calculate
#' the \emph{reduced inverse} of the \emph{covariance matrix} of
#' \code{returns}. It performs regularization by selecting only the largest
#' eigenvalues equal in number to \code{dimax}.
#'
#' In addition, \code{calc_weights()} applies shrinkage to the columns of
#' \code{returns}, by shrinking their means to their common mean value:
#' \deqn{
#' r^{\prime}_i = (1 - \alpha) \, \bar{r}_i + \alpha \, \mu
#' }
#' Where \eqn{\bar{r}_i} is the mean of column \eqn{i} and \eqn{\mu} is the
#' average of all the column means.
#' The shrinkage intensity \code{alpha} determines the amount of shrinkage
#' that is applied, with \code{alpha = 0} representing no shrinkage (with the
#' column means \eqn{\bar{r}_i} unchanged), and \code{alpha = 1} representing
#' complete shrinkage (with the column means all equal to the single mean of
#' all the columns: \eqn{\bar{r}_i = \mu}).
#'
#' After the weights are calculated, they are scaled, depending on several
#' arguments.
#'
#' If \code{rankw = TRUE} then the weights are converted into their ranks.
#' The default is \code{rankw = FALSE}.
#'
#' If \code{centerw = TRUE} then the weights are centered so that their sum
#' is equal to \code{0}. The default is \code{centerw = FALSE}.
#'
#' If \code{scalew = "voltarget"} (the default) then the weights are
#' scaled (multiplied by a factor) so that the weighted portfolio has an
#' in-sample volatility equal to \code{voltarget}.
#'
#' If \code{scalew = "voleqw"} then the weights are scaled so that the
#' weighted portfolio has the same volatility as the equal weight portfolio.
#'
#' If \code{scalew = "sumone"} then the weights are scaled so that their
#' sum is equal to \code{1}.
#' If \code{scalew = "sumsq"} then the weights are scaled so that their
#' sum of squares is equal to \code{1}.
#' If \code{scalew = "none"} then the weights are not scaled.
#'
#' The function \code{calc_weights()} is written in \code{C++}
#' \code{RcppArmadillo} code.
#'
#' @examples
#' \dontrun{
#' # Calculate covariance matrix and eigen decomposition of ETF returns
#' retp <- na.omit(rutils::etfenv$returns[, 1:16])
#' ncols <- NCOL(retp)
#' eigend <- eigen(cov(retp))
#' # Calculate reduced inverse of covariance matrix
#' dimax <- 3
#' eigenvec <- eigend$vectors[, 1:dimax]
#' eigenval <- eigend$values[1:dimax]
#' invmat <- eigenvec %*% (t(eigenvec) / eigenval)
#' # Define shrinkage intensity and apply shrinkage to the mean returns
#' alpha <- 0.5
#' colmeans <- colMeans(retp)
#' colmeans <- ((1-alpha)*colmeans + alpha*mean(colmeans))
#' # Calculate weights using R
#' weightr <- drop(invmat %*% colmeans)
#' # Apply weights scaling
#' weightr <- weightr*sd(rowMeans(retp))/sd(retp %*% weightr)
#' weightr <- 0.01*weightr/sd(retp %*% weightr)
#' weightr <- weightr/sqrt(sum(weightr^2))
#' # Create a list of portfolio optimization parameters
#' controlv <- HighFreq::param_portf(method="maxsharpe", dimax=dimax, alpha=alpha, scalew="sumsq")
#' # Calculate weights using RcppArmadillo
#' weightcpp <- drop(HighFreq::calc_weights(retp, controlv=controlv))
#' all.equal(weightcpp, weightr)
#' }
#'
#' @export
# Thin auto-generated wrapper (Rcpp::compileAttributes); delegates to the
# registered C++ routine `_HighFreq_calc_weights`, which computes optimal
# portfolio weights for the method specified in `controlv` (a parameter list
# created with param_portf() — see the roxygen block above for the options).
calc_weights <- function(returns, controlv) {
    .Call(`_HighFreq_calc_weights`, returns, controlv)
}
#' Simulate (backtest) a rolling portfolio optimization strategy, using
#' \code{RcppArmadillo}.
#'
#' @param \code{retp} A \emph{time series} or a \emph{matrix} of asset
#' returns data.
#'
#' @param \code{retx} A \emph{time series} or a \emph{matrix} of excess
#' returns data (the returns in excess of the risk-free rate).
#'
#' @param \code{controlv} A \emph{list} of portfolio optimization model
#' parameters (see Details).
#'
#' @param \code{startp} An \emph{integer vector} of start points.
#'
#' @param \code{endd} An \emph{integer vector} of end points.
#'
#' @param \code{lambda} A decay factor which multiplies the past portfolio
#' weights. (The default is \code{lambda = 0} - no memory.)
#'
#' @param \code{coeff} A \emph{numeric} multiplier of the weights. (The
#' default is \code{1})
#'
#' @param \code{bidask} A \emph{numeric} bid-ask spread (the default is
#' \code{0})
#'
#'
#' @return A column \emph{vector} of strategy returns, with the same length as
#' the number of rows of \code{retp}.
#'
#' @details
#' The function \code{back_test()} performs a backtest simulation of a
#' rolling portfolio optimization strategy over a \emph{vector} of the end
#' points \code{endd}.
#'
#' It performs a loop over the end points \code{endd}, and subsets the
#' \emph{matrix} of the excess asset returns \code{retx} along its rows,
#' between the corresponding \emph{start point} and the \emph{end point}.
#'
#' The function \code{back_test()} passes the subset matrix of excess returns
#' into the function \code{calc_weights()}, which calculates the optimal
#' portfolio weights at each \emph{end point}.
#' It also passes to \code{calc_weights()} the argument \code{controlv},
#' which is the list of portfolio optimization parameters.
#' See the function \code{calc_weights()} for more details.
#' The list of portfolio optimization parameters can be created using the
#' function \code{param_portf()}.
#'
#' The function \code{back_test()} then recursively averages the weights
#' \eqn{w_i} at the \emph{end point = i} with the weights \eqn{w_{i-1}} from
#' the previous \emph{end point = (i-1)}, using the decay factor \code{lambda
#' = \eqn{\lambda}}:
#' \deqn{
#' w_i = (1-\lambda) w_i + \lambda w_{i-1}
#' }
#' The purpose of averaging the weights is to reduce their variance, and
#' improve their out-of-sample performance. It is equivalent to extending
#' the portfolio holding period beyond the time interval between neighboring
#' \emph{end points}.
#'
#' The function \code{back_test()} then calculates the out-of-sample strategy
#' returns by multiplying the average weights times the future asset returns.
#'
#' The function \code{back_test()} multiplies the out-of-sample strategy
#' returns by the coefficient \code{coeff} (with default equal to \code{1}),
#' which allows simulating either a trending strategy (if \code{coeff = 1}),
#' or a reverting strategy (if \code{coeff = -1}).
#'
#' The function \code{back_test()} calculates the transaction costs by
#' multiplying the bid-ask spread \code{bidask} times the absolute
#' difference between the current weights minus the weights from the previous
#' period. Then it subtracts the transaction costs from the out-of-sample
#' strategy returns.
#'
#' The function \code{back_test()} returns a \emph{time series} (column
#' \emph{vector}) of strategy returns, of the same length as the number of
#' rows of \code{retp}.
#'
#' @examples
#' \dontrun{
#' # Calculate the ETF daily excess returns
#' retp <- na.omit(rutils::etfenv$returns[, 1:16])
#' # riskf is the daily risk-free rate
#' riskf <- 0.03/260
#' retx <- retp - riskf
#' # Define monthly end points without initial warmup period
#' endd <- rutils::calc_endpoints(retp, interval="months")
#' endd <- endd[endd > 0]
#' nrows <- NROW(endd)
#' # Define 12-month look-back interval and start points over sliding window
#' lookb <- 12
#' startp <- c(rep_len(1, lookb-1), endd[1:(nrows-lookb+1)])
#' # Define return shrinkage and dimension reduction
#' alpha <- 0.5
#' dimax <- 3
#' # Create a list of portfolio optimization parameters
#' controlv <- HighFreq::param_portf(method="maxsharpe", dimax=dimax, alpha=alpha, scalew="sumsq")
#' # Simulate a monthly rolling portfolio optimization strategy
#' pnls <- HighFreq::back_test(retx, retp, controlv=controlv, startp=(startp-1), endd=(endd-1))
#' pnls <- xts::xts(pnls, index(retp))
#' colnames(pnls) <- "strategy"
#' # Plot dygraph of strategy
#' dygraphs::dygraph(cumsum(pnls),
#' main="Cumulative Returns of Max Sharpe Portfolio Strategy")
#' }
#'
#' @export
# Thin auto-generated wrapper (Rcpp::compileAttributes); delegates to the
# registered C++ routine `_HighFreq_back_test`, which backtests a rolling
# portfolio optimization strategy over the end points `endd`.
# Note the argument order: excess returns `retx` first, then raw returns
# `retp`; `lambda` smooths the weights across end points, `coeff` flips the
# strategy sign, and `bidask` charges transaction costs on weight changes.
back_test <- function(retx, retp, controlv, startp, endd, lambda = 0.0, coeff = 1.0, bidask = 0.0) {
    .Call(`_HighFreq_back_test`, retx, retp, controlv, startp, endd, lambda, coeff, bidask)
}
# NOTE(review): the two lines below were stray website-embedding text
# (scraping residue, not valid R); commented out so the file parses.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.