R/RcppExports.R

Defines functions back_test calc_weights sim_portfoptim lik_garch sim_df sim_ar sim_schwartz sim_ou sim_garch roll_moment roll_zscores run_scale roll_scale roll_reg roll_kurtosis roll_skew roll_var_ohlc roll_var roll_varvec roll_mean calc_reg calc_lm calc_hurst_ohlc calc_hurst calc_kurtosis calc_skew calc_var_ohlc_ag calc_var_ohlc calc_var_ag calc_covar calc_var calc_varvec calc_mean run_reg run_autocovar run_covar push_sga push_eigen push_covar push_cov2cor run_var_ohlc run_zscores run_var run_min run_max run_mean roll_sumw roll_sumep roll_sum roll_conv roll_ohlc agg_ohlc calc_scale calc_invref calc_invrec calc_invsvd calc_inv calc_eigenp calc_eigen mult_mat_ref mult_mat remove_dup calc_ranks_stl calc_ranks decode_it encode_it roll_count calc_startpoints calc_endpoints diffit diff_vec lagit lag_vec param_portf param_reg

Documented in agg_ohlc back_test calc_covar calc_eigen calc_eigenp calc_endpoints calc_hurst calc_hurst_ohlc calc_inv calc_invrec calc_invref calc_invsvd calc_kurtosis calc_lm calc_mean calc_ranks calc_ranks_stl calc_reg calc_scale calc_skew calc_startpoints calc_var calc_var_ag calc_var_ohlc calc_var_ohlc_ag calc_varvec calc_weights decode_it diffit diff_vec encode_it lagit lag_vec lik_garch mult_mat mult_mat_ref param_portf param_reg push_cov2cor push_covar push_eigen push_sga roll_conv roll_count roll_kurtosis roll_mean roll_moment roll_ohlc roll_reg roll_scale roll_skew roll_sum roll_sumep roll_sumw roll_var roll_var_ohlc roll_varvec roll_zscores run_autocovar run_covar run_max run_mean run_min run_reg run_scale run_var run_var_ohlc run_zscores sim_ar sim_df sim_garch sim_ou sim_portfoptim sim_schwartz

# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

#' Calculate a pointer to a moment function from the function name (string).
#' 
#' @param \code{funame} A \emph{character string} specifying the function
#'   name.
#'   
#' @return A pointer to a moment function.
#'
#' @details
#'   The function \code{calc_momptr()} calculates a pointer to a moment
#'   function from the function name (string).
#'   The function pointer is used internally in the \code{C++} code, but it's
#'   not exported to \code{R}.
#'   A moment function takes three arguments:
#'   \itemize{
#'     \item A \emph{time series} or a \emph{matrix} of data,
#'     \item A \emph{character string} specifying the type of the moment,
#'     \item A number specifying the confidence level for calculating the
#'   quantiles of returns.
#'   }
#'   The function name must be one of the following:
#'   \itemize{
#'     \item "calc_mean" for the estimator of the mean (location),
#'     \item "calc_var" for the estimator of the dispersion (variance),
#'     \item "calc_skew" for the estimator of the skewness,
#'     \item "calc_kurtosis" for the estimator of the kurtosis.
#'   }
#'   (The default is \code{funame = "calc_mean"}.)
#'
#' @export
NULL

#' Create a named list of model parameters that can be passed into regression
#' and machine learning functions.
#' 
#' @param \code{regmod} A \emph{character string} specifying the type of
#'   regression model (the default is \code{regmod = "least_squares"}).
#'   
#' @param \code{intercept} A \emph{Boolean} specifying whether an intercept
#'   term should be added to the predictor (the default is \code{intercept =
#'   TRUE}).
#'
#' @param \code{singmin} A \emph{numeric} threshold level for discarding
#'   small \emph{singular values} in order to regularize the inverse of the
#'   predictor matrix (the default is \code{1e-5}).
#'   
#' @param \code{dimax} An \emph{integer} equal to the number of \emph{singular
#'   values} used for calculating the \emph{reduced inverse} of the
#'   predictor matrix (the default is \code{dimax = 0} - standard matrix
#'   inverse using all the \emph{singular values}).
#'   
#' @param \code{residscale} A \emph{character string} specifying the method for
#'   scaling the residuals (the default is \code{residscale = "none"}).
#'
#' @param \code{confl} The confidence level for calculating the quantiles of
#'   returns (the default is \code{confl = 0.1}).
#'
#' @param \code{alpha} The shrinkage intensity of \code{returns} (with values
#'   between \code{0} and \code{1} - the default is \code{0}).
#'   
#' @return A named list of model parameters that can be passed into regression
#' and machine learning functions.
#'   
#' @details
#'   The function \code{param_reg()} creates a named list of model parameters
#'   that can be passed into regression and machine learning functions, for
#'   example into the functions \code{calc_reg()} and \code{roll_reg()}.
#'   
#'   The function \code{param_reg()} simplifies the creation of regression
#'   parameter lists.  Users can create a parameter list with the default
#'   values, or they can specify custom parameter values.
#'
#' @examples
#' \dontrun{
#' # Create a default list of regression parameters
#' controlv <- HighFreq::param_reg()
#' unlist(controlv)
#' # Create a custom list of regression parameters
#' controlv <- HighFreq::param_reg(intercept=FALSE, regmod="regular", dimax=4)
#' unlist(controlv)
#' }
#' 
#' @export
param_reg <- function(regmod = "least_squares", intercept = TRUE, singmin = 1e-5, dimax = 0L, residscale = "none", confl = 0.1, alpha = 0.0) {
    .Call(`_HighFreq_param_reg`, regmod, intercept, singmin, dimax, residscale, confl, alpha)
}

#' Create a named list of model parameters that can be passed into portfolio
#' optimization functions.
#' 
#' @param \code{method} A \emph{character string} specifying the method for
#'   calculating the portfolio weights (the default is \code{method =
#'   "sharpem"}).
#'   
#' @param \code{singmin} A \emph{numeric} threshold level for discarding
#'   small \emph{singular values} in order to regularize the inverse of the
#'   \code{covariance matrix} of \code{returns} (the default is \code{1e-5}).
#'   
#' @param \code{dimax} An \emph{integer} equal to the number of \emph{singular
#'   values} used for calculating the \emph{reduced inverse} of the
#'   \code{covariance matrix} of \code{returns} matrix (the default is
#'   \code{dimax = 0} - standard matrix inverse using all the \emph{singular
#'   values}).
#'   
#' @param \code{confl} The confidence level for calculating the quantiles of
#'   returns (the default is \code{confl = 0.1}).
#'
#' @param \code{alpha} The shrinkage intensity of \code{returns} (with values
#'   between \code{0} and \code{1} - the default is \code{0}).
#' 
#' @param \code{rankw} A \emph{Boolean} specifying whether the weights should
#'   be ranked (the default is \code{rankw = FALSE}).
#'
#' @param \code{centerw} A \emph{Boolean} specifying whether the weights should
#'   be centered (the default is \code{centerw = FALSE}).
#'
#' @param \code{scalew} A \emph{character string} specifying the method for
#'   scaling the weights (the default is \code{scalew = "voltarget"}).
#'
#' @param \code{voltarget} A \emph{numeric} volatility target for scaling the
#'   weights (the default is \code{voltarget = 0.01}).
#'
#' @return A named list of model parameters that can be passed into portfolio
#'   optimization functions.
#'
#' @details
#'   The function \code{param_portf()} creates a named list of model parameters
#'   that can be passed into portfolio optimization functions.  For example
#'   into the functions \code{calc_weights()} and \code{back_test()}.
#'   See the function \code{calc_weights()} for more details.
#'   
#'   The function \code{param_portf()} simplifies the creation of portfolio
#'   optimization parameter lists.  Users can create a parameter list with
#'   the default values, or they can specify custom parameter values.
#'
#' @examples
#' \dontrun{
#' # Create a default list of portfolio optimization parameters
#' controlv <- HighFreq::param_portf()
#' unlist(controlv)
#' # Create a custom list of portfolio optimization parameters
#' controlv <- HighFreq::param_portf(method="regular", dimax=4)
#' unlist(controlv)
#' }
#' 
#' @export
param_portf <- function(method = "sharpem", singmin = 1e-5, dimax = 0L, confl = 0.1, alpha = 0.0, rankw = FALSE, centerw = FALSE, scalew = "voltarget", voltarget = 0.01) {
    .Call(`_HighFreq_param_portf`, method, singmin, dimax, confl, alpha, rankw, centerw, scalew, voltarget)
}

#' Apply a lag to a single-column \emph{time series} or a \emph{vector} 
#' using \code{RcppArmadillo}.
#' 
#' @param \code{tseries} A single-column \emph{time series} or a
#'   \emph{vector}.
#'
#' @param \code{lagg} An \emph{integer} equal to the number of periods to lag.
#'   (The default is \code{lagg = 1}.)
#'
#' @param \code{pad_zeros} \emph{Boolean} argument: Should the output be padded
#'   with zeros? (The default is \code{pad_zeros = TRUE}.)
#'
#' @return A column \emph{vector} with the same number of elements as the input
#'   time series.
#'
#' @details
#'   The function \code{lag_vec()} applies a lag to the input \emph{time
#'   series} \code{tseries} by shifting its elements by the number equal to the
#'   argument \code{lagg}.  For positive \code{lagg} values, the elements are
#'   shifted forward in time (down), and for negative \code{lagg} values they
#'   are shifted backward (up).
#'   
#'   The output \emph{vector} is padded with either zeros (the default), or
#'   with data from \code{tseries}, so that it has the same number of elements
#'   as \code{tseries}.
#'   If the \code{lagg} is positive, then the first element is copied and added
#'   upfront.
#'   If the \code{lagg} is negative, then the last element is copied and added
#'   to the end.
#'   
#'   As a rule, if \code{tseries} contains returns data, then the output
#'   \emph{vector} should be padded with zeros, to avoid data snooping.
#'   If \code{tseries} contains prices, then the output \emph{vector} should
#'   be padded with the prices.
#'
#' @examples
#' \dontrun{
#' # Create a vector of random returns
#' retp <- rnorm(1e6)
#' # Compare lag_vec() with rutils::lagit()
#' all.equal(drop(HighFreq::lag_vec(retp)), 
#'   rutils::lagit(retp))
#' # Compare the speed of RcppArmadillo with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#'   Rcpp=HighFreq::lag_vec(retp),
#'   Rcode=rutils::lagit(retp),
#'   times=10))[, c(1, 4, 5)]  # end microbenchmark summary
#' }
#' 
#' @export
lag_vec <- function(tseries, lagg = 1L, pad_zeros = TRUE) {
    .Call(`_HighFreq_lag_vec`, tseries, lagg, pad_zeros)
}
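
# A minimal base R sketch (hypothetical helper, not part of HighFreq) of the
# padding rule described above: shift the vector forward by lagg elements and
# pad upfront with zeros (for returns) or with the first element (for prices).
lag_pad_sketch <- function(x, lagg = 1, pad_zeros = TRUE) {
    padv <- if (pad_zeros) rep(0, lagg) else rep(x[1], lagg)
    c(padv, x[seq_len(length(x) - lagg)])
}  # lag_pad_sketch(1:5) returns c(0, 1, 2, 3, 4)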

#' Apply a lag to the rows of a \emph{time series} or a \emph{matrix} using
#' \code{RcppArmadillo}.
#' 
#' @param \code{tseries} A \emph{time series} or a \emph{matrix}.
#' 
#' @param \code{lagg} An \emph{integer} equal to the number of periods to lag
#'   (the default is \code{lagg = 1}).
#'
#' @param \code{pad_zeros} \emph{Boolean} argument: Should the output be padded
#'   with zeros? (The default is \code{pad_zeros = TRUE}.)
#'   
#' @return A \emph{matrix} with the same dimensions as the input argument
#'   \code{tseries}.
#'
#' @details
#'   The function \code{lagit()} applies a lag to the input \emph{matrix} by
#'   shifting its rows by the number equal to the argument \code{lagg}. For
#'   positive \code{lagg} values, the rows are shifted \emph{forward} (down),
#'   and for negative \code{lagg} values they are shifted \emph{backward} (up).
#'   
#'   The output \emph{matrix} is padded with either zeros (the default), or
#'   with rows of data from \code{tseries}, so that it has the same dimensions
#'   as \code{tseries}.
#'   If the \code{lagg} is positive, then the first row is copied and added
#'   upfront.
#'   If the \code{lagg} is negative, then the last row is copied and added
#'   to the end.
#'   
#'   As a rule, if \code{tseries} contains returns data, then the output
#'   \emph{matrix} should be padded with zeros, to avoid data snooping.
#'   If \code{tseries} contains prices, then the output \emph{matrix} should
#'   be padded with the prices.
#'
#' @examples
#' \dontrun{
#' # Create a matrix of random returns
#' retp <- matrix(rnorm(5e6), nc=5)
#' # Compare lagit() with rutils::lagit()
#' all.equal(HighFreq::lagit(retp), rutils::lagit(retp))
#' # Compare the speed of RcppArmadillo with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#'   Rcpp=HighFreq::lagit(retp),
#'   Rcode=rutils::lagit(retp),
#'   times=10))[, c(1, 4, 5)]  # end microbenchmark summary
#' }
#' 
#' @export
lagit <- function(tseries, lagg = 1L, pad_zeros = TRUE) {
    .Call(`_HighFreq_lagit`, tseries, lagg, pad_zeros)
}

#' Calculate the differences between the neighboring elements of a
#' single-column \emph{time series} or a \emph{vector}.
#' 
#' @param \code{tseries} A single-column \emph{time series} or a \emph{vector}.
#' 
#' @param \code{lagg} An \emph{integer} equal to the number of time periods to
#'   lag when calculating the differences (the default is \code{lagg = 1}).
#'   
#' @param \code{pad_zeros} \emph{Boolean} argument: Should the output
#'   \emph{vector} be padded (extended) with zeros, in order to return a
#'   \emph{vector} of the same length as the input? (the default is
#'   \code{pad_zeros = TRUE})
#'
#' @return A column \emph{vector} containing the differences between the
#'   elements of the input vector.
#'
#' @details
#'   The function \code{diff_vec()} calculates the differences between the
#'   input \emph{time series} or \emph{vector} and its lagged version.
#'   
#'   The argument \code{lagg} specifies the number of lags.  For example, if
#'   \code{lagg=3} then the differences will be taken between each element
#'   minus the element three time periods before it (in the past).  The default
#'   is \code{lagg = 1}.
#' 
#'   The argument \code{pad_zeros} specifies whether the output \emph{vector}
#'   should be padded (extended) with zeros at the front, in order to
#'   return a \emph{vector} of the same length as the input.  The default is
#'   \code{pad_zeros = TRUE}. The padding operation can be time-consuming,
#'   because it requires copying the data in memory.
#'   
#'   The function \code{diff_vec()} is implemented in \code{RcppArmadillo}
#'   \code{C++} code, which makes it several times faster than \code{R} code.
#'
#' @examples
#' \dontrun{
#' # Create a vector of random returns
#' retp <- rnorm(1e6)
#' # Compare diff_vec() with rutils::diffit()
#' all.equal(drop(HighFreq::diff_vec(retp, lagg=3, pad_zeros=TRUE)),
#'   rutils::diffit(retp, lagg=3))
#' # Compare the speed of RcppArmadillo with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#'   Rcpp=HighFreq::diff_vec(retp, lagg=3, pad_zeros=TRUE),
#'   Rcode=rutils::diffit(retp, lagg=3),
#'   times=10))[, c(1, 4, 5)]  # end microbenchmark summary
#' }
#' 
#' @export
diff_vec <- function(tseries, lagg = 1L, pad_zeros = TRUE) {
    .Call(`_HighFreq_diff_vec`, tseries, lagg, pad_zeros)
}
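
# A base R cross-check (a sketch, assuming a plain numeric vector) of the
# differencing and front-padding described above, using stats::diff():
diff_pad_sketch <- function(x, lagg = 1, pad_zeros = TRUE) {
    diffv <- diff(x, lag = lagg)  # differences between elements lagg apart
    if (pad_zeros) c(rep(0, lagg), diffv) else diffv
}  # diff_pad_sketch(c(1, 3, 6, 10)) returns c(0, 2, 3, 4)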

#' Calculate the row differences of a \emph{time series} or a \emph{matrix}
#' using \emph{RcppArmadillo}.
#' 
#' @param \code{tseries} A \emph{time series} or a \emph{matrix}.
#' 
#' @param \code{lagg} An \emph{integer} equal to the number of rows (time
#'   periods) to lag when calculating the differences (the default is
#'   \code{lagg = 1}).
#'   
#' @param \code{pad_zeros} \emph{Boolean} argument: Should the output
#'   \emph{matrix} be padded (extended) with zero values, in order to return a
#'   \emph{matrix} with the same number of rows as the input? (the default is
#'   \code{pad_zeros = TRUE})
#'
#' @return A \emph{matrix} containing the differences between the rows of the
#'   input \emph{matrix} \code{tseries}.
#'
#' @details
#'   The function \code{diffit()} calculates the differences between the rows
#'   of the input \emph{matrix} \code{tseries} and its lagged version.
#'   
#'   The argument \code{lagg} specifies the number of lags applied to the rows
#'   of the lagged version of \code{tseries}. 
#'   For positive \code{lagg} values, the lagged version of \code{tseries} has
#'   its rows shifted \emph{forward} (down) by the number equal to \code{lagg}
#'   rows. For negative \code{lagg} values, the lagged version of
#'   \code{tseries} has its rows shifted \emph{backward} (up) by the number
#'   equal to \code{-lagg} rows.
#'   For example, if \code{lagg=3} then the lagged version will have its rows
#'   shifted down by \code{3} rows, and the differences will be taken between
#'   each row minus the row three time periods before it (in the past). The
#'   default is \code{lagg = 1}.
#' 
#'   The argument \code{pad_zeros} specifies whether the output \emph{matrix}
#'   should be padded (extended) with zero values in order to return a
#'   \emph{matrix} with the same number of rows as the input \code{tseries}.
#'   The default is \code{pad_zeros = TRUE}. If \code{pad_zeros = FALSE} then
#'   the return \emph{matrix} has a smaller number of rows than the input
#'   \code{tseries}. The padding operation can be time-consuming, because it
#'   requires copying the data in memory.
#'   
#'   The function \code{diffit()} is implemented in \code{RcppArmadillo}
#'   \code{C++} code, which makes it several times faster than \code{R} code.
#'
#' @examples
#' \dontrun{
#' # Create a matrix of random data
#' datav <- matrix(sample(15), nc=3)
#' # Calculate differences with lagged rows
#' HighFreq::diffit(datav, lagg=2)
#' # Calculate differences with advanced rows
#' HighFreq::diffit(datav, lagg=-2)
#' # Compare HighFreq::diffit() with rutils::diffit()
#' all.equal(HighFreq::diffit(datav, lagg=2), 
#'   rutils::diffit(datav, lagg=2), 
#'   check.attributes=FALSE)
#' # Compare the speed of RcppArmadillo with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#'   Rcpp=HighFreq::diffit(datav, lagg=2),
#'   Rcode=rutils::diffit(datav, lagg=2),
#'   times=10))[, c(1, 4, 5)]  # end microbenchmark summary
#' }
#' 
#' @export
diffit <- function(tseries, lagg = 1L, pad_zeros = TRUE) {
    .Call(`_HighFreq_diffit`, tseries, lagg, pad_zeros)
}
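
# The same padding logic for matrices (a sketch for positive lagg only, not
# the package implementation): base R diff() differences the rows of a matrix
# column by column, and zero rows are prepended to preserve the dimensions,
# matching pad_zeros = TRUE.
diffit_sketch <- function(tseries, lagg = 1) {
    diffm <- diff(tseries, lag = lagg)
    rbind(matrix(0, nrow = lagg, ncol = ncol(tseries)), diffm)
}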

#' Calculate a vector of end points that divides an integer time sequence of
#' time periods into equal time intervals.
#'
#' @param \code{length} An \emph{integer} equal to the length of the time
#'   sequence to be divided into equal intervals.
#'   
#' @param \code{step} The number of time periods in each interval between
#'   neighboring end points (the default is \code{step = 1}).
#' 
#' @param \code{stub} An \emph{integer} equal to the first non-zero end point
#'   (the default is \code{stub = 0}).
#'
#' @param \code{stubs} A \emph{Boolean} specifying whether to include stub
#'   intervals (the default is \code{stubs = TRUE}).
#'
#' @return A vector of equally spaced \emph{integers} representing the end
#'   points.
#'
#' @details
#'   The end points are a vector of integers which divide the sequence of time
#'   periods of length equal to \code{length} into equally spaced time
#'   intervals.
#'   The number of time periods between neighboring end points is equal to the
#'   argument \code{step}.
#'   If a whole number of intervals doesn't fit over the whole sequence, then
#'   \code{calc_endpoints()} adds a stub interval at the end.
#'   A stub interval is one where the number of periods between neighboring end
#'   points is less than the argument \code{step}.
#'   
#'   If \code{stubs = TRUE} (the default) then the first end point is 
#'   equal to \code{0} (since indexing in \code{C++} code starts at \code{0}).
#'   The first non-zero end point is equal to \code{step} or \code{stub} (if
#'   it's not zero).
#'   If \code{stub = 0} (the default) then the first end point is equal to
#'   \code{0} (even if \code{stubs = FALSE}).
#'   If \code{stubs = TRUE} (the default) then the last end point is always
#'   equal to \code{length-1}.
#'   The argument \code{stub} should be less than the \code{step}: \code{stub <
#'   step}.
#'   
#'   If \code{step = 1} and \code{stub = 0} (the default), then the vector of
#'   end points is simply equal to:
#'   \deqn{
#'     \{ 0, 1, 2, ..., length - 1 \}
#'   }
#'
#'   If \code{stub = 0} (the default) and \code{stubs = TRUE} (the default)
#'   then the vector of end points is equal to:
#'   \deqn{
#'     \{ 0, step, 2*step, ..., length - 1 \}
#'   }
#'   
#'   If \code{stub = 0} (the default) and \code{stubs = FALSE} then the vector
#'   of end points is equal to:
#'   \deqn{
#'     \{ 0, step, 2*step, ..., n*step \}
#'   }
#'   
#'   If \code{stub > 0} and \code{stubs = TRUE} (the default), then the vector
#'   of end points is equal to:
#'   \deqn{
#'     \{ 0, stub, stub + step, ..., length - 1 \}
#'   }
#'   
#'   For example, the end points for \code{length = 20}, divided into intervals
#'   of \code{step = 5} are equal to: \code{0, 5, 10, 15, 19}.
#'   
#'   If \code{stub = 1} then the first non-zero end point is equal to \code{1}
#'   and the end points are equal to: \code{0, 1, 6, 11, 16, 19}.
#'   The stub interval at the beginning is equal to \code{2} (including
#'   \code{0} and \code{1}).
#'   The stub interval at the end is equal to \code{3 = 19 - 16}.
#'   
#'   The end points for \code{length = 21} divided into intervals of length
#'   \code{step = 5}, with \code{stub = 0}, are equal to: \code{0, 5, 10, 15,
#'   20}.
#'   The beginning interval is equal to \code{5}.
#'   The end interval is equal to \code{5 = 20 - 15}.
#'   
#'   If \code{stub = 1} then the first non-zero end point is equal to \code{1}
#'   and the end points are equal to: \code{0, 1, 6, 11, 16, 20}.
#'   The beginning stub interval is equal to \code{2}.
#'   The end stub interval is equal to \code{4 = 20 - 16}.
#'   
#'   The function \code{calc_endpoints()} is similar to the function
#'   \code{rutils::calc_endpoints()} from package
#'   \href{https://github.com/algoquant/rutils}{rutils}.
#'   
#'   But the end points are shifted by \code{-1} compared to \code{R} code
#'   because indexing starts at \code{0} in \code{C++} code, while it starts at
#'   \code{1} in \code{R} code. So if \code{calc_endpoints()} is used in
#'   \code{R} code then \code{1} should be added to it.
#'   
#' @examples
#' # Calculate the end points without a stub interval
#' HighFreq::calc_endpoints(length=20, step=5)
#' # Calculate the end points with a final stub interval
#' HighFreq::calc_endpoints(length=23, step=5)
#' # Calculate the end points with initial and final stub intervals
#' HighFreq::calc_endpoints(length=20, step=5, stub=2)
#'
#' @export
calc_endpoints <- function(length, step = 1L, stub = 0L, stubs = TRUE) {
    .Call(`_HighFreq_calc_endpoints`, length, step, stub, stubs)
}
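
# Worked examples from the details above, wrapped in if (FALSE) so they don't
# run when the file is sourced:
if (FALSE) {
    # End points for length = 20, step = 5: 0, 5, 10, 15, 19
    HighFreq::calc_endpoints(length=20, step=5)
    # With stub = 1 the first non-zero end point becomes 1: 0, 1, 6, 11, 16, 19
    HighFreq::calc_endpoints(length=20, step=5, stub=1)
}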

#' Calculate a vector of start points by lagging (shifting) a vector of end
#' points.
#'
#' @param \code{endd} An \emph{integer} vector of end points.
#'   
#' @param \code{lookb} The length of the look-back interval, equal to the
#'   lag (shift) applied to the end points.
#'   
#' @return An \emph{integer} vector with the same number of elements as the
#'   vector \code{endd}.
#'
#' @details
#'   The start points are equal to the values of the vector \code{endd} lagged
#'   (shifted) by an amount equal to \code{lookb}.  In addition, an extra
#'   value of \code{1} is added to them, to avoid data overlaps.  The lag
#'   operation requires prepending a warmup interval of zeros, so that the
#'   vector of start points has the same length as \code{endd}.
#'   
#'   For example, consider the end points for a vector of length \code{25}
#'   divided into equal intervals of length \code{5}: \code{4, 9, 14, 19, 24}.
#'   (In \code{C++} the vector indexing starts at \code{0} not \code{1}, so
#'   it's shifted by \code{-1}.)
#'   Then the start points for \code{lookb = 2} are equal to: \code{0, 0, 
#'   5, 10, 15}.  The differences between the end points minus the
#'   corresponding start points are equal to \code{9}, except for the warmup
#'   interval.
#'   
#' @examples
#' # Calculate end points
#' endd <- HighFreq::calc_endpoints(length=55, step=5)
#' # Calculate start points corresponding to the end points
#' startp <- HighFreq::calc_startpoints(endd, lookb=5)
#'
#' @export
calc_startpoints <- function(endd, lookb) {
    .Call(`_HighFreq_calc_startpoints`, endd, lookb)
}
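
# The worked example from the details above, kept inert under if (FALSE):
if (FALSE) {
    endd <- HighFreq::calc_endpoints(length=25, step=5)  # 4, 9, 14, 19, 24
    HighFreq::calc_startpoints(endd, lookb=2)  # 0, 0, 5, 10, 15
    # The end points minus the start points equal 9, except for the warmup
}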

#' Count the number of consecutive \code{TRUE} elements in a Boolean vector,
#' and reset the count to zero after every \code{FALSE} element.
#' 
#' @param \code{tseries} A \emph{Boolean vector} of data.
#'
#' @return An \emph{integer vector} of the same length as the argument
#'   \code{tseries}.
#'
#' @details
#'   The function \code{roll_count()} calculates the number of consecutive
#'   \code{TRUE} elements in a Boolean vector, and it resets the count to zero
#'   after every \code{FALSE} element.
#'   
#'   For example, the Boolean vector \code{c(FALSE, TRUE, TRUE, FALSE, FALSE,
#'   TRUE, TRUE, TRUE, TRUE, TRUE, FALSE)} is translated into \code{c(0, 1, 2,
#'   0, 0, 1, 2, 3, 4, 5, 0)}.
#'   
#' @examples
#' \dontrun{
#' # Calculate the number of consecutive TRUE elements
#' drop(HighFreq::roll_count(c(FALSE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE)))
#' }
#' @export
roll_count <- function(tseries) {
    .Call(`_HighFreq_roll_count`, tseries)
}
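
# A base R cross-check of the consecutive TRUE count (a sketch, not the
# package code): restart a cumulative sum within each run delimited by FALSE.
roll_count_sketch <- function(boolv) {
    ave(as.integer(boolv), cumsum(!boolv), FUN = cumsum)
}  # roll_count_sketch(c(FALSE, TRUE, TRUE, FALSE)) returns c(0, 1, 2, 0)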

#' Calculate the run length encoding of a single-column \emph{time series},
#' \emph{matrix}, or a \emph{vector}.
#' 
#' @param \code{tseries} A single-column \emph{time series}, \emph{matrix}, or
#'   a \emph{vector}.
#'
#' @return A \emph{list} with two \emph{vectors}: a \emph{vector} of encoded
#'   data and an \emph{integer vector} of data counts (repeats).
#'
#' @details
#'   The function \code{encode_it()} calculates the run length encoding of a
#'   single-column \emph{time series}, \emph{matrix}, or a \emph{vector}.
#'   
#'   The run length encoding of a \emph{vector} consists of two \emph{vectors}:
#'   a \emph{vector} of encoded data (consecutive data values) and of an
#'   \emph{integer vector} of the data counts (the number of times the same
#'   value repeats in succession).
#'   
#'   Run length encoding (RLE) is a data compression algorithm which encodes
#'   the data in two \emph{vectors}: the consecutive data values and their
#'   counts.  If a data value occurs several times in succession then it is
#'   recorded only once and its corresponding count is equal to the number of
#'   times it occurs. Run length encoding is different from a contingency
#'   table, which counts the total occurrences of each value rather than the
#'   consecutive runs.
#'   
#' @examples
#' \dontrun{
#' # Create a vector of data
#' datav <- sample(5, 31, replace=TRUE)
#' # Calculate the run length encoding of datav
#' HighFreq::encode_it(datav)
#' }
#' 
#' @export
encode_it <- function(tseries) {
    .Call(`_HighFreq_encode_it`, tseries)
}
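
# Base R rle() performs the same compression, with the counts named "lengths"
# and the data named "values" (the element names of the list returned by
# encode_it() may differ):
if (FALSE) {
    datav <- sample(5, 31, replace=TRUE)
    rlev <- rle(datav)
    rlev$values   # the consecutive data values
    rlev$lengths  # the number of times each value repeats
}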

#' Calculate the \emph{vector} of data from its run length encoding.
#' 
#' @param \code{encodel} A \emph{list} with two \emph{vectors}: a \emph{numeric
#'   vector} of encoded data and an \emph{integer vector} of data counts
#'   (repeats).
#'
#' @return A \code{numeric vector}.
#' 
#' @details
#'   The function \code{decode_it()} calculates the \emph{vector} of data
#'   from its run length encoding.
#'   
#'   The run length encoding of a \emph{vector} consists of two \emph{vectors}:
#'   a \emph{numeric vector} of encoded data (consecutive data values) and of
#'   an \emph{integer vector} of the data counts (the number of times the same
#'   value repeats in succession).
#'   
#'   Run length encoding (RLE) is a data compression algorithm which encodes
#'   the data in two \emph{vectors}: the consecutive data values and their
#'   counts.  If a data value occurs several times in succession then it is
#'   recorded only once and its corresponding count is equal to the number of
#'   times it occurs. Run length encoding is different from a contingency
#'   table, which counts the total occurrences of each value rather than the
#'   consecutive runs.
#'   
#' @examples
#' \dontrun{
#' # Create a vector of data
#' datav <- sample(5, 31, replace=TRUE)
#' # Calculate the run length encoding of datav
#' rle <- HighFreq::encode_it(datav)
#' # Decode the data from its run length encoding
#' decodev <- HighFreq::decode_it(rle)
#' all.equal(datav, decodev)
#' }
#' 
#' @export
decode_it <- function(encodel) {
    .Call(`_HighFreq_decode_it`, encodel)
}
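
# Decoding a run length encoding is just rep(): repeat each encoded value by
# its count. A base R round trip (inert sketch):
if (FALSE) {
    datav <- sample(5, 31, replace=TRUE)
    rlev <- rle(datav)
    all.equal(datav, rep(rlev$values, rlev$lengths))  # TRUE
}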

#' Calculate the ranks of the elements of a single-column \emph{time series},
#' \emph{matrix}, or a \emph{vector} using \code{RcppArmadillo}.
#' 
#' @param \code{tseries} A single-column \emph{time series}, \emph{matrix}, or
#'   a \emph{vector}.
#'
#' @return An \emph{integer vector} with the ranks of the elements of the
#'   \code{tseries}.
#'
#' @details
#'   The function \code{calc_ranks()} calculates the ranks of the elements of a
#'   single-column \emph{time series}, \emph{matrix}, or a \emph{vector}. 
#'   
#'   The permutation index is an integer vector which sorts a given vector into
#'   ascending order. 
#'   The permutation index of the permutation index is the \emph{reverse}
#'   permutation index, because it sorts the vector from ascending order back
#'   into its original unsorted order.
#'   The ranks of the elements are equal to the \emph{reverse} permutation
#'   index. The function \code{calc_ranks()} calculates the \emph{reverse}
#'   permutation index.
#'   
#'   The ranks produced by \code{calc_ranks()} start at zero, following the 
#'   \code{C++} convention.
#'   
#'   The \code{Armadillo} function \code{arma::sort_index()} calculates the
#'   permutation index which sorts a given vector into an ascending order.
#'   Applying the function \code{arma::sort_index()} twice:\cr
#'   \code{arma::sort_index(arma::sort_index())},\cr
#'   calculates the \emph{reverse} permutation index to sort the vector from
#'   ascending order back into its original unsorted order.
#'   
#'   The function \code{calc_ranks()} calls the \code{Armadillo} function
#'   \code{arma::sort_index()} twice to calculate the \emph{reverse}
#'   permutation index, to sort the vector from ascending order back into its
#'   original unsorted order.
#'   
#' @examples
#' \dontrun{
#' # Create a vector of data
#' datav <- rnorm(1e3)
#' # Calculate the ranks of the elements using R code and RcppArmadillo
#' all.equal(rank(datav), drop(HighFreq::calc_ranks(datav))+1)
#' # Compare the speed of R code with RcppArmadillo
#' library(microbenchmark)
#' summary(microbenchmark(
#'   Rcode=rank(datav),
#'   Rcpp=HighFreq::calc_ranks(datav),
#'   times=10))[, c(1, 4, 5)]  # end microbenchmark summary
#' }
#' 
#' @export
calc_ranks <- function(tseries) {
    .Call(`_HighFreq_calc_ranks`, tseries)
}
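
# In base R the reverse permutation index described above is order(order(x)):
# the second order() inverts the permutation produced by the first, which for
# distinct values equals rank(x) (calc_ranks() starts at zero, rank() at one).
if (FALSE) {
    datav <- rnorm(10)
    all.equal(order(order(datav)), as.integer(rank(datav)))  # TRUE
}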

#' Calculate the ranks of the elements of a single-column \emph{time series},
#' \emph{matrix}, or a \emph{vector} using \code{RcppArmadillo}.
#' 
#' @param \code{tseries} A single-column \emph{time series}, \emph{matrix}, or
#'   a \emph{vector}.
#'
#' @return An \emph{integer vector} with the ranks of the elements of
#'   \code{tseries}.
#'
#' @details
#'   The function \code{calc_ranks_stl()} calculates the ranks of the elements
#'   of a single-column \emph{time series}, \emph{matrix}, or a \emph{vector}.
#'   The function \code{calc_ranks_stl()} is slightly faster than the function
#'   \code{calc_ranks()}.
#'   
#'   The permutation index is an integer vector which sorts a given vector into
#'   ascending order. 
#'   The permutation index of the permutation index is the \emph{reverse}
#'   permutation index, because it sorts the vector from ascending order back
#'   into its original unsorted order.
#'   The ranks of the elements are equal to the \emph{reverse} permutation
#'   index. The function \code{calc_ranks_stl()} calculates the \emph{reverse}
#'   permutation index.
#'   
#'   The ranks produced by \code{calc_ranks_stl()} start at zero, following the
#'   \code{C++} convention.
#'
#'   The \code{STL} \code{C++} function \code{std::sort()} sorts a vector into
#'   ascending order. It can also be used to calculate the permutation index
#'   which sorts the vector into an ascending order.
#'   
#'   The function \code{calc_ranks_stl()} calls the function \code{std::sort()}
#'   twice:
#'   First, it calculates the permutation index which sorts the vector
#'   \code{tseries} into ascending order.
#'   Second, it calculates the permutation index of the permutation index,
#'   which are the ranks (the \emph{reverse} permutation index) of the vector
#'   \code{tseries}.
#' 
#' @examples
#' \dontrun{
#' # Create a vector of data
#' datav <- rnorm(1e3)
#' # Calculate the ranks of the elements using R code and RcppArmadillo
#' all.equal(rank(datav), drop(HighFreq::calc_ranks_stl(datav))+1)
#' # Compare the speed of R code with RcppArmadillo
#' library(microbenchmark)
#' summary(microbenchmark(
#'   Rcode=rank(datav),
#'   Rcpp=HighFreq::calc_ranks_stl(datav),
#'   times=10))[, c(1, 4, 5)]  # end microbenchmark summary
#' }
#' 
#' @export
calc_ranks_stl <- function(tseries) {
    .Call(`_HighFreq_calc_ranks_stl`, tseries)
}

remove_dup <- function(stringv) {
    .Call(`_HighFreq_remove_dup`, stringv)
}

#' Multiply element-wise the rows or columns of a \emph{matrix} times a
#' \emph{vector}.
#' 
#' @param \code{vectorv} A \emph{numeric} \emph{vector}.
#' 
#' @param \code{matrixv} A \emph{numeric} \emph{matrix}.
#' 
#' @param \code{byrow} A \emph{Boolean} argument: if \code{TRUE} then multiply
#'   the rows of \code{matrixv} by \code{vectorv}, otherwise multiply the
#'   columns (the default is \code{byrow = TRUE}).
#' 
#' @return A \emph{matrix} equal to the product of the elements of
#'   \code{matrixv} times \code{vectorv}, with the same dimensions as the
#'   argument \code{matrixv}.
#' 
#' @details
#'   The function \code{mult_mat()} multiplies element-wise the rows or columns
#'   of a \emph{matrix} times a \emph{vector}.
#'
#'   If \code{byrow = TRUE} (the default), then the function \code{mult_mat()}
#'   multiplies the rows of the argument \code{matrixv} times the argument
#'   \code{vectorv}.
#'   Otherwise it multiplies the columns of \code{matrixv}.
#' 
#'   In \code{R}, multiplying a \emph{matrix} by a \emph{vector} element-wise
#'   is performed by columns (the vector is recycled down the columns).
#'   Multiplication by rows is often required, for example when multiplying
#'   asset returns by portfolio weights.
#'   But multiplication by rows requires explicit loops in \code{R}, or it
#'   requires a \emph{matrix} transpose.  And both are slow.
#'
#'   The function \code{mult_mat()} uses \code{RcppArmadillo} \code{C++}
#'   code, so when multiplying large \emph{matrix} columns it's several times
#'   faster than vectorized \code{R} code, and it's even faster relative to
#'   \code{R} when multiplying the \emph{matrix} rows.
#' 
#'   The function \code{mult_mat()} performs loops over the \emph{matrix} rows
#'   and columns using the \code{Armadillo} operators \code{each_row()} and
#'   \code{each_col()}, instead of performing explicit \code{for()} loops (both
#'   methods are equally fast).
#'   
#' @examples
#' \dontrun{
#' # Create vector and matrix data
#' matrixv <- matrix(round(runif(25e4), 2), nc=5e2)
#' vectorv <- round(runif(5e2), 2)
#' 
#' # Multiply the matrix rows using R
#' matrixr <- t(vectorv*t(matrixv))
#' # Multiply the matrix rows using C++
#' matrixp <- HighFreq::mult_mat(vectorv, matrixv, byrow=TRUE)
#' all.equal(matrixr, matrixp)
#' # Compare the speed of Rcpp with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#'     Rcpp=HighFreq::mult_mat(vectorv, matrixv, byrow=TRUE),
#'     Rcode=t(vectorv*t(matrixv)),
#'     times=10))[, c(1, 4, 5)]  # end microbenchmark summary
#'     
#' # Multiply the matrix columns using R
#' matrixr <- vectorv*matrixv
#' # Multiply the matrix columns using C++
#' matrixp <- HighFreq::mult_mat(vectorv, matrixv, byrow=FALSE)
#' all.equal(matrixr, matrixp)
#' # Compare the speed of Rcpp with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#'     Rcpp=HighFreq::mult_mat(vectorv, matrixv, byrow=FALSE),
#'     Rcode=vectorv*matrixv,
#'     times=10))[, c(1, 4, 5)]  # end microbenchmark summary
#' }
#' 
#' @export
mult_mat <- function(vectorv, matrixv, byrow = TRUE) {
    .Call(`_HighFreq_mult_mat`, vectorv, matrixv, byrow)
}
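
# An equivalent base R formulation (a sketch) using sweep(): multiplying the
# matrix rows by a vector sweeps the vector across the columns (MARGIN = 2).
if (FALSE) {
    matrixv <- matrix(1:6, nc=3)
    vectorv <- c(1, 10, 100)
    all.equal(sweep(matrixv, MARGIN=2, vectorv, `*`),
              t(vectorv*t(matrixv)))  # TRUE
}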

#' Multiply the rows or columns of a \emph{matrix} times a \emph{vector},
#' element-wise and in place, without copying the data in memory.
#' 
#' @param \code{vectorv} A \emph{numeric} \emph{vector}.
#' 
#' @param \code{matrixv} A \emph{numeric} \emph{matrix}.
#' 
#' @param \code{byrow} A \emph{Boolean} argument: if \code{TRUE} then multiply
#'   the rows of \code{matrixv} by \code{vectorv}, otherwise multiply the
#'   columns (the default is \code{byrow = TRUE}).
#' 
#' @return Void (no return value - modifies the data in place).
#' 
#' @details
#'   The function \code{mult_mat_ref()} multiplies the rows or columns of a
#'   \emph{matrix} times a \emph{vector}, element-wise and in place, without
#'   copying the data in memory.
#'
#'   It accepts a \emph{pointer} to the argument \code{matrixv}, and it
#'   overwrites the old \code{matrix} values with the new values. It performs
#'   the calculation in place, without copying the \emph{matrix} in memory,
#'   which can significantly increase the computation speed for large matrices.
#'
#'   If \code{byrow = TRUE} (the default), then function \code{mult_mat_ref()}
#'   multiplies the rows of the argument \code{matrixv} times the argument
#'   \code{vectorv}.
#'   Otherwise it multiplies the columns of \code{matrixv}.
#' 
#'   In \code{R}, multiplying a \emph{matrix} by a \emph{vector} element-wise
#'   is performed by columns (the vector is recycled down the columns).
#'   Multiplication by rows is often required, for example when multiplying
#'   asset returns by portfolio weights.
#'   But multiplication by rows requires explicit loops in \code{R}, or it
#'   requires a \emph{matrix} transpose.  And both are slow.
#'
#'   The function \code{mult_mat_ref()} uses \code{RcppArmadillo} \code{C++}
#'   code, so when multiplying large \emph{matrix} columns it's several times
#'   faster than vectorized \code{R} code, and it's even faster relative to
#'   \code{R} when multiplying the \emph{matrix} rows.
#' 
#'   The function \code{mult_mat_ref()} performs loops over the \emph{matrix}
#'   rows and columns using the \code{Armadillo} operators \code{each_row()}
#'   and \code{each_col()}, instead of performing explicit \code{for()} loops
#'   (both methods are equally fast).
#'   
#' @examples
#' \dontrun{
#' # Create vector and matrix data
#' matrixv <- matrix(round(runif(25e4), 2), nc=5e2)
#' vectorv <- round(runif(5e2), 2)
#' 
#' # Multiply the matrix rows using R
#' matrixr <- t(vectorv*t(matrixv))
#' # Multiply the matrix rows using C++
#' HighFreq::mult_mat_ref(vectorv, matrixv, byrow=TRUE)
#' all.equal(matrixr, matrixv)
#' # Compare the speed of Rcpp with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#'     Rcpp=HighFreq::mult_mat_ref(vectorv, matrixv, byrow=TRUE),
#'     Rcode=t(vectorv*t(matrixv)),
#'     times=10))[, c(1, 4, 5)]  # end microbenchmark summary
#'     
#' # Multiply the matrix columns using R
#' matrixr <- vectorv*matrixv
#' # Multiply the matrix columns using C++
#' HighFreq::mult_mat_ref(vectorv, matrixv, byrow=FALSE)
#' all.equal(matrixr, matrixv)
#' # Compare the speed of Rcpp with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#'     Rcpp=HighFreq::mult_mat_ref(vectorv, matrixv, byrow=FALSE),
#'     Rcode=vectorv*matrixv,
#'     times=10))[, c(1, 4, 5)]  # end microbenchmark summary
#' }
#' 
#' @export
mult_mat_ref <- function(vectorv, matrixv, byrow = TRUE) {
    invisible(.Call(`_HighFreq_mult_mat_ref`, vectorv, matrixv, byrow))
}

#' Calculate the eigen decomposition of a square, symmetric matrix using
#' \code{RcppArmadillo}.
#' 
#' @param \code{matrixv} A square, symmetric matrix.
#'
#' @param \code{eigenval} A \emph{vector} to receive the eigen values
#'   (overwritten in place).
#' 
#' @param \code{eigenvec} A \emph{matrix} to receive the eigen vectors
#'   (overwritten in place).
#' 
#' @return Void (no return value - passes the eigen values and eigen vectors by
#'   reference).
#'
#' @details
#'   The function \code{calc_eigen()} calculates the eigen decomposition of a
#'   square, symmetric matrix using \code{RcppArmadillo}.  It calls the
#'   \code{Armadillo} function \code{arma::eig_sym()} to calculate the eigen
#'   decomposition. 
#'   
#'   For small matrices, the function \code{calc_eigen()} is several times
#'   faster than the \code{R} function \code{eigen()}, since
#'   \code{calc_eigen()} has no overhead in \code{R} code. But for large
#'   matrices, they are about the same, since both call \code{C++} code.
#'
#' @examples
#' \dontrun{
#' # Create random positive semi-definite matrix
#' matrixv <- matrix(runif(25), nc=5)
#' matrixv <- t(matrixv) %*% matrixv
#' # Calculate the eigen decomposition using RcppArmadillo
#' eigenval <- numeric(5) # Allocate eigen values
#' eigenvec <- matrix(numeric(25), nc=5) # Allocate eigen vectors
#' HighFreq::calc_eigen(matrixv, eigenval, eigenvec)
#' # Calculate the eigen decomposition using R
#' eigenr <- eigen(matrixv)
#' # Compare the eigen decompositions
#' all.equal(eigenr$values, drop(eigenval))
#' all.equal(abs(eigenr$vectors), abs(eigenvec))
#' # Compare the speed of Rcpp with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#'   Rcpp=HighFreq::calc_eigen(matrixv, eigenval, eigenvec),
#'   Rcode=eigen(matrixv),
#'   times=10))[, c(1, 4, 5)]  # end microbenchmark summary
#' }
#' 
#' @export
calc_eigen <- function(matrixv, eigenval, eigenvec) {
    invisible(.Call(`_HighFreq_calc_eigen`, matrixv, eigenval, eigenvec))
}

#' Calculate the partial eigen decomposition of a dense symmetric matrix using
#' \code{RcppArmadillo}.
#' 
#' @param \code{matrixv} A square matrix.
#'
#' @param \code{neigen} An \emph{integer} equal to the number of eigenvalues
#'   to be calculated.
#'
#' @return A list with two elements: a \emph{vector} of eigenvalues (named
#'   "values"), and a \emph{matrix} of eigenvectors (named "vectors").
#'
#' @details
#'   The function \code{calc_eigenp()} calculates the partial eigen
#'   decomposition (the lowest order principal components, with the largest
#'   eigenvalues) of a dense matrix using \code{RcppArmadillo}.  It calls the internal
#'   \code{Armadillo} eigen solver \code{SymEigsSolver} in the namespace
#'   \code{arma::newarp} to calculate the partial eigen decomposition.
#'   
#'   The eigen solver \code{SymEigsSolver} uses the Implicitly Restarted
#'   Lanczos Method (IRLM) which was adapted from the
#'   \href{https://en.wikipedia.org/wiki/ARPACK}{ARPACK} library. The eigen
#'   solver \code{SymEigsSolver} was implemented by
#'   \href{https://github.com/yixuan/arpack-arma}{Yixuan Qiu}.
#'   
#'   The function \code{arma::eigs_sym()} also calculates the partial eigen
#'   decomposition using the eigen solver \code{SymEigsSolver}, but it only
#'   works for sparse matrices which are not standard R matrices.
#'   
#'   For matrices smaller than \code{100} rows, the function
#'   \code{calc_eigenp()} is slower than the function \code{calc_eigen()} which
#'   calculates the full eigen decomposition.  But it's faster for very large
#'   matrices.
#'
#' @examples
#' \dontrun{
#' # Create random positive semi-definite matrix
#' matrixv <- matrix(runif(100), nc=10)
#' matrixv <- t(matrixv) %*% matrixv
#' # Calculate the partial eigen decomposition
#' neigen <- 5
#' eigenp <- HighFreq::calc_eigenp(matrixv, neigen)
#' # Calculate the eigen decomposition using RcppArmadillo
#' eigenval <- numeric(10) # Allocate eigen values
#' eigenvec <- matrix(numeric(100), nc=10) # Allocate eigen vectors
#' HighFreq::calc_eigen(matrixv, eigenval, eigenvec)
#' # Compare the eigen decompositions
#' all.equal(eigenp$values[1:neigen], eigenval[1:neigen])
#' all.equal(abs(eigenp$vectors), abs(eigenvec[, 1:neigen]))
#' # Compare the speed of partial versus full decomposition
#' library(microbenchmark)
#' summary(microbenchmark(
#'   partial=HighFreq::calc_eigenp(matrixv, neigen),
#'   full=HighFreq::calc_eigen(matrixv, eigenval, eigenvec),
#'   times=10))[, c(1, 4, 5)]  # end microbenchmark summary
#' }
#' 
#' @export
calc_eigenp <- function(matrixv, neigen) {
    .Call(`_HighFreq_calc_eigenp`, matrixv, neigen)
}

#' Calculate the \emph{reduced inverse} of a symmetric \emph{matrix} of
#' data using eigen decomposition.
#' 
#' @param \code{matrixv} A symmetric \emph{matrix} of data.
#'   
#' @param \code{dimax} An \emph{integer} equal to the number of \emph{eigen
#'   values} used for calculating the \emph{reduced inverse} of the matrix
#'   \code{matrixv} (the default is \code{dimax = 0} - standard matrix inverse
#'   using all the \emph{eigen values}).
#' 
#' @param \code{singmin} A \emph{numeric} threshold level for discarding
#'   small \emph{eigen values} in order to regularize the inverse of the matrix
#'   \code{matrixv} (the default is \code{0.0}).
#'
#' @return A \emph{matrix} equal to the \emph{reduced inverse} of the
#'   matrix \code{matrixv}.
#'
#' @details
#'   The function \code{calc_inv()} calculates the \emph{reduced inverse}
#'   of the matrix \code{matrixv} using eigen decomposition.
#'   
#'   The function \code{calc_inv()} first performs eigen decomposition of the
#'   matrix \code{matrixv}.
#'   The eigen decomposition of a matrix \eqn{\strong{C}} is defined as the
#'   factorization:
#'   \deqn{
#'     \strong{C} = \strong{O} \, \Sigma \, \strong{O}^T
#'   } Where \eqn{\strong{O}} is the matrix of \emph{eigen vectors} and
#'   \eqn{\Sigma} is a diagonal matrix of \emph{eigen values}.
#'   
#'   The inverse \eqn{\strong{C}^{-1}} of the matrix \eqn{\strong{C}} can be
#'   calculated from the eigen decomposition as:
#'   \deqn{
#'     \strong{C}^{-1} = \strong{O} \, \Sigma^{-1} \, \strong{O}^T
#'   }
#'   
#'   The \emph{reduced inverse} of the matrix \eqn{\strong{C}} is obtained
#'   by removing \emph{eigen vectors} with very small \emph{eigen values}:
#'   \deqn{
#'     \strong{C}^{-1} = \strong{O}_{dimax} \, \Sigma^{-1}_{dimax} \, \strong{O}^T_{dimax}
#'   }
#'   Where \eqn{\strong{O}_{dimax}} is the matrix of \emph{eigen vectors} 
#'   that correspond to the largest \emph{eigen values} \eqn{\Sigma_{dimax}}. 
#'   
#'   The function \code{calc_inv()} applies regularization to the matrix
#'   inverse using the arguments \code{dimax} and \code{singmin}.
#'   
#'   The function \code{calc_inv()} applies regularization by discarding the
#'   smallest \emph{eigen values} \eqn{\Sigma_i} that are less than the
#'   threshold level \code{singmin} times the sum of all the \emph{eigen
#'   values}: \deqn{\Sigma_i < singmin \cdot (\sum{\Sigma_i})}
#'   
#'   It also discards additional \emph{eigen vectors} so that only the highest
#'   order \emph{eigen vectors} remain, up to order \code{dimax}.
#'   It calculates the \emph{reduced inverse} from the eigen decomposition
#'   using only the largest \emph{eigen values} up to \code{dimax}.  For
#'   example, if
#'   \code{dimax = 3} then it only uses the \code{3} highest order \emph{eigen
#'   vectors}, with the largest \emph{eigen values}. This has the effect of
#'   dimension reduction.
#'   
#'   If the matrix \code{matrixv} has a large number of small \emph{eigen
#'   values}, then the number of remaining \emph{eigen values} may be less than
#'   \code{dimax}.
#'   
#' @examples
#' \dontrun{
#' # Calculate ETF returns
#' retp <- na.omit(rutils::etfenv$returns[, c("VTI", "TLT", "DBC")])
#' # Calculate covariance matrix
#' covmat <- cov(retp)
#' # Calculate matrix inverse using RcppArmadillo
#' invmat <- HighFreq::calc_inv(covmat)
#' # Calculate matrix inverse in R
#' invr <- solve(covmat)
#' all.equal(invmat, invr, check.attributes=FALSE)
#' # Calculate reduced inverse using RcppArmadillo
#' invmat <- HighFreq::calc_inv(covmat, dimax=3)
#' # Calculate reduced inverse using eigen decomposition in R
#' eigend <- eigen(covmat)
#' dimax <- 1:3
#' invr <- eigend$vectors[, dimax] %*% (t(eigend$vectors[, dimax])/eigend$values[dimax])
#' # Compare RcppArmadillo with R
#' all.equal(invmat, invr)
#' }
#' 
#' @export
calc_inv <- function(matrixv, dimax = 0L, singmin = 0.0) {
    .Call(`_HighFreq_calc_inv`, matrixv, dimax, singmin)
}

#' Calculate the \emph{reduced inverse} of a \emph{matrix} of data using
#' Singular Value Decomposition (\emph{SVD}).
#' 
#' @param \code{matrixv} A \emph{matrix} of data.
#'   
#' @param \code{dimax} An \emph{integer} equal to the number of \emph{singular
#'   values} used for calculating the \emph{reduced inverse} of the matrix
#'   \code{matrixv} (the default is \code{dimax = 0} - standard matrix inverse
#'   using all the \emph{singular values}).
#' 
#' @param \code{singmin} A \emph{numeric} threshold level for discarding
#'   small \emph{singular values} in order to regularize the inverse of the
#'   matrix \code{matrixv} (the default is \code{0.0}).
#'
#' @return A \emph{matrix} equal to the \emph{reduced inverse} of the
#'   matrix \code{matrixv}.
#'
#' @details
#'   The function \code{calc_invsvd()} calculates the \emph{reduced
#'   inverse} of the matrix \code{matrixv} using Singular Value Decomposition
#'   (\emph{SVD}).
#'   
#'   The function \code{calc_invsvd()} first performs Singular Value
#'   Decomposition (\emph{SVD}) of the matrix \code{matrixv}.
#'   The \emph{SVD} of a matrix \eqn{\strong{C}} is defined as the
#'   factorization:
#'   \deqn{
#'     \strong{C} = \strong{U} \, \Sigma \, \strong{V}^T
#'   }
#'   Where \eqn{\strong{U}} and \eqn{\strong{V}} are the left and right
#'   \emph{singular matrices}, and \eqn{\Sigma} is a diagonal matrix of
#'   \emph{singular values}.
#'   
#'   The inverse \eqn{\strong{C}^{-1}} of the matrix \eqn{\strong{C}} can be
#'   calculated from the \emph{SVD} matrices as:
#'   \deqn{
#'     \strong{C}^{-1} = \strong{V} \, \Sigma^{-1} \, \strong{U}^T
#'   }
#'   
#'   The \emph{reduced inverse} of the matrix \eqn{\strong{C}} is obtained
#'   by removing \emph{singular vectors} with very small \emph{singular values}:
#'   \deqn{
#'     \strong{C}^{-1} = \strong{V}_n \, \Sigma_n^{-1} \, \strong{U}_n^T
#'   }
#'   Where \eqn{\strong{U}_n}, \eqn{\strong{V}_n} and \eqn{\Sigma_n} are the
#'   \emph{SVD} matrices with the rows and columns corresponding to very small
#'   \emph{singular values} removed.
#'   
#'   The function \code{calc_invsvd()} applies regularization to the matrix
#'   inverse using the arguments \code{dimax} and \code{singmin}.
#'   
#'   The function \code{calc_invsvd()} applies regularization by discarding the
#'   smallest \emph{singular values} \eqn{\sigma_i} that are less than the
#'   threshold level \code{singmin} times the sum of all the
#'   \emph{singular values}: \deqn{\sigma_i < singmin \cdot
#'   (\sum{\sigma_i})}
#'   
#'   It also discards additional \emph{singular vectors} so that only the
#'   highest order \emph{singular vectors} remain, up to order \code{dimax}. It
#'   calculates the \emph{reduced inverse} from the \emph{SVD} matrices
#'   using only the largest \emph{singular values} up to order \code{dimax}.
#'   For example, if \code{dimax = 3} then it only uses the \code{3} highest
#'   order \emph{singular vectors}, with the largest \emph{singular values}.
#'   This has the effect of dimension reduction.
#'   
#'   If the matrix \code{matrixv} has a large number of small \emph{singular
#'   values}, then the number of remaining \emph{singular values} may be less
#'   than \code{dimax}.
#'   
#' @examples
#' \dontrun{
#' # Calculate ETF returns
#' retp <- na.omit(rutils::etfenv$returns[, c("VTI", "TLT", "DBC")])
#' # Calculate covariance matrix
#' covmat <- cov(retp)
#' # Calculate matrix inverse using RcppArmadillo
#' invmat <- HighFreq::calc_invsvd(covmat)
#' # Calculate matrix inverse in R
#' invr <- solve(covmat)
#' all.equal(invmat, invr, check.attributes=FALSE)
#' # Calculate reduced inverse using RcppArmadillo
#' invmat <- HighFreq::calc_invsvd(covmat, dimax=3)
#' # Calculate reduced inverse from SVD in R
#' svdec <- svd(covmat)
#' dimax <- 1:3
#' invr <- svdec$v[, dimax] %*% (t(svdec$u[, dimax])/svdec$d[dimax])
#' # Compare RcppArmadillo with R
#' all.equal(invmat, invr)
#' }
#' 
#' @export
calc_invsvd <- function(matrixv, dimax = 0L, singmin = 0.0) {
    .Call(`_HighFreq_calc_invsvd`, matrixv, dimax, singmin)
}

#' Calculate the approximate inverse of a square \emph{matrix} recursively
#' using the Schulz formula (without copying the data in memory).
#' 
#' @param \code{matrixv} A \emph{matrix} of data to be inverted.
#' 
#' @param \code{invmat} A \emph{matrix} of data equal to the starting point for
#'   the recursion.
#' 
#' @param \code{niter} An \emph{integer} equal to the number of recursion
#'   iterations (the default is \code{niter = 1}).
#' 
#' @return No return value.
#'
#' @details
#'   The function \code{calc_invrec()} calculates the approximate inverse
#'   \eqn{\strong{A}^{-1}} of a square \emph{matrix} \eqn{\strong{A}}
#'   recursively using the Schulz formula:
#'   \deqn{
#'     \strong{A}_{i+1}^{-1} \leftarrow 2 \strong{A}_i^{-1} - \strong{A}_i^{-1} \strong{A} \strong{A}_i^{-1}
#'   }
#'   The Schulz formula is repeated \code{niter} times.
#'   The Schulz formula is useful for updating the inverse when the matrix
#'   \eqn{\strong{A}} changes only slightly.  For example, for updating the
#'   inverse of the covariance matrix as it changes slowly over time.
#'
#'   The function \code{calc_invrec()} accepts a \emph{pointer} to the argument
#'   \code{invmat} (which is the initial value of the inverse matrix for the
#'   recursion), and it overwrites the old inverse matrix values with the
#'   updated inverse values.
#'
#'   The function \code{calc_invrec()} performs the calculation in place,
#'   without copying the matrix in memory, which can significantly increase the
#'   computation speed for large matrices.
#'   
#'   The function \code{calc_invrec()} doesn't return a value.
#'   The function \code{calc_invrec()} performs the calculations using
#'   \code{C++} \code{Armadillo} code.
#'   
#' @examples
#' \dontrun{
#' # Calculate a random matrix
#' matrixv <- matrix(rnorm(100), nc=10)
#' # Define the initial value of the inverse matrix
#' invmat <- solve(matrixv) + matrix(rnorm(100, sd=0.1), nc=10)
#' # Calculate the inverse in place using RcppArmadillo
#' HighFreq::calc_invrec(matrixv, invmat, 3)
#' # Multiply the matrix times its inverse
#' multmat <- matrixv %*% invmat
#' round(multmat, 4)
#' # Calculate the sum of the off-diagonal elements
#' sum(multmat[upper.tri(multmat)])
#' # Compare RcppArmadillo with R
#' all.equal(invmat, solve(matrixv))
#' # Compare the speed of RcppArmadillo with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#'    rcode=solve(matrixv),
#'    cppcode=HighFreq::calc_invrec(matrixv, invmat, 3),
#'    times=10))[, c(1, 4, 5)]
#' }
#' 
#' @export
calc_invrec <- function(matrixv, invmat, niter = 1L) {
    invisible(.Call(`_HighFreq_calc_invrec`, matrixv, invmat, niter))
}
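
# The Schulz update from the formula above, written in plain R (a sketch,
# mirroring the data in the example; kept inert under if (FALSE)):
if (FALSE) {
    matrixv <- matrix(rnorm(100), nc=10)
    # Start from a perturbed inverse
    invmat <- solve(matrixv) + matrix(rnorm(100, sd=0.1), nc=10)
    for (it in 1:3)  # repeat the Schulz formula niter times
        invmat <- 2*invmat - invmat %*% matrixv %*% invmat
    all.equal(invmat, solve(matrixv))  # converges to the true inverse
}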

#' Calculate the inverse of a square \emph{matrix} in place, without copying
#' the data in memory.
#' 
#' @param \code{matrixv} A \emph{matrix} of data to be inverted.  (The argument
#'   is interpreted as a \emph{pointer} to a \emph{matrix}, and it is
#'   overwritten with the inverse matrix.)
#' 
#' @return No return value.
#'
#' @details
#'   The function \code{calc_invref()} calculates the inverse of a square
#'   \emph{matrix} in place, without copying the data in memory. It accepts a
#'   \emph{pointer} to the argument \code{matrixv} (which is the \code{matrix}
#'   to be inverted), and it overwrites the old matrix values with the inverse
#'   matrix values. It performs the calculation in place, without copying the
#'   data in memory, which can significantly increase the computation speed for
#'   large matrices.
#'
#'   The function \code{calc_invref()} doesn't return a value.
#'   The function \code{calc_invref()} calls the \code{C++} \code{Armadillo}
#'   function \code{arma::inv()} to calculate the matrix inverse.
#'   
#' @examples
#' \dontrun{
#' # Calculate a random matrix
#' matrixv <- matrix(rnorm(100), nc=10)
#' # Copy matrixv to a matrix in a different memory location
#' invmat <- matrixv + 0
#' # Calculate the inverse in place using RcppArmadillo
#' HighFreq::calc_invref(invmat)
#' # Multiply the matrix times its inverse
#' multmat <- matrixv %*% invmat
#' round(multmat, 4)
#' # Calculate the sum of the off-diagonal elements
#' sum(multmat[upper.tri(multmat)])
#' # Compare RcppArmadillo with R
#' all.equal(invmat, solve(matrixv))
#' # Compare the speed of RcppArmadillo with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#'    rcode=solve(matrixv),
#'    cppcode=HighFreq::calc_invref(matrixv),
#'    times=10))[, c(1, 4, 5)]
#' }
#' 
#' @export
calc_invref <- function(matrixv) {
    invisible(.Call(`_HighFreq_calc_invref`, matrixv))
}

#' Standardize (center and scale) the columns of a \emph{time series} of data
#' in place, without copying the data in memory, using \code{RcppArmadillo}.
#' 
#' @param \code{tseries} A \emph{time series} or \emph{matrix} of data.
#' 
#' @param \code{center} A \emph{Boolean} argument: if \code{TRUE} then center
#'   the columns so that they have zero mean or median (the default is
#'   \code{TRUE}).
#' 
#' @param \code{scale} A \emph{Boolean} argument: if \code{TRUE} then scale the
#'   columns so that they have unit standard deviation or MAD (the default is
#'   \code{TRUE}).
#' 
#' @param \code{use_median} A \emph{Boolean} argument: if \code{TRUE} then the 
#'   centrality (central tendency) is calculated as the \emph{median} and the 
#'   dispersion is calculated as the \emph{median absolute deviation}
#'   (\emph{MAD}) (the default is \code{FALSE}).
#'   If \code{use_median = FALSE} then the centrality is calculated as the
#'   \emph{mean} and the dispersion is calculated as the \emph{standard
#'   deviation}.
#'
#' @return Void (no return value - modifies the data in place).
#'
#' @details
#'   The function \code{calc_scale()} standardizes (centers and scales) the
#'   columns of a \emph{time series} of data in place, without copying the data
#'   in memory, using \code{RcppArmadillo}.
#'
#'   If the arguments \code{center} and \code{scale} are both \code{TRUE} and
#'   \code{use_median} is \code{FALSE} (the defaults), then \code{calc_scale()}
#'   performs the same calculation as the standard \code{R} function
#'   \code{scale()}, and it calculates the centrality (central tendency) as the
#'   \emph{mean} and the dispersion as the \emph{standard deviation}.
#'
#'   If the arguments \code{center} and \code{scale} are both \code{TRUE} (the
#'   defaults), then \code{calc_scale()} standardizes the data.
#'   If the argument \code{center} is \code{FALSE} then \code{calc_scale()}
#'   only scales the data (divides it by the standard deviations).
#'   If the argument \code{scale} is \code{FALSE} then \code{calc_scale()}
#'   only demeans the data (subtracts the means).
#'   
#'   If the argument \code{use_median} is \code{TRUE}, then it calculates the
#'   centrality as the \emph{median} and the dispersion as the \emph{median
#'   absolute deviation} (\emph{MAD}).
#'
#'   If the number of rows of \code{tseries} is less than \code{3} then
#'   \code{calc_scale()} does nothing and \code{tseries} is not scaled.
#'   
#'   The function \code{calc_scale()} accepts a \emph{pointer} to the argument
#'   \code{tseries}, and it overwrites the old data with the standardized data.
#'   It performs the calculation in place, without copying the data in memory,
#'   which can significantly increase the computation speed for large time
#'   series.
#'
#'   The function \code{calc_scale()} uses \code{RcppArmadillo} \code{C++}
#'   code, so on a typical time series it can be over \emph{10} times faster
#'   than the function \code{scale()}.
#'   
#' @examples
#' \dontrun{
#' # Calculate a time series of returns
#' retp <- zoo::coredata(na.omit(rutils::etfenv$returns[, c("IEF", "VTI")]))
#' # Demean the returns
#' demeaned <- apply(retp, 2, function(x) (x-mean(x)))
#' HighFreq::calc_scale(retp, scale=FALSE)
#' all.equal(demeaned, retp, check.attributes=FALSE)
#' # Calculate a time series of returns
#' retp <- zoo::coredata(na.omit(rutils::etfenv$returns[, c("IEF", "VTI")]))
#' # Standardize the returns
#' retss <- scale(retp)
#' HighFreq::calc_scale(retp)
#' all.equal(retss, retp, check.attributes=FALSE)
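#' # Sketch (an assumption based on the use_median argument): center by the
#' # median and scale by the MAD.  R's mad() applies the consistency factor
#' # 1.4826, which the C++ code may not, so also try constant=1 if this fails
#' retp <- zoo::coredata(na.omit(rutils::etfenv$returns[, c("IEF", "VTI")]))
#' retsm <- apply(retp, 2, function(x) (x - median(x))/mad(x))
#' HighFreq::calc_scale(retp, use_median=TRUE)
#' all.equal(retsm, retp, check.attributes=FALSE)
#' # Reset the returns for the speed comparison below
#' retp <- zoo::coredata(na.omit(rutils::etfenv$returns[, c("IEF", "VTI")]))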
#' # Compare the speed of Rcpp with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#'   Rcode=scale(retp),
#'   Rcpp=HighFreq::calc_scale(retp),
#'   times=100))[, c(1, 4, 5)]  # end microbenchmark summary
#' }
#' 
#' @export
calc_scale <- function(tseries, center = TRUE, scale = TRUE, use_median = FALSE) {
    invisible(.Call(`_HighFreq_calc_scale`, tseries, center, scale, use_median))
}

#' Aggregate a time series of data into a single bar of \emph{OHLC} data.
#'
#' @param \code{tseries} A \emph{time series} or a \emph{matrix} with multiple
#'   columns of data.
#'
#' @return A \emph{matrix} containing a single row, with the \emph{open},
#'   \emph{high}, \emph{low}, and \emph{close} values, and also the total
#'   \emph{volume} (if provided as either the second or fifth column of
#'   \code{tseries}).
#'
#' @details
#'   The function \code{agg_ohlc()} aggregates a time series of data into a
#'   single bar of \emph{OHLC} data. It can accept either a single column of
#'   data or four columns of \emph{OHLC} data.
#'   It can also accept an additional column containing the trading volume.
#'   
#'   The function \code{agg_ohlc()} calculates the \emph{open} value as the
#'   \emph{open} value of the first row of \code{tseries}; the \emph{high}
#'   value as the maximum of the \emph{high} column of \code{tseries}; the
#'   \emph{low} value as the minimum of the \emph{low} column of
#'   \code{tseries}; the \emph{close} value as the \emph{close} value of the
#'   last row of \code{tseries}; and the \emph{volume} value as the sum of the
#'   \emph{volume} column of \code{tseries}.
#'
#'   For a single column of data, the \emph{open}, \emph{high}, \emph{low}, and
#'   \emph{close} values are all the same.
#'
#' @examples
#' \dontrun{
#' # Define matrix of OHLC data
#' ohlc <- coredata(rutils::etfenv$VTI[, 1:5])
#' # Aggregate to single row matrix
#' ohlcagg <- HighFreq::agg_ohlc(ohlc)
#' # Compare with calculation in R
#' all.equal(drop(ohlcagg),
#'   c(ohlc[1, 1], max(ohlc[, 2]), min(ohlc[, 3]), ohlc[NROW(ohlc), 4], sum(ohlc[, 5])), 
#'   check.attributes=FALSE)
#' }
#' 
#' @export
agg_ohlc <- function(tseries) {
    .Call(`_HighFreq_agg_ohlc`, tseries)
}

#' Aggregate a time series to an \emph{OHLC} time series with lower
#' periodicity.
#'
#' Given a time series of prices at a higher periodicity (say seconds), it
#' calculates the \emph{OHLC} prices at a lower periodicity (say minutes).
#'
#' @param \code{tseries} A \emph{time series} or a \emph{matrix} with multiple
#'   columns of data.
#'   
#' @param \code{endd} An \emph{integer vector} of end points.
#'
#' @return A \emph{matrix} with \emph{OHLC} data, with the number of rows equal
#'   to the number of end points in \code{endd} minus one.
#'   
#' @details
#'   The function \code{roll_ohlc()} performs a loop over the end points
#'   \code{endd}, along the rows of the data \code{tseries}. At each end point,
#'   it selects the past rows of the data \code{tseries}, starting at the first
#'   bar after the previous end point, and then calls the function
#'   \code{agg_ohlc()} on the selected data \code{tseries} to calculate the
#'   aggregations.
#'   
#'   The function \code{roll_ohlc()} can accept either a single column of data
#'   or four columns of \emph{OHLC} data.
#'   It can also accept an additional column containing the trading volume.
#'
#'   The function \code{roll_ohlc()} performs a similar aggregation as the
#'   function \code{to.period()} from package
#'   \href{https://cran.r-project.org/web/packages/xts/index.html}{xts}.
#'
#' @examples
#' \dontrun{
#' # Define matrix of OHLC data
#' ohlc <- rutils::etfenv$VTI[, 1:5]
#' # Define end points at 25 day intervals
#' endd <- HighFreq::calc_endpoints(NROW(ohlc), step=25)
#' # Aggregate over endd:
#' ohlcagg <- HighFreq::roll_ohlc(tseries=ohlc, endd=endd)
#' # Compare with xts::to.period()
#' ohlcagg_xts <- .Call("toPeriod", ohlc, as.integer(endd+1), TRUE, NCOL(ohlc), FALSE, FALSE, colnames(ohlc), PACKAGE="xts")
#' all.equal(ohlcagg, coredata(ohlcagg_xts), check.attributes=FALSE)
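#' # Sketch in plain R: loop over the end points and aggregate each interval
#' # with agg_ohlc() (the indexing below is an assumption based on the details)
#' ohlcv <- zoo::coredata(ohlc)
#' aggr <- t(sapply(2:NROW(endd), function(it) {
#'   HighFreq::agg_ohlc(ohlcv[(endd[it-1]+2):(endd[it]+1), ])
#' }))  # end sapply
#' # Should reproduce roll_ohlc(), up to the handling of the first interval
#' all.equal(aggr, ohlcagg, check.attributes=FALSE)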
#' }
#' 
#' @export
roll_ohlc <- function(tseries, endd) {
    .Call(`_HighFreq_roll_ohlc`, tseries, endd)
}

#' Calculate the rolling convolutions (weighted sums) of a \emph{time series}
#' with a single-column \emph{matrix} of weights.
#' 
#' @param \code{tseries} A \emph{time series} or a \emph{matrix} of data.
#' 
#' @param \code{weightv} A single-column \emph{matrix} of weights.
#'
#' @return A \emph{matrix} with the same dimensions as the input argument
#'   \code{tseries}.
#'
#' @details
#'   The function \code{roll_conv()} calculates the convolutions of the
#'   \emph{matrix} columns with a single-column \emph{matrix} of weights.  It
#'   performs a loop over the \emph{matrix} rows and multiplies the past values
#'   (the rows above) by the weights.  It calculates the rolling weighted sums
#'   of the past data.
#'   
#'   The function \code{roll_conv()} uses the \code{Armadillo} function
#'   \code{arma::conv2()}. It performs a similar calculation to the standard
#'   \code{R} function \cr\code{filter(x=tseries, filter=weightv,
#'   method="convolution", sides=1)}, but it's over \code{6} times faster, and
#'   it doesn't produce any leading \code{NA} values.
#'   
#' @examples
#' \dontrun{
#' # First example
#' # Calculate a time series of returns
#' retp <- na.omit(rutils::etfenv$returns[, c("IEF", "VTI")])
#' # Create simple weights equal to a 1 value plus zeros
#' weightv <- c(1, rep(0, 10))
#' # Calculate rolling weighted sums
#' retf <- HighFreq::roll_conv(retp, weightv)
#' # Compare with original
#' all.equal(coredata(retp), retf, check.attributes=FALSE)
#' # Second example
#' # Calculate exponentially decaying weights
#' weightv <- exp(-0.2*(1:11))
#' weightv <- weightv/sum(weightv)
#' # Calculate rolling weighted sums
#' retf <- HighFreq::roll_conv(retp, weightv)
#' # Calculate rolling weighted sums using filter()
#' retc <- filter(x=retp, filter=weightv, method="convolution", sides=1)
#' # Compare both methods
#' all.equal(retc[-(1:11), ], retf[-(1:11), ], check.attributes=FALSE)
#' }
#' 
#' @export
roll_conv <- function(tseries, weightv) {
    .Call(`_HighFreq_roll_conv`, tseries, weightv)
}

#' Calculate the rolling sums over a \emph{time series} or a \emph{matrix}
#' using \emph{Rcpp}.
#' 
#' @param \code{tseries} A \emph{time series} or a \emph{matrix}.
#' 
#' @param \code{lookb} The length of the look-back interval, equal to the
#'   number of data points included in calculating the rolling sum (the default
#'   is \code{lookb = 1}).
#'   
#' @param \code{weightv} A single-column \emph{matrix} of weights (the default
#'   is \code{weightv = 0}).
#'
#' @return A \emph{matrix} with the same dimensions as the input argument
#'   \code{tseries}.
#'
#' @details
#'   The function \code{roll_sum()} calculates the rolling \emph{weighted} sums
#'   over the columns of the data \code{tseries}.
#'   
#'   If the argument \code{weightv} is equal to zero (the default), then the
#'   function \code{roll_sum()} calculates the simple rolling sums of the 
#'   \emph{time series} data \eqn{p_t} over the look-back interval \eqn{\Delta}:
#'   \deqn{
#'     \bar{p}_t = \sum_{j=(t-\Delta+1)}^{t} p_j
#'   }
#'   
#'   If the \code{weightv} argument has the same number of rows as the argument
#'   \code{tseries}, then the function \code{roll_sum()} calculates rolling 
#'   \emph{weighted} sums of the \emph{time series} data \eqn{p_t} in two steps.
#'   
#'   It first calculates the rolling sums of the weights \eqn{w_t}, and of the
#'   products of the weights times the \emph{time series} data \eqn{p_t}, over
#'   the look-back interval \eqn{\Delta}:
#'   \deqn{
#'     \bar{w}_t = \sum_{j=(t-\Delta+1)}^{t} w_j
#'   }
#'   \deqn{
#'     \bar{p}^w_t = \sum_{j=(t-\Delta+1)}^{t} w_j p_j
#'   }
#'   
#'   It then calculates the rolling \emph{weighted} sums \eqn{\bar{p}_t} as the
#'   sums of the products of the weights and the data, divided by the sums of
#'   the weights:
#'   \deqn{
#'     \bar{p}_t = \frac{\bar{p}^w_t}{\bar{w}_t}
#'   }
#'   
#'   The function \code{roll_sum()} returns a \emph{matrix} with the same
#'   dimensions as the input argument \code{tseries}.
#' 
#'   The function \code{roll_sum()} is written in \code{C++} \code{Armadillo}
#'   code, so it's much faster than equivalent \code{R} code.
#'   
#' @examples
#' \dontrun{
#' # Calculate historical returns
#' retp <- na.omit(rutils::etfenv$returns[, c("VTI", "IEF")])
#' # Define parameters
#' lookb <- 11
#' # Calculate rolling sums and compare with rutils::roll_sum()
#' sumc <- HighFreq::roll_sum(retp, lookb)
#' sumr <- rutils::roll_sum(retp, lookb)
#' all.equal(sumc, coredata(sumr), check.attributes=FALSE)
#' # Calculate rolling sums using R code
#' sumr <- apply(zoo::coredata(retp), 2, cumsum)
#' sumlag <- rbind(matrix(numeric(2*lookb), nc=2), sumr[1:(NROW(sumr) - lookb), ])
#' sumr <- (sumr - sumlag)
#' all.equal(sumc, sumr, check.attributes=FALSE)
#' # Calculate weights equal to the trading volumes
#' weightv <- quantmod::Vo(rutils::etfenv$VTI)
#' weightv <- weightv[zoo::index(retp)]
#' # Calculate rolling weighted sums
#' sumc <- HighFreq::roll_sum(retp, lookb, 1/weightv)
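#' # Sketch (an assumption based on the formulas above): replicate the weighted
#' # sums as the ratio of two simple rolling sums
#' retm <- zoo::coredata(retp)
#' wm <- drop(zoo::coredata(1/weightv))
#' sumwp <- HighFreq::roll_sum(retm*wm, lookb)  # rolling sums of w*p
#' sumw <- HighFreq::roll_sum(matrix(wm, nc=1), lookb)  # rolling sums of w
#' all.equal(sumc, sumwp/drop(sumw), check.attributes=FALSE)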
#' # Plot dygraph of the weighted sums
#' datav <- cbind(retp$VTI, sumc[, 1])
#' colnames(datav) <- c("VTI", "Weighted")
#' endd <- rutils::calc_endpoints(datav, interval="weeks")
#' dygraphs::dygraph(cumsum(datav)[endd], main=colnames(datav)) %>% 
#'   dyOptions(colors=c("blue", "red"), strokeWidth=2) %>% 
#'   dyLegend(width=300)
#' }
#' 
#' @export
roll_sum <- function(tseries, lookb = 1L, weightv = 0L) {
    .Call(`_HighFreq_roll_sum`, tseries, lookb, weightv)
}

#' Calculate the rolling sums at the end points of a \emph{time series} or a
#' \emph{matrix}.
#' 
#' @param \code{tseries} A \emph{time series} or a \emph{matrix}.
#' 
#' @param \code{startp} An \emph{integer} vector of start points (the default
#'   is \code{startp = 0}).
#' 
#' @param \code{endd} An \emph{integer} vector of end points (the default is
#'   \code{endd = 0}).
#' 
#' @param \code{step} The number of time periods between the end points (the
#'   default is \code{step = 1}).
#'
#' @param \code{lookb} The number of end points in the look-back interval
#'   (the default is \code{lookb = 1}).
#'   
#' @param \code{stub} An \emph{integer} value equal to the first end point for
#'   calculating the end points.
#'
#' @return A \emph{matrix} with the same number of columns as the input time
#'   series \code{tseries}, and the number of rows equal to the number of end
#'   points.
#'   
#' @details
#'   The function \code{roll_sumep()} calculates the rolling sums at the end
#'   points of the \emph{time series} \code{tseries}.
#'   
#'   The function \code{roll_sumep()} is implemented in \code{RcppArmadillo}
#'   \code{C++} code, which makes it several times faster than \code{R} code.
#'   
#' @examples
#' \dontrun{
#' # Calculate historical returns
#' retp <- na.omit(rutils::etfenv$returns[, c("VTI", "IEF")])
#' # Define end points at 25 day intervals
#' endd <- HighFreq::calc_endpoints(NROW(retp), step=25)
#' # Define start points as 75 day lag of end points
#' startp <- HighFreq::calc_startpoints(endd, lookb=3)
#' # Calculate rolling sums using Rcpp
#' sumc <- HighFreq::roll_sumep(retp, startp=startp, endd=endd)
#' # Calculate rolling sums using R code
#' sumr <- sapply(1:NROW(endd), function(ep) {
#'   colSums(retp[(startp[ep]+1):(endd[ep]+1), ])
#' })  # end sapply
#' sumr <- t(sumr)
#' all.equal(sumc, sumr, check.attributes=FALSE)
#' }
#' 
#' @export
roll_sumep <- function(tseries, startp = 0L, endd = 0L, step = 1L, lookb = 1L, stub = 0L) {
    .Call(`_HighFreq_roll_sumep`, tseries, startp, endd, step, lookb, stub)
}

#' Calculate the rolling weighted sums over a \emph{time series} or a
#' \emph{matrix} using \emph{Rcpp}.
#' 
#' @param \code{tseries} A \emph{time series} or a \emph{matrix}.
#' 
#' @param \code{endd} An \emph{integer} vector of end points (the default is
#'   \code{endd = NULL}).
#'   
#' @param \code{lookb} The length of the look-back interval, equal to the
#'   number of data points included in calculating the rolling sum (the default
#'   is \code{lookb = 1}).
#'   
#' @param \code{stub} An \emph{integer} value equal to the first end point for
#'   calculating the end points (the default is \code{stub = NULL}).
#' 
#' @param \code{weightv} A single-column \emph{matrix} of weights (the default
#'   is \code{weightv = NULL}).
#'
#' @return A \emph{matrix} with the same dimensions as the input argument
#'   \code{tseries}.
#'
#' @details
#'   The function \code{roll_sumw()} calculates the rolling weighted sums over
#'   the columns of the data \code{tseries}.
#' 
#'   The function \code{roll_sumw()} calculates the rolling weighted sums as
#'   convolutions of the columns of \code{tseries} with the \emph{column
#'   vector} of weights using the \code{Armadillo} function
#'   \code{arma::conv2()}.  It performs a similar calculation to the standard
#'   \code{R} function \cr\code{stats::filter(x=retp, filter=weightv,
#'   method="convolution", sides=1)}, but it can be many times faster, and it
#'   doesn't produce any leading \code{NA} values.
#'   
#'   The function \code{roll_sumw()} returns a \emph{matrix} with the same
#'   dimensions as the input argument \code{tseries}.
#' 
#'   The arguments \code{weightv}, \code{endd}, and \code{stub} are
#'   optional.
#'   
#'   If the argument \code{weightv} is not supplied, then simple sums are
#'   calculated, not weighted sums.
#'   
#'   If either the \code{stub} or \code{endd} arguments are supplied,
#'   then the rolling sums are calculated at the end points. 
#'   
#'   If only the argument \code{stub} is supplied, then the end points are
#'   calculated from the \code{stub} and \code{lookb} arguments. The first
#'   end point is equal to \code{stub} and the end points are spaced
#'   \code{lookb} periods apart.
#'   
#'   If the arguments \code{weightv}, \code{endd}, and \code{stub} are
#'   not supplied, then the sums are calculated over a number of data points
#'   equal to \code{lookb}.
#'   
#'   The function \code{roll_sumw()} is also several times faster than
#'   \code{rutils::roll_sum()} which uses vectorized \code{R} code.
#'   
#'   Technical note:
#'   The function \code{roll_sumw()} has arguments with default values equal to
#'   \code{NULL}, which are implemented in \code{Rcpp} code.
#'   
#' @examples
#' \dontrun{
#' # First example
#' # Calculate historical returns
#' retp <- na.omit(rutils::etfenv$returns[, c("VTI", "IEF")])
#' # Define parameters
#' lookb <- 11
#' # Calculate rolling sums and compare with rutils::roll_sum()
#' sumc <- HighFreq::roll_sum(retp, lookb)
#' sumr <- rutils::roll_sum(retp, lookb)
#' all.equal(sumc, coredata(sumr), check.attributes=FALSE)
#' # Calculate rolling sums using R code
#' sumr <- apply(zoo::coredata(retp), 2, cumsum)
#' sumlag <- rbind(matrix(numeric(2*lookb), nc=2), sumr[1:(NROW(sumr) - lookb), ])
#' sumr <- (sumr - sumlag)
#' all.equal(sumc, sumr, check.attributes=FALSE)
#' 
#' # Calculate rolling sums at end points
#' stubv <- 21
#' sumc <- HighFreq::roll_sumw(retp, lookb=lookb, stub=stubv)
#' endd <- (stubv + lookb*(0:(NROW(retp) %/% lookb)))
#' endd <- endd[endd < NROW(retp)]
#' sumr <- apply(zoo::coredata(retp), 2, cumsum)
#' sumr <- sumr[endd+1, ]
#' sumlag <- rbind(numeric(2), sumr[1:(NROW(sumr) - 1), ])
#' sumr <- (sumr - sumlag)
#' all.equal(sumc, sumr, check.attributes=FALSE)
#' 
#' # Calculate rolling sums at end points - pass in endd
#' sumc <- HighFreq::roll_sumw(retp, endd=endd)
#' all.equal(sumc, sumr, check.attributes=FALSE)
#' 
#' # Create exponentially decaying weights
#' weightv <- exp(-0.2*(1:11))
#' weightv <- matrix(weightv/sum(weightv), nc=1)
#' # Calculate rolling weighted sum
#' sumc <- HighFreq::roll_sumw(retp, weightv=weightv)
#' # Calculate rolling weighted sum using filter()
#' retc <- filter(x=retp, filter=weightv, method="convolution", sides=1)
#' all.equal(sumc[-(1:11), ], retc[-(1:11), ], check.attributes=FALSE)
#' 
#' # Calculate rolling weighted sums at end points
#' sumc <- HighFreq::roll_sumw(retp, endd=endd, weightv=weightv)
#' all.equal(sumc, retc[endd+1, ], check.attributes=FALSE)
#' 
#' # Create simple weights equal to a 1 value plus zeros
#' weightv <- matrix(c(1, rep(0, 10)), nc=1)
#' # Calculate rolling weighted sum
#' weighted <- HighFreq::roll_sumw(retp, weightv=weightv)
#' # Compare with original
#' all.equal(coredata(retp), weighted, check.attributes=FALSE)
#' }
#' 
#' @export
roll_sumw <- function(tseries, endd = NULL, lookb = 1L, stub = NULL, weightv = NULL) {
    .Call(`_HighFreq_roll_sumw`, tseries, endd, lookb, stub, weightv)
}

#' Calculate the exponential moving average (EMA) of streaming \emph{time
#' series} data using an online recursive formula.
#' 
#' @param \code{tseries} A \emph{time series} or a \emph{matrix}.
#' 
#' @param \code{lambda} A decay factor which multiplies past estimates.
#'   
#' @param \code{weightv} A single-column \emph{matrix} of weights (the default
#'   is \code{weightv = 0}).
#'
#' @return A \emph{matrix} with the same dimensions as the input argument
#'   \code{tseries}.
#'
#' @details
#'   The function \code{run_mean()} calculates the exponential moving average
#'   (EMA) of the streaming \emph{time series} data \eqn{p_t} by recursively
#'   weighting present and past values using the decay factor \eqn{\lambda}. If
#'   the \code{weightv} argument is equal to zero, then the function
#'   \code{run_mean()} simply calculates the exponentially weighted moving
#'   average value of the streaming \emph{time series} data \eqn{p_t}:
#'   \deqn{
#'     \bar{p}_t = \lambda \bar{p}_{t-1} + (1-\lambda) p_t = (1-\lambda) \sum_{j=0}^{n} \lambda^j p_{t-j}
#'   }
#'   
#'   Some applications require applying additional weight factors, such as the
#'   volume-weighted average price indicator (VWAP), for which the streaming
#'   prices are multiplied by the streaming trading volumes.
#'   
#'   If the argument \code{weightv} has the same number of rows as the argument
#'   \code{tseries}, then the function \code{run_mean()} calculates the
#'   exponential moving average (EMA) in two steps.
#'   
#'   First it calculates the trailing mean weights \eqn{\bar{w}_t}:
#'   \deqn{
#'     \bar{w}_t = \lambda \bar{w}_{t-1} + (1-\lambda) w_t
#'   }
#'   
#'   Second it calculates the trailing mean products \eqn{\bar{w p}_t} of the
#'   weights \eqn{w_t} and the data \eqn{p_t}:
#'   \deqn{
#'     \bar{w p}_t = \lambda \bar{w p}_{t-1} + (1-\lambda) w_t p_t
#'   }
#'   Where \eqn{p_t} is the streaming data, \eqn{w_t} are the streaming
#'   weights, \eqn{\bar{w}_t} are the trailing mean weights, and \eqn{\bar{w p}_t}
#'   are the trailing mean products of the data and the weights.
#'   
#'   The trailing weighted mean \eqn{\bar{p}_t} is equal to the trailing mean
#'   products of the data and the weights, divided by the trailing mean
#'   weights:
#'   \deqn{
#'     \bar{p}_t = \frac{\bar{w p}_t}{\bar{w}_t}
#'   }
#' 
#'   The above online recursive formulas are convenient for processing live
#'   streaming data because they don't require maintaining a buffer of past
#'   data.
#'   The formulas are equivalent to a convolution with exponentially decaying
#'   weights, but they're much faster to calculate.
#'   Using exponentially decaying weights is more natural than using a sliding
#'   look-back interval, because it gradually "forgets" about the past data.
#'   
#'   The value of the decay factor \eqn{\lambda} must be in the range between
#'   \code{0} and \code{1}.  
#'   If \eqn{\lambda} is close to \code{1} then the decay is weak and past
#'   values have a greater weight, and the trailing mean values have a stronger
#'   dependence on past data.  This is equivalent to a long look-back
#'   interval.
#'   If \eqn{\lambda} is much less than \code{1} then the decay is strong and
#'   past values have a smaller weight, and the trailing mean values have a
#'   weaker dependence on past data.  This is equivalent to a short look-back
#'   interval.
#' 
#'   The function \code{run_mean()} performs the same calculation as the
#'   standard \code{R} function\cr\code{stats::filter(x=series, filter=lambda,
#'   method="recursive")}, but it's several times faster.
#' 
#'   The function \code{run_mean()} returns a \emph{matrix} with the same
#'   dimensions as the input argument \code{tseries}.
#'   
#' @examples
#' \dontrun{
#' # Calculate historical prices
#' ohlc <- rutils::etfenv$VTI
#' closep <- quantmod::Cl(ohlc)
#' # Calculate the trailing means
#' lambdaf <- 0.9
#' meanv <- HighFreq::run_mean(closep, lambda=lambdaf)
#' # Calculate the trailing means using R code
#' pricef <- (1-lambdaf)*filter(closep, 
#'   filter=lambdaf, init=as.numeric(closep[1, 1])/(1-lambdaf), 
#'   method="recursive")
#' all.equal(drop(meanv), unclass(pricef), check.attributes=FALSE)
#' 
#' # Compare the speed of RcppArmadillo with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#'   Rcpp=HighFreq::run_mean(closep, lambda=lambdaf),
#'   Rcode=filter(closep, filter=lambdaf, init=as.numeric(closep[1, 1])/(1-lambdaf), method="recursive"),
#'   times=10))[, c(1, 4, 5)]  # end microbenchmark summary
#'   
#' # Calculate weights equal to the trading volumes
#' weightv <- quantmod::Vo(ohlc)
#' # Calculate the exponential moving average (EMA)
#' meanw <- HighFreq::run_mean(closep, lambda=lambdaf, weightv=weightv)
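#' # Sketch (an assumption based on the formulas above): replicate the weighted
#' # recursion in plain R; the initialization is an assumption, so only the
#' # later values are expected to agree closely
#' pv <- drop(zoo::coredata(closep))
#' wv <- drop(zoo::coredata(weightv))
#' nrows <- NROW(pv)
#' meanr <- numeric(nrows)
#' wbar <- wv[1]; wpbar <- wv[1]*pv[1]; meanr[1] <- pv[1]
#' for (it in 2:nrows) {
#'   wbar <- lambdaf*wbar + (1-lambdaf)*wv[it]
#'   wpbar <- lambdaf*wpbar + (1-lambdaf)*wv[it]*pv[it]
#'   meanr[it] <- wpbar/wbar
#' }  # end for
#' tail(cbind(drop(meanw), meanr))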
#' # Plot dygraph of the EMA
#' datav <- xts(cbind(meanv, meanw), zoo::index(ohlc))
#' colnames(datav) <- c("means trailing", "means weighted")
#' dygraphs::dygraph(datav, main="Trailing Means") %>%
#'   dyOptions(colors=c("blue", "red"), strokeWidth=2) %>%
#'   dyLegend(show="always", width=300)
#' }
#' 
#' @export
run_mean <- function(tseries, lambda, weightv = 0L) {
    .Call(`_HighFreq_run_mean`, tseries, lambda, weightv)
}

#' Calculate the trailing maximum values of streaming \emph{time series} data
#' using an online recursive formula.
#' 
#' @param \code{tseries} A \emph{time series} or a \emph{matrix}.
#' 
#' @param \code{lambda} A decay factor which multiplies past estimates.
#'   
#' @return A \emph{matrix} with the same dimensions as the input argument
#'   \code{tseries}.
#'
#' @details
#'   The function \code{run_max()} calculates the trailing maximum values of
#'   streaming \emph{time series} data by recursively weighting present and past
#'   values using the decay factor \eqn{\lambda}.
#'
#'   It calculates the trailing maximum values \eqn{p^{max}_t} of the streaming
#'   data \eqn{p_t} as follows:
#'   \deqn{
#'     p^{max}_t = max(p_t, \lambda p^{max}_{t-1} + (1-\lambda) p_t)
#'   }
#'   The first term in the sum is the maximum value multiplied by the decay
#'   factor \eqn{\lambda}, so that the past maximum value is gradually
#'   "forgotten". The second term pulls the maximum value to the current value
#'   \eqn{p_t}.
#'   
#'   The value of the decay factor \eqn{\lambda} must be in the range between
#'   \code{0} and \code{1}.  
#'   If \eqn{\lambda} is close to \code{1} then the past maximum values persist
#'   for longer.  This is equivalent to a long look-back interval.
#'   If \eqn{\lambda} is much less than \code{1} then the past maximum values
#'   decay quickly, and the trailing maximum depends on the more recent
#'   streaming data.  This is equivalent to a short look-back interval.
#' 
#'   The above formula can also be expressed as:
#'   \deqn{
#'     p^{max}_t = \lambda max(p_t, p^{max}_{t-1}) + (1-\lambda) p_t
#'   }
#'   The first term is the maximum value multiplied by the decay factor
#'   \eqn{\lambda}, so that the past maximum value is gradually "forgotten".
#'   The second term pulls the maximum value to the current value \eqn{p_t}.
#'   
#'   The above recursive formula is convenient for processing live streaming
#'   data because it doesn't require maintaining a buffer of past data.
#' 
#'   The function \code{run_max()} returns a \emph{matrix} with the same
#'   dimensions as the input argument \code{tseries}.
#'   
#' @examples
#' \dontrun{
#' # Calculate historical prices
#' closep <- zoo::coredata(quantmod::Cl(rutils::etfenv$VTI))
#' # Calculate the trailing maximums
#' lambdaf <- 0.9
#' pricmax <- HighFreq::run_max(closep, lambda=lambdaf)
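#' # Sketch (an assumption): replicate the recursion in plain R, initialized
#' # at the first price
#' nrows <- NROW(closep)
#' maxr <- numeric(nrows)
#' maxr[1] <- closep[1]
#' for (it in 2:nrows) {
#'   maxr[it] <- max(closep[it], lambdaf*maxr[it-1] + (1-lambdaf)*closep[it])
#' }  # end for
#' all.equal(drop(pricmax), maxr, check.attributes=FALSE)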
#' # Plot dygraph of VTI prices and trailing maximums
#' datav <- cbind(quantmod::Cl(rutils::etfenv$VTI), pricmax)
#' colnames(datav) <- c("prices", "max")
#' colnamev <- colnames(datav)
#' dygraphs::dygraph(datav, main="VTI Prices and Trailing Maximums") %>%
#'   dySeries(label=colnamev[1], strokeWidth=2, col="blue") %>%
#'   dySeries(label=colnamev[2], strokeWidth=2, col="red")
#' }
#' 
#' @export
run_max <- function(tseries, lambda) {
    .Call(`_HighFreq_run_max`, tseries, lambda)
}

#' Calculate the trailing minimum values of streaming \emph{time series} data
#' using an online recursive formula.
#' 
#' @param \code{tseries} A \emph{time series} or a \emph{matrix}.
#' 
#' @param \code{lambda} A decay factor which multiplies past estimates.
#'   
#' @return A \emph{matrix} with the same dimensions as the input argument
#'   \code{tseries}.
#'
#' @details
#'   The function \code{run_min()} calculates the trailing minimum values of
#'   streaming \emph{time series} data by recursively weighting present and past
#'   values using the decay factor \eqn{\lambda}.
#'
#'   It calculates the trailing minimum values \eqn{p^{min}_t} of the streaming
#'   data \eqn{p_t} as follows:
#'   \deqn{
#'     p^{min}_t = min(p_t, \lambda p^{min}_{t-1} + (1-\lambda) p_t)
#'   }
#'   The first term in the sum is the minimum value multiplied by the decay
#'   factor \eqn{\lambda}, so that the past minimum value is gradually
#'   "forgotten". The second term pulls the minimum value to the current value
#'   \eqn{p_t}.
#'   
#'   The value of the decay factor \eqn{\lambda} must be in the range between
#'   \code{0} and \code{1}.  
#'   If \eqn{\lambda} is close to \code{1} then the past minimum values persist
#'   for longer.  This is equivalent to a long look-back interval.
#'   If \eqn{\lambda} is much less than \code{1} then the past minimum values
#'   decay quickly, and the trailing minimum depends on the more recent
#'   streaming data.  This is equivalent to a short look-back interval.
#' 
#'   The above formula can also be expressed as:
#'   \deqn{
#'     p^{min}_t = \lambda min(p_t, p^{min}_{t-1}) + (1-\lambda) p_t
#'   }
#'   The first term is the minimum value multiplied by the decay factor
#'   \eqn{\lambda}, so that the past minimum value is gradually "forgotten".
#'   The second term pulls the minimum value to the current value \eqn{p_t}.
#'   
#'   The above recursive formula is convenient for processing live streaming
#'   data because it doesn't require maintaining a buffer of past data.
#' 
#'   The function \code{run_min()} returns a \emph{matrix} with the same
#'   dimensions as the input argument \code{tseries}.
#'   
#' @examples
#' \dontrun{
#' # Calculate historical prices
#' closep <- zoo::coredata(quantmod::Cl(rutils::etfenv$VTI))
#' # Calculate the trailing minimums
#' lambdaf <- 0.9
#' pricmin <- HighFreq::run_min(closep, lambda=lambdaf)
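#' # Sketch (an assumption): replicate the recursion in plain R, initialized
#' # at the first price
#' nrows <- NROW(closep)
#' minr <- numeric(nrows)
#' minr[1] <- closep[1]
#' for (it in 2:nrows) {
#'   minr[it] <- min(closep[it], lambdaf*minr[it-1] + (1-lambdaf)*closep[it])
#' }  # end for
#' all.equal(drop(pricmin), minr, check.attributes=FALSE)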
#' # Plot dygraph of VTI prices and trailing minimums
#' datav <- cbind(quantmod::Cl(rutils::etfenv$VTI), pricmin)
#' colnames(datav) <- c("prices", "min")
#' colnamev <- colnames(datav)
#' dygraphs::dygraph(datav, main="VTI Prices and Trailing Minimums") %>%
#'   dySeries(label=colnamev[1], strokeWidth=1, col="blue") %>%
#'   dySeries(label=colnamev[2], strokeWidth=1, col="red")
#' }
#' 
#' @export
run_min <- function(tseries, lambda) {
    .Call(`_HighFreq_run_min`, tseries, lambda)
}

#' Calculate the trailing variance of streaming \emph{time series} of data
#' using an online recursive formula.
#' 
#' @param \code{tseries} A \emph{time series} or a \emph{matrix} of data.
#' 
#' @param \code{lambda} A decay factor which multiplies past estimates.
#'   
#' @return A \emph{matrix} with the same dimensions as the input argument
#'   \code{tseries}.
#'
#' @details
#'   The function \code{run_var()} calculates the trailing variance of
#'   streaming \emph{time series} of data \eqn{r_t}, by recursively weighting
#'   the past variance estimates \eqn{\sigma^2_{t-1}}, with the squared
#'   differences of the data minus its trailing means \eqn{(r_t -
#'   \bar{r}_t)^2}, using the decay factor \eqn{\lambda}:
#'   \deqn{
#'     \bar{r}_t = \lambda \bar{r}_{t-1} + (1-\lambda) r_t
#'   }
#'   \deqn{
#'     \sigma^2_t = \lambda \sigma^2_{t-1} + (1-\lambda) (r_t - \bar{r}_t)^2
#'   }
#'   Where \eqn{r_t} are the streaming data, \eqn{\bar{r}_t} are the trailing
#'   means, and \eqn{\sigma^2_t} are the trailing variance estimates.
#' 
#'   The above online recursive formulas are convenient for processing live
#'   streaming data because they don't require maintaining a buffer of past
#'   data.
#'   The formulas are equivalent to a convolution with exponentially decaying
#'   weights, but they're much faster to calculate.
#'   Using exponentially decaying weights is more natural than using a sliding
#'   look-back interval, because it gradually "forgets" about the past data.
#' 
#'   The value of the decay factor \eqn{\lambda} must be in the range between
#'   \code{0} and \code{1}.  
#'   If \eqn{\lambda} is close to \code{1} then the decay is weak and past
#'   values have a greater weight, and the trailing variance values have a
#'   stronger dependence on past data.  This is equivalent to a long
#'   look-back interval.
#'   If \eqn{\lambda} is much less than \code{1} then the decay is strong and
#'   past values have a smaller weight, and the trailing variance values have a
#'   weaker dependence on past data.  This is equivalent to a short look-back
#'   interval.
#' 
#'   The function \code{run_var()} performs the same calculation as the
#'   standard \code{R} function\cr\code{stats::filter(x=series, filter=lambda,
#'   method="recursive")}, but it's several times faster.
#' 
#'   The function \code{run_var()} returns a \emph{matrix} with the same
#'   dimensions as the input argument \code{tseries}.
#'   
#' @examples
#' \dontrun{
#' # Calculate historical returns
#' retp <- zoo::coredata(na.omit(rutils::etfenv$returns$VTI))
#' # Calculate the trailing variance
#' lambdaf <- 0.9
#' vars <- HighFreq::run_var(retp, lambda=lambdaf)
#' # Calculate centered returns
#' retc <- (retp - HighFreq::run_mean(retp, lambda=lambdaf))
#' # Calculate the trailing variance using R code
#' retc2 <- (1-lambdaf)*filter(retc^2, filter=lambdaf, 
#'   init=as.numeric(retc[1, 1])^2/(1-lambdaf), 
#'   method="recursive")
#' all.equal(vars, unclass(retc2), check.attributes=FALSE)
#' # Compare the speed of RcppArmadillo with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#'   Rcpp=HighFreq::run_var(retp, lambda=lambdaf),
#'   Rcode=filter(retc^2, filter=lambdaf, init=as.numeric(retc[1, 1])^2/(1-lambdaf), method="recursive"),
#'   times=10))[, c(1, 4, 5)]  # end microbenchmark summary
#' }
#' 
#' @export
run_var <- function(tseries, lambda) {
    .Call(`_HighFreq_run_var`, tseries, lambda)
}

#' Calculate the trailing means, volatilities, and z-scores of a streaming
#' \emph{time series} of data using an online recursive formula.
#' 
#' @param \code{tseries} A single \emph{time series} or a single-column
#'   \emph{matrix} of data.
#' 
#' @param \code{lambda} A decay factor which multiplies past estimates.
#'   
#' @return A \emph{matrix} with three columns (means, volatilities, and
#'   z-scores) and the same number of rows as the input argument
#'   \code{tseries}.
#'
#' @details
#'   The function \code{run_zscores()} calculates the trailing means,
#'   volatilities, and z-scores of a single streaming \emph{time series} of
#'   data \eqn{r_t}, by recursively weighting the past variance estimates
#'   \eqn{\sigma^2_{t-1}}, with the squared differences of the data minus its
#'   trailing means \eqn{(r_t - \bar{r}_t)^2}, using the decay factor
#'   \eqn{\lambda}:
#'   \deqn{
#'     \bar{r}_t = \lambda \bar{r}_{t-1} + (1-\lambda) r_t
#'   }
#'   \deqn{
#'     \sigma^2_t = \lambda \sigma^2_{t-1} + (1-\lambda) (r_t - \bar{r}_t)^2
#'   }
#'   \deqn{
#'     z_t = \frac{r_t - \bar{r}_t}{\sigma_t}
#'   }
#'   Where \eqn{r_t} are the streaming data, \eqn{\bar{r}_t} are the trailing
#'   means, \eqn{\sigma^2_t} are the trailing variance estimates, and \eqn{z_t}
#'   are the z-scores.
#'   
#'   The above online recursive formulas are convenient for processing live
#'   streaming data because they don't require maintaining a buffer of past
#'   data.
#'   The formulas are equivalent to a convolution with exponentially decaying
#'   weights, but they're much faster to calculate.
#'   Using exponentially decaying weights is more natural than using a sliding
#'   look-back interval, because it gradually "forgets" about the past data.
#' 
#'   The value of the decay factor \eqn{\lambda} must be in the range between
#'   \code{0} and \code{1}.  
#'   If \eqn{\lambda} is close to \code{1} then the decay is weak and past
#'   values have a greater weight, and the trailing variance values have a
#'   stronger dependence on past data.  This is equivalent to a long
#'   look-back interval.
#'   If \eqn{\lambda} is much less than \code{1} then the decay is strong and
#'   past values have a smaller weight, and the trailing variance values have a
#'   weaker dependence on past data.  This is equivalent to a short look-back
#'   interval.
#' 
#'   The function \code{run_zscores()} returns a \emph{matrix} with three
#'   columns (means, volatilities, and z-scores) and the same number of rows as
#'   the input argument \code{tseries}.
#'   
#' @examples
#' \dontrun{
#' # Calculate historical VTI log prices
#' pricev <- log(na.omit(rutils::etfenv$prices$VTI))
#' # Calculate the trailing variance and z-scores of prices
#' lambdaf <- 0.9 # Decay factor
#' zscores <- HighFreq::run_zscores(pricev, lambda=lambdaf)
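#' # Sketch (an assumption based on the formulas above): replicate the
#' # recursion in plain R; the initialization is an assumption, so only the
#' # later values are expected to agree closely
#' pv <- drop(zoo::coredata(pricev))
#' nrows <- NROW(pv)
#' meanr <- numeric(nrows); varr <- numeric(nrows); zscr <- numeric(nrows)
#' meanr[1] <- pv[1]
#' for (it in 2:nrows) {
#'   meanr[it] <- lambdaf*meanr[it-1] + (1-lambdaf)*pv[it]
#'   varr[it] <- lambdaf*varr[it-1] + (1-lambdaf)*(pv[it] - meanr[it])^2
#'   zscr[it] <- (pv[it] - meanr[it])/sqrt(varr[it])
#' }  # end for
#' tail(cbind(zscores[, 3], zscr))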
#' datav <- cbind(pricev, zscores[, 3])
#' colnames(datav) <- c("VTI", "Zscores")
#' colnamev <- colnames(datav)
#' dygraphs::dygraph(datav, main="VTI Prices and Z-scores") %>%
#'    dyAxis("y", label=colnamev[1], independentTicks=TRUE) %>%
#'    dyAxis("y2", label=colnamev[2], independentTicks=TRUE) %>%
#'    dySeries(axis="y", label=colnamev[1], strokeWidth=2, col="blue") %>%
#'    dySeries(axis="y2", label=colnamev[2], strokeWidth=2, col="red") %>%
#'    dyLegend(show="always", width=300)
#' }
#' 
#' @export
run_zscores <- function(tseries, lambda) {
    .Call(`_HighFreq_run_zscores`, tseries, lambda)
}

#' Calculate the trailing variance of streaming \emph{OHLC} price data using an
#' online recursive formula.
#' 
#' @param \code{ohlc} A \emph{time series} or a \emph{matrix} with \emph{OHLC}
#'   price data.
#'   
#' @param \code{lambda} A decay factor which multiplies past estimates.
#'
#' @return A single-column \emph{matrix} of variance estimates, with the same
#'   number of rows as the input \code{ohlc} price data.
#'
#' @details
#'   The function \code{run_var_ohlc()} calculates a single-column
#'   \emph{matrix} of variance estimates of streaming \emph{OHLC} price data.
#'   
#'   The function \code{run_var_ohlc()} calculates the variance from the
#'   differences between the \emph{Open}, \emph{High}, \emph{Low}, and
#'   \emph{Close} prices, using the \emph{Yang-Zhang} range volatility
#'   estimator:
#'   \deqn{
#'     \sigma^2_t = (1-\lambda) ((O_t - C_{t-1})^2 + 0.134 (C_t - O_t)^2 + 
#'     0.866 ((H_t - O_t) (H_t - C_t) + (L_t - O_t) (L_t - C_t))) + 
#'     \lambda \sigma^2_{t-1}
#'   }
#'   It recursively weights the current variance estimate with the past
#'   estimates \eqn{\sigma^2_{t-1}}, using the decay factor \eqn{\lambda}.
#'
#'   The above recursive formula is convenient for processing live streaming
#'   data because it doesn't require maintaining a buffer of past data.
#'   The formula is equivalent to a convolution with exponentially decaying
#'   weights, but it's faster.
#'   Using exponentially decaying weights is more natural than using a sliding
#'   look-back interval, because it gradually "forgets" about the past data.
#'   
#'   The function \code{run_var_ohlc()} does not calculate the logarithm of
#'   the prices.
#'   So if the argument \code{ohlc} contains dollar prices then
#'   \code{run_var_ohlc()} calculates the dollar variance.
#'   If the argument \code{ohlc} contains the log prices then
#'   \code{run_var_ohlc()} calculates the percentage variance.
#'   
#'   The function \code{run_var_ohlc()} is implemented in \code{RcppArmadillo}
#'   \code{C++} code, which makes it several times faster than \code{R} code.
#'
#' @examples
#' \dontrun{
#' # Extract the log OHLC prices of VTI
#' ohlc <- log(rutils::etfenv$VTI)
#' # Calculate the trailing variance
#' vart <- HighFreq::run_var_ohlc(ohlc, lambda=0.8)
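#' # Sketch (an assumption based on the formula above): replicate the recursion
#' # in plain R; the initialization is an assumption, so only the later values
#' # are expected to agree closely
#' ohlcv <- zoo::coredata(ohlc)
#' openp <- ohlcv[, 1]; highp <- ohlcv[, 2]; lowp <- ohlcv[, 3]; closep <- ohlcv[, 4]
#' clag <- c(closep[1], closep[-NROW(ohlcv)])
#' varyz <- (openp - clag)^2 + 0.134*(closep - openp)^2 +
#'   0.866*((highp - openp)*(highp - closep) + (lowp - openp)*(lowp - closep))
#' varr <- numeric(NROW(ohlcv))
#' varr[1] <- varyz[1]
#' for (it in 2:NROW(ohlcv)) varr[it] <- 0.8*varr[it-1] + (1-0.8)*varyz[it]
#' tail(cbind(vart, varr))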
#' # Calculate the rolling variance
#' varoll <- HighFreq::roll_var_ohlc(ohlc, lookb=5, method="yang_zhang", scale=FALSE)
#' datav <- cbind(vart, varoll)
#' colnames(datav) <- c("trailing", "rolling")
#' colnamev <- colnames(datav)
#' datav <- xts::xts(datav, index(ohlc))
#' # dygraph plot of VTI trailing versus rolling volatility
#' dygraphs::dygraph(sqrt(datav[-(1:111), ]), main="Trailing and Rolling Volatility of VTI") %>%
#'   dyOptions(colors=c("red", "blue"), strokeWidth=2) %>%
#'   dyLegend(show="always", width=300)
#' # Compare the speed of trailing versus rolling volatility
#' library(microbenchmark)
#' summary(microbenchmark(
#'   trailing=HighFreq::run_var_ohlc(ohlc, lambda=0.8),
#'   rolling=HighFreq::roll_var_ohlc(ohlc, lookb=5, method="yang_zhang", scale=FALSE),
#'   times=10))[, c(1, 4, 5)]
#' }
#' @export
run_var_ohlc <- function(ohlc, lambda) {
    .Call(`_HighFreq_run_var_ohlc`, ohlc, lambda)
}

#' Calculate the correlation matrix from the covariance matrix.
#' 
#' @param \code{covmat} A \emph{matrix} of covariances.
#' 
#' @return Void (no return value - modifies the covariance matrix in place).
#' 
#' @details
#'   The function \code{push_cov2cor()} calculates the correlation matrix from
#'   the covariance matrix, in place, without copying the data in memory.
#'   
#'   The function \code{push_cov2cor()} accepts a \emph{pointer} to the
#'   covariance matrix, and it overwrites it with the correlation matrix.
#'
#'   The function \code{push_cov2cor()} is written in \code{RcppArmadillo}
#'   \code{C++} so it's much faster than \code{R} code.
#'   
#' @examples
#' \dontrun{
#' # Calculate a time series of returns
#' retp <- na.omit(rutils::etfenv$returns[, c("IEF", "VTI", "DBC")])
#' # Calculate the covariance matrix of returns
#' covmat <- cov(retp)
#' # Calculate the correlation matrix of returns
#' push_cov2cor(covmat)
#' all.equal(covmat, cor(retp))
#' }
#' 
#' @export
push_cov2cor <- function(covmat) {
    invisible(.Call(`_HighFreq_push_cov2cor`, covmat))
}

#' Update the trailing covariance matrix of streaming asset returns,
#' with a row of new returns using an online recursive formula.
#' 
#' @param \code{retsn} A \emph{vector} of new asset returns.
#' 
#' @param \code{covmat} A trailing covariance \emph{matrix} of asset returns.
#' 
#' @param \code{meanv} A \emph{vector} of trailing means of asset returns.
#' 
#' @param \code{lambdacov} A decay factor which multiplies the past covariance.
#' 
#' @return Void (no return value - modifies the trailing covariance matrix
#'   and the return means in place).
#' 
#' @details
#'   The function \code{push_covar()} updates the trailing covariance matrix of
#'   streaming asset returns, with a row of new returns.  It updates the
#'   covariance matrix in place, without copying the data in memory.
#'   
#'   The streaming asset returns \eqn{r_t} contain multiple columns and the
#'   parameter \code{retsn} represents a single row of \eqn{r_t} - the asset
#'   returns at time \eqn{t}.  The vectors \code{retsn} and \code{meanv} each
#'   represent a single row of data with multiple columns.
#'   
#'   The function \code{push_covar()} accepts \emph{pointers} to the arguments
#'   \code{covmat} and \code{meanv}, 
#'   and it overwrites the old values with the new values. It performs the
#'   calculation in place, without copying the data in memory, which can
#'   significantly increase the computation speed for large matrices.
#'
#'   First, the function \code{push_covar()} updates the trailing means
#'   \eqn{\bar{r}_t} of the streaming asset returns \eqn{r_t} by recursively
#'   weighting present and past values using the decay factor \eqn{\lambda}:
#'   \deqn{
#'     \bar{r}_t = \lambda \bar{r}_{t-1} + (1-\lambda) r_t
#'   }
#'   This recursive formula is equivalent to the exponentially weighted moving
#'   average of the streaming asset returns \eqn{r_t}.
#'
#'   It then calculates the demeaned returns:
#'   \deqn{
#'     \hat{r}_t = r_t - \bar{r}_t
#'   }
#'   
#'   Finally, it updates the trailing covariance matrix of the returns:
#'   \deqn{
#'     {cov}_t = \lambda {cov}_{t-1} + (1-\lambda) \hat{r}^T_t \hat{r}_t
#'   }
#'   
#'   The decay factor \eqn{\lambda} determines the strength of the updates,
#'   with smaller \eqn{\lambda} values giving more weight to the new data. If
#'   the asset returns are not stationary, then applying more weight to the new
#'   returns reduces the bias of the trailing covariance matrix, but it also
#'   increases its variance. Simulation can be used to find the value of the
#'   \eqn{\lambda} parameter to achieve the best bias-variance tradeoff.
#'   
#'   The function \code{push_covar()} is written in \code{RcppArmadillo}
#'   \code{C++} so it's much faster than \code{R} code.
#'   
#' @examples
#' \dontrun{
#' # Calculate a time series of returns
#' retp <- na.omit(rutils::etfenv$returns[, c("IEF", "VTI", "DBC")])
#' # Calculate the returns without last row
#' nrows <- NROW(retp)
#' retss <- retp[-nrows]
#' # Calculate the covariance of returns
#' meanv <- colMeans(retss)
#' covmat <- cov(retss)
#' # Update the covariance of returns
#' HighFreq::push_covar(retsn=retp[nrows], covmat=covmat, meanv=meanv, lambdacov=0.9)
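#' # Sketch (an assumption based on the formulas above): replicate the update
#' # in plain R and compare with the values updated in place
#' lambdacov <- 0.9
#' meanr <- colMeans(retss)
#' covr <- cov(retss)
#' retn <- zoo::coredata(retp[nrows])
#' meanr <- lambdacov*meanr + (1-lambdacov)*retn
#' retd <- retn - meanr
#' covr <- lambdacov*covr + (1-lambdacov)*crossprod(retd)
#' all.equal(covmat, covr, check.attributes=FALSE)
#' all.equal(meanv, drop(meanr), check.attributes=FALSE)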
#' }
#' 
#' @export
push_covar <- function(retsn, covmat, meanv, lambdacov) {
    invisible(.Call(`_HighFreq_push_covar`, retsn, covmat, meanv, lambdacov))
}

#' Update the trailing eigen values and eigen vectors of streaming asset return
#' data, with a row of new returns.
#' 
#' @param \code{retsn} A \emph{vector} of new asset returns.
#' 
#' @param \code{covmat} A trailing covariance \emph{matrix} of asset returns.
#' 
#' @param \code{eigenval} A \emph{vector} of eigen values.
#' 
#' @param \code{eigenvec} A \emph{matrix} of eigen vectors.
#' 
#' @param \code{eigenret} A \emph{vector} of eigen portfolio returns.
#' 
#' @param \code{meanv} A \emph{vector} of trailing means of asset returns.
#' 
#' @param \code{lambdacov} A decay factor which multiplies the past covariance.
#' 
#' @return Void (no return value - modifies the trailing eigen values, eigen
#'   vectors, the eigen portfolio returns, and the return means in place).
#' 
#' @details
#'   The function \code{push_eigen()} updates the trailing eigen values, eigen
#'   vectors, and the eigen portfolio returns of streaming asset returns, with
#'   a row of new data.  It updates the eigenelements in place, without copying
#'   the data in memory.
#'   
#'   The streaming asset returns \eqn{r_t} contain multiple columns and the
#'   parameter \code{retsn} represents a single row of \eqn{r_t} - the asset
#'   returns at time \eqn{t}.  The vectors \code{retsn}, \code{eigenret}, and
#'   \code{meanv} each represent a single row of data with multiple columns.
#'   
#'   The function \code{push_eigen()} accepts \emph{pointers} to the arguments
#'   \code{covmat}, \code{eigenval}, \code{eigenvec}, \code{meanv}, and
#'   \code{eigenret}, and it overwrites the old values with the new values. It
#'   performs the calculation in place, without copying the data in memory,
#'   which can significantly increase the computation speed for large matrices.
#'
#'   First, the function \code{push_eigen()} calls the function
#'   \code{HighFreq::push_covar()} to update the trailing covariance matrix of
#'   streaming asset returns, with a row of new returns.  It updates the
#'   covariance matrix in place, without copying the data in memory.
#'   
#'   It then calls the \code{Armadillo} function \code{arma::eig_sym} to
#'   calculate the eigen decomposition of the trailing covariance matrix.
#'   
#'   The function \code{push_eigen()} calculates the eigen portfolio returns by
#'   multiplying the scaled asset returns times the eigen vectors
#'   \eqn{\strong{v}_{t-1}}:
#'   \deqn{
#'     r^{eigen}_t = \strong{v}_{t-1} \frac{r_t}{\sigma_{t-1}}
#'   }
#'   Where \eqn{\strong{v}_{t-1}} is the matrix of previous eigen vectors that
#'   are passed by reference through the parameter \code{eigenvec}. The eigen
#'   returns \eqn{r^{eigen}_t} are the returns of the eigen portfolios, with
#'   weights equal to the eigen vectors \eqn{\strong{v}_{t-1}}. The eigen
#'   weights are applied to the asset returns scaled by their volatilities.
#'   The eigen returns \eqn{r^{eigen}_t} are passed by reference through the
#'   parameter \code{eigenret}. 
#'   
#'   The decay factor \eqn{\lambda} determines the strength of the updates,
#'   with smaller \eqn{\lambda} values giving more weight to the new data. If
#'   the asset returns are not stationary, then applying more weight to the new
#'   returns reduces the bias of the trailing covariance matrix, but it also
#'   increases its variance. Simulation can be used to find the value of the
#'   \eqn{\lambda} parameter to achieve the best bias-variance tradeoff.
#'   
#'   The function \code{push_eigen()} is written in \code{RcppArmadillo}
#'   \code{C++} so it's much faster than \code{R} code.
#'   
#' @examples
#' \dontrun{
#' # Calculate a time series of returns
#' retp <- na.omit(rutils::etfenv$returns[, c("IEF", "VTI", "DBC")])
#' # Calculate the returns without the last row
#' nrows <- NROW(retp)
#' retss <- retp[-nrows]
#' # Calculate the previous covariance of returns
#' meanv <- colMeans(retss)
#' covmat <- cov(retss)
#' # Allocate the eigen values, eigen vectors, and eigen returns
#' ncols <- NCOL(retp)
#' eigenval <- numeric(ncols)
#' eigenvec <- matrix(numeric(ncols^2), nc=ncols)
#' eigenret <- numeric(ncols)
#' # Update the eigen decomposition with the new returns
#' HighFreq::push_eigen(retsn=retp[nrows], covmat=covmat, 
#'   eigenval=eigenval, eigenvec=eigenvec, 
#'   eigenret=eigenret, meanv=meanv, lambdacov=0.9)
#' }
#' 
#' @export
push_eigen <- function(retsn, covmat, eigenval, eigenvec, eigenret, meanv, lambdacov) {
    invisible(.Call(`_HighFreq_push_eigen`, retsn, covmat, eigenval, eigenvec, eigenret, meanv, lambdacov))
}

#' Update the trailing eigen values and eigen vectors of streaming asset return
#' data, with a row of new returns, using the \emph{SGA} algorithm.
#' 
#' @param \code{retsn} A \emph{vector} of new asset returns.
#' 
#' @param \code{eigenval} A \emph{vector} of eigen values.
#' 
#' @param \code{eigenvec} A \emph{matrix} of eigen vectors.
#' 
#' @param \code{eigenret} A \emph{vector} of eigen portfolio returns.
#' 
#' @param \code{meanv} A \emph{vector} of trailing means of asset returns.
#' 
#' @param \code{varv} A \emph{vector} of the trailing asset variances.
#' 
#' @param \code{lambda} A decay factor which multiplies the past mean and
#'   variance.
#' 
#' @param \code{gamma} A \emph{numeric} gain factor which multiplies the past
#'   eigenelements.
#' 
#' @return Void (no return value - modifies the trailing eigen values, eigen
#'   vectors, the return means, and the return variances in place).
#' 
#' @details
#'   The function \code{push_sga()} updates the trailing eigen values, eigen
#'   vectors, and the eigen portfolio returns of streaming asset returns, with
#'   a row of new data, using the \emph{SGA} algorithm. It updates the
#'   eigenelements in place, without copying the data in memory.
#'   
#'   The streaming asset returns \eqn{r_t} contain multiple columns and the
#'   parameter \code{retsn} represents a single row of \eqn{r_t} - the asset
#'   returns at time \eqn{t}.  The vectors \code{retsn}, \code{meanv}, and
#'   \code{varv} each represent a single row of data with multiple columns.
#'   
#'   The function \code{push_sga()} accepts \emph{pointers} to the arguments
#'   \code{eigenval}, \code{eigenvec}, \code{meanv}, and \code{varv},
#'   and it overwrites the old values with the new values. It performs the
#'   calculation in place, without copying the data in memory, which can
#'   significantly increase the computation speed for large matrices.
#'
#'   First, the function \code{push_sga()} updates the trailing means
#'   \eqn{\bar{r}_t} and variances \eqn{\sigma^2_t} of the streaming asset
#'   returns \eqn{r_t} by recursively weighting present and past values
#'   using the decay factor \eqn{\lambda}:
#'   \deqn{
#'     \bar{r}_t = \lambda \bar{r}_{t-1} + (1-\lambda) r_t
#'   }
#'   \deqn{
#'     \sigma^2_t = \lambda \sigma^2_{t-1} + (1-\lambda) (r_t - \bar{r}_t)^2
#'   }
#'   The past values \eqn{\bar{r}_{t-1}} and \eqn{\sigma^2_{t-1}} are passed in
#'   by reference through the variables \code{meanv} and \code{varv}. The
#'   updated values are then passed out by reference.
#'
#'   These recursive formulas are equivalent to the exponentially weighted
#'   moving averages of the streaming asset returns \eqn{r_t}.
#'
#'   It then calculates a vector of the eigen portfolio returns:
#'   \deqn{
#'     r^{eigen}_t = \strong{v}_{t-1} \frac{r_t}{\sigma_{t-1}}
#'   }
#'   Where \eqn{\strong{v}_{t-1}} is the matrix of previous eigen vectors that
#'   are passed by reference through the parameter \code{eigenvec}. The eigen
#'   returns \eqn{r^{eigen}_t} are the returns of the eigen portfolios, with
#'   weights equal to the eigen vectors \eqn{\strong{v}_{t-1}}. The eigen
#'   weights are applied to the asset returns scaled by their volatilities.
#'   The eigen returns \eqn{r^{eigen}_t} are passed by reference through the
#'   parameter \code{eigenret}. 
#'   
#'   The function \code{push_sga()} then standardizes the columns of the new
#'   returns:
#'   \deqn{
#'     \hat{r}_t = \frac{r_t - \bar{r}_t}{\sigma_t}
#'   }
#'   
#'   Finally, the vector of eigen values \eqn{\Lambda_{j, t}} and the matrix of
#'   eigen vectors \eqn{\strong{v}_{j, t}} (\eqn{j} is the column index) are
#'   then updated using the \emph{SGA} algorithm:
#'   \deqn{
#'     \Lambda_{j, t} = (1-\gamma) \Lambda_{j, t-1} + \gamma \phi_{j, t-1}
#'   }
#'   \deqn{
#'     \strong{v}_{j, t} = \strong{v}_{j, t-1} + \gamma \phi_{j, t-1} (\hat{r}_{t} - \phi_{j, t-1} \strong{v}_{j, t-1} - 2 \sum_{i=1}^{j-1} \phi_{i, t-1} \strong{v}_{i, t-1})
#'   }
#'   Where \eqn{\phi_{j, t-1} = \hat{r}_{t} \strong{v}_{j, t-1}} are the matrix
#'   products of the new data times the previous eigen vectors. 
#'   
#'   The gain factor \eqn{\gamma} determines the strength of the updates, with
#'   larger \eqn{\gamma} values giving more weight to the new data. If the
#'   asset returns are not stationary, then applying more weight to the new
#'   returns reduces the bias of the trailing eigen vectors, but it also
#'   increases their variance. Simulation can be used to find the value of the
#'   \eqn{\gamma} parameter to achieve the best bias-variance tradeoff.
#'   
#'   A description of the \emph{SGA} algorithm can be found in the package
#'   \href{https://cran.r-project.org/web/packages/onlinePCA/index.html}{onlinePCA} and in the 
#'   \href{https://paperswithcode.com/paper/online-principal-component-analysis-in-high}{Online PCA paper}.
#'   
#'   The function \code{push_sga()} is written in \code{RcppArmadillo}
#'   \code{C++} code and it calls the \code{Armadillo} function
#'   \code{arma::qr_econ()} to perform the QR decomposition, to calculate the
#'   eigen vectors.
#'   
#' @examples
#' \dontrun{
#' # Calculate a time series of returns
#' retp <- na.omit(rutils::etfenv$returns[, c("IEF", "VTI", "DBC")])
#' # Calculate the covariance of returns without the last row
#' nrows <- NROW(retp)
#' retss <- retp[-nrows]
#' HighFreq::calc_scale(retss)
#' meanv <- colMeans(retss)
#' varv <- sapply(retss, var)
#' covmat <- cov(retss)
#' ncols <- NCOL(retss)
#' # Calculate the eigen decomposition using RcppArmadillo
#' eigenval <- numeric(ncols) # Allocate eigen values
#' eigenvec <- matrix(numeric(ncols^2), nc=ncols) # Allocate eigen vectors
#' HighFreq::calc_eigen(covmat, eigenval, eigenvec)
#' # Update the eigen decomposition using SGA
#' eigenret <- numeric(NCOL(retp))
#' HighFreq::push_sga(retsn=retp[nrows], 
#'   eigenval=eigenval, eigenvec=eigenvec, 
#'   eigenret=eigenret, meanv=meanv, varv=varv, lambda=0.9, gamma=0.1)
#' }
#' 
#' @export
push_sga <- function(retsn, eigenval, eigenvec, eigenret, meanv, varv, lambda, gamma) {
    invisible(.Call(`_HighFreq_push_sga`, retsn, eigenval, eigenvec, eigenret, meanv, varv, lambda, gamma))
}

#' Calculate the trailing covariances of two streaming \emph{time series} of
#' returns using an online recursive formula.
#' 
#' @param \code{tseries} A \emph{time series} or a \emph{matrix} with two
#'   columns of returns data.
#' 
#' @param \code{lambda} A decay factor which multiplies past estimates.
#'   
#' @return A \emph{matrix} with five columns of data: the trailing covariances,
#'   the variances, and the mean values of the two columns of the argument
#'   \code{tseries}.
#'
#' @details
#'   The function \code{run_covar()} calculates the trailing covariances of two
#'   streaming \emph{time series} of returns, by recursively weighting the past
#'   covariance estimates \eqn{{cov}_{t-1}}, with the products of their
#'   returns minus their means, using the decay factor \eqn{\lambda}:
#'   \deqn{
#'     \bar{x}_t = \lambda \bar{x}_{t-1} + (1-\lambda) x_t
#'   }
#'   \deqn{
#'     \bar{y}_t = \lambda \bar{y}_{t-1} + (1-\lambda) y_t
#'   }
#'   \deqn{
#'     \sigma^2_{x t} = \lambda \sigma^2_{x t-1} + (1-\lambda) (x_t - \bar{x}_t)^2
#'   }
#'   \deqn{
#'     \sigma^2_{y t} = \lambda \sigma^2_{y t-1} + (1-\lambda) (y_t - \bar{y}_t)^2
#'   }
#'   \deqn{
#'     {cov}_t = \lambda {cov}_{t-1} + (1-\lambda) (x_t - \bar{x}_t) (y_t - \bar{y}_t)
#'   }
#'   Where \eqn{{cov}_t} is the trailing covariance estimate at time \eqn{t},
#'   \eqn{\sigma^2_{x t}}, \eqn{\sigma^2_{y t}}, \eqn{\bar{x}_t} and
#'   \eqn{\bar{y}_t} are the trailing variances and means of the returns, and
#'   \eqn{x_t} and \eqn{y_t} are the two streaming returns data.
#' 
#'   The above online recursive formulas are convenient for processing live
#'   streaming data because they don't require maintaining a buffer of past
#'   data. The formulas are equivalent to a convolution with exponentially
#'   decaying weights, but they're much faster to calculate.
#'   Using exponentially decaying weights is more natural than using a sliding
#'   look-back interval, because it gradually "forgets" about the past data.
#' 
#'   The value of the decay factor \eqn{\lambda} must be in the range between
#'   \code{0} and \code{1}.  
#'   If \eqn{\lambda} is close to \code{1} then the decay is weak and past
#'   values have a greater weight, and the trailing covariance values have a
#'   stronger dependence on past data.  This is equivalent to a long
#'   look-back interval.
#'   If \eqn{\lambda} is much less than \code{1} then the decay is strong and
#'   past values have a smaller weight, and the trailing covariance values have
#'   a weaker dependence on past data.  This is equivalent to a short
#'   look-back interval.
#' 
#'   The function \code{run_covar()} returns five columns of data: the trailing 
#'   covariances, the variances, and the mean values of the two columns of the
#'   argument \code{tseries}.  This allows calculating the trailing
#'   correlations, betas, and alphas.
#' 
#' @examples
#' \dontrun{
#' # Calculate historical returns
#' retp <- zoo::coredata(na.omit(rutils::etfenv$returns[, c("IEF", "VTI")]))
#' # Calculate the trailing covariance
#' lambdaf <- 0.9
#' covars <- HighFreq::run_covar(retp, lambda=lambdaf)
#' # Calculate the trailing correlation
#' correl <- covars[, 1]/sqrt(covars[, 2]*covars[, 3])
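#' # Sketch (assuming the column order: covariance, the two variances, the
#' # two means): the trailing beta and alpha of IEF returns versus VTI returns
#' betav <- covars[, 1]/covars[, 3]
#' alphav <- covars[, 4] - betav*covars[, 5]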
#' # Calculate the trailing covariance using R code
#' nrows <- NROW(retp)
#' retm <- matrix(numeric(2*nrows), nc=2)
#' retm[1, ] <- retp[1, ]
#' retd <- matrix(numeric(2*nrows), nc=2)
#' covarr <- numeric(nrows)
#' covarr[1] <- retp[1, 1]*retp[1, 2]
#' for (it in 2:nrows) {
#'   retm[it, ] <- lambdaf*retm[it-1, ] + (1-lambdaf)*(retp[it, ])
#'   retd[it, ] <- retp[it, ] - retm[it, ]
#'   covarr[it] <- lambdaf*covarr[it-1] + (1-lambdaf)*retd[it, 1]*retd[it, 2]
#' } # end for
#' all.equal(covars[, 1], covarr, check.attributes=FALSE)
#' }
#' 
#' @export
run_covar <- function(tseries, lambda) {
    .Call(`_HighFreq_run_covar`, tseries, lambda)
}

#' Calculate the trailing autocovariance of a \emph{time series} of returns
#' using an online recursive formula.
#' 
#' @param \code{tseries} A \emph{time series} or a \emph{matrix} with a single
#'   column of returns data.
#' 
#' @param \code{lambda} A decay factor which multiplies past estimates.
#'   
#' @param \code{lagg} An \emph{integer} equal to the number of periods to lag.
#'   (The default is \code{lagg = 1}.)
#'
#' @return A \emph{matrix} with three columns of data: the trailing
#'   autocovariances, the variances, and the mean values of the argument
#'   \code{tseries}.
#'
#' @details
#'   The function \code{run_autocovar()} calculates the trailing
#'   autocovariance of a streaming \emph{time series} of returns, by
#'   recursively weighting the past autocovariance estimates \eqn{{cov}_{t-1}},
#'   with the products of their returns minus their means, using the decay
#'   factor \eqn{\lambda}:
#'   \deqn{
#'     \bar{x}_t = \lambda \bar{x}_{t-1} + (1-\lambda) x_t
#'   }
#'   \deqn{
#'     \sigma^2_{t} = \lambda \sigma^2_{t-1} + (1-\lambda) (x_t - \bar{x}_t)^2
#'   }
#'   \deqn{
#'     {cov}_t = \lambda {cov}_{t-1} + (1-\lambda) (x_t - \bar{x}_t) (x_{t-l} - \bar{x}_{t-l})
#'   }
#'   Where \eqn{{cov}_t} is the trailing autocovariance estimate at time
#'   \eqn{t}, \eqn{l} is the lag (the argument \code{lagg}), and
#'   \eqn{\sigma^2_{t}} and \eqn{\bar{x}_t} are the trailing variances and
#'   means of the streaming data.
#' 
#'   The above online recursive formulas are convenient for processing live
#'   streaming data because they don't require maintaining a buffer of past
#'   data. The formulas are equivalent to a convolution with exponentially
#'   decaying weights, but they're much faster to calculate.
#'   Using exponentially decaying weights is more natural than using a sliding
#'   look-back interval, because it gradually "forgets" about the past data.
#' 
#'   The value of the decay factor \eqn{\lambda} must be in the range between
#'   \code{0} and \code{1}.  
#'   If \eqn{\lambda} is close to \code{1} then the decay is weak and past
#'   values have a greater weight, and the trailing covariance values have a
#'   stronger dependence on past data.  This is equivalent to a long
#'   look-back interval.
#'   If \eqn{\lambda} is much less than \code{1} then the decay is strong and
#'   past values have a smaller weight, and the trailing covariance values have
#'   a weaker dependence on past data.  This is equivalent to a short
#'   look-back interval.
#' 
#'   The function \code{run_autocovar()} returns three columns of data: the
#'   trailing autocovariances, the variances, and the mean values of the
#'   argument \code{tseries}.  This allows calculating the trailing
#'   autocorrelations.
#' 
#' @examples
#' \dontrun{
#' # Calculate historical returns
#' retp <- zoo::coredata(na.omit(rutils::etfenv$returns$VTI))
#' # Calculate the trailing autocovariance
#' lambdaf <- 0.9
#' lagg <- 3
#' covars <- HighFreq::run_autocovar(retp, lambda=lambdaf, lagg=lagg)
#' # Calculate the trailing autocorrelation
#' correl <- covars[, 1]/covars[, 2]
#' # Calculate the trailing autocovariance using R code
#' nrows <- NROW(retp)
#' retm <- numeric(nrows)
#' retm[1] <- retp[1, ]
#' retd <- numeric(nrows)
#' covarr <- numeric(nrows)
#' covarr[1] <- retp[1, ]^2
#' for (it in 2:nrows) {
#'   retm[it] <- lambdaf*retm[it-1] + (1-lambdaf)*(retp[it])
#'   retd[it] <- retp[it] - retm[it]
#'   covarr[it] <- lambdaf*covarr[it-1] + (1-lambdaf)*retd[it]*retd[max(it-lagg, 1)]
#' } # end for
#' all.equal(covarr, covars[, 1])
#' }
#' 
#' @export
run_autocovar <- function(tseries, lambda, lagg = 1L) {
    .Call(`_HighFreq_run_autocovar`, tseries, lambda, lagg)
}

#' Perform regressions on the streaming \emph{time series} of response and
#' predictor data, and calculate the regression coefficients, the residuals,
#' and the forecasts, using online recursive formulas.
#' 
#' @param \code{respv} A single-column \emph{time series} or a single-column
#'   \emph{matrix} of response data.
#' 
#' @param \code{predm} A \emph{time series} or a \emph{matrix} of predictor
#'   data.
#' 
#' @param \code{lambda} A decay factor which multiplies past estimates.
#'   
#' @param \code{controlv} A \emph{list} of model parameters (see Details).
#'
#' @return A \emph{matrix} with the regression coefficients, the residuals, and
#'   the forecasts (in that order - see details), with the same number of rows
#'   as the predictor argument \code{predm}.
#'
#' @details
#'   The function \code{run_reg()} performs regressions on the streaming \emph{time
#'   series} of response \eqn{r_t} and predictor \eqn{p_t} data:
#'   \deqn{
#'     r_t = \beta_t p_t + \epsilon_t
#'   }
#'   Where \eqn{\beta_t} are the trailing regression coefficients and
#'   \eqn{\epsilon_t} are the residuals.
#'   
#'   It recursively updates the covariance matrix \eqn{{cov}_t} between the
#'   response and the predictor data, and the covariance matrix
#'   \eqn{{cov}_{pt}} between the predictors, using the decay factor
#'   \eqn{\lambda}:
#'   \deqn{
#'     {cov}_t = \lambda {cov}_{t-1} + (1-\lambda) r^T_t p_t
#'   }
#'   \deqn{
#'     {cov}_{p t} = \lambda {cov}_{p (t-1)} + (1-\lambda) p^T_t p_t
#'   }
#'   
#'   It calculates the regression coefficients \eqn{\beta_t} as equal to the
#'   covariance matrix between the response and the predictor data
#'   \eqn{{cov}_t}, divided by the covariance matrix between the predictors
#'   \eqn{{cov}_{pt}}:
#'   \deqn{
#'     \beta_t = {cov}_t \, {cov}^{-1}_{p t}
#'   }
#'   
#'   It calculates the residuals \eqn{\epsilon_t} as the difference between the
#'   response \eqn{r_t} minus the fitted values \eqn{\beta_t p_t}:
#'   \deqn{
#'     \epsilon_t = r_t - \beta_t p_t
#'   }
#'   
#'   And the residual variance \eqn{\sigma^2_t} as:
#'   \deqn{
#'     \bar{\epsilon}_t = \lambda \bar{\epsilon}_{t-1} + (1-\lambda) \epsilon_t
#'   }
#'   \deqn{
#'     \sigma^2_t = \lambda \sigma^2_{t-1} + (1-\lambda) (\epsilon_t - \bar{\epsilon}_t)^2
#'   }
#' 
#'   It then calculates the regression forecasts \eqn{f_t}, as equal to the
#'   past regression coefficients \eqn{\beta_{t-1}} times the current predictor
#'   data \eqn{p_t}:
#'   \deqn{
#'     f_t = \beta_{t-1} p_t
#'   }
#' 
#'   It finally calculates the forecast errors as the difference between the
#'   response minus the regression forecasts: \eqn{r_t - f_t}.
#' 
#'   The coefficient matrix \eqn{\beta} and the residuals \eqn{\epsilon} have
#'   the same number of rows as the predictor argument \code{predm}.
#'
#'   The function \code{run_reg()} accepts a list of regression model
#'   parameters through the argument \code{controlv}.
#'   The argument \code{controlv} contains the parameters \code{regmod} and
#'   \code{residscale}.
#'   Below is a description of how these parameters work.
#'   The list of model parameters can be created using the function
#'   \code{param_reg()}.  
#'
#'   The number of regression coefficients is equal to the number of columns
#'   \code{n} of the predictor matrix.
#'   If the predictor matrix contains a unit intercept column then the first
#'   regression coefficient is equal to the alpha value \eqn{\alpha}.
#'
#'   If \code{regmod = "least_squares"} (the default) then it performs the
#'   standard least squares regression.  This is currently the only option.
#' 
#'   The \emph{residuals} and the \emph{forecast errors} may be scaled by
#'   their volatilities to obtain the \emph{z-scores}. 
#'   The default is \code{residscale = "none"} - no scaling.
#'   If the argument \code{residscale = "scale"} then the \emph{residuals}
#'   \eqn{\epsilon_t} are divided by their volatilities \eqn{\sigma_t}
#'   without subtracting their means:
#'   \deqn{
#'     \epsilon_t = \frac{\epsilon_t}{\sigma_t}
#'   }
#'   If the argument \code{residscale = "standardize"} then the residual means
#'   \eqn{\bar{\epsilon}} are subtracted from the \emph{residuals}, and then
#'   they are divided by their volatilities \eqn{\sigma_t}:
#'   \deqn{
#'     \epsilon_t = \frac{\epsilon_t - \bar{\epsilon}}{\sigma_t}
#'   }
#'   Which are equal to the \emph{z-scores}.
#'   
#'   The \emph{forecast errors} are also scaled in the same way as the
#'   \emph{residuals}, according to the argument \code{residscale}.
#' 
#'   The above online recursive formulas are convenient for processing live
#'   streaming data because they don't require maintaining a buffer of past
#'   data.
#'   The above recursive formulas are equivalent to a convolution with
#'   exponentially decaying weights, but they're much faster to calculate.
#'   Using exponentially decaying weights is more natural than using a sliding
#'   look-back interval, because it gradually "forgets" about the past data.
#'
#'   The value of the decay factor \eqn{\lambda} must be in the range between
#'   \code{0} and \code{1}.
#'   If \eqn{\lambda} is close to \code{1} then the decay is weak and past
#'   values have a greater weight, so the trailing values have a greater
#'   dependence on past data.  This is equivalent to a long look-back
#'   interval.
#'   If \eqn{\lambda} is much less than \code{1} then the decay is strong and
#'   past values have a smaller weight, so the trailing values have a weaker
#'   dependence on past data.  This is equivalent to a short look-back
#'   interval.
#' 
#'   The function \code{run_reg()} returns multiple columns of data, with the
#'   same number of rows as the predictor matrix \code{predm}. If the predictor
#'   matrix \code{predm} has \code{n} columns then \code{run_reg()} returns a
#'   matrix with \code{n+2} columns.
#'   The first \code{n} columns contain the regression coefficients (with the
#'   first column equal to the alpha value \eqn{\alpha}).
#'   The last \code{2} columns are the regression residuals and the forecast
#'   errors.
#' 
#' @examples
#' \dontrun{
#' # Calculate historical returns
#' retp <- na.omit(rutils::etfenv$returns[, c("XLF", "VTI", "IEF")])
#' # Response equals XLF returns
#' respv <- retp[, 1]
#' # Predictor matrix equals VTI and IEF returns
#' predm <- retp[, -1]
#' # Add unit intercept column to the predictor matrix
#' predm <- cbind(rep(1, NROW(predm)), predm)
#' # Calculate the trailing regressions
#' lambdaf <- 0.9
#' # Create a list of regression parameters
#' controlv <- HighFreq::param_reg(residscale="standardize")
#' regs <- HighFreq::run_reg(respv=respv, predm=predm, lambda=lambdaf, controlv=controlv)
#' # Plot the trailing residuals (the second-to-last column of the output)
#' datav <- cbind(cumsum(respv), regs[, NCOL(regs)-1])
#' colnames(datav) <- c("XLF", "residuals")
#' colnamev <- colnames(datav)
#' dygraphs::dygraph(datav["2008/2009"], main="Residuals of XLF Versus VTI and IEF") %>%
#'   dyAxis("y", label=colnamev[1], independentTicks=TRUE) %>%
#'   dyAxis("y2", label=colnamev[2], independentTicks=TRUE) %>%
#'   dySeries(axis="y", strokeWidth=2, col="blue") %>%
#'   dySeries(axis="y2", strokeWidth=2, col="red") %>%
#'   dyLegend(show="always", width=300)
#' 
#' # Calculate the trailing regressions using R code
#' lambda1 <- (1-lambdaf)
#' respv <- zoo::coredata(respv)
#' predm <- zoo::coredata(predm)
#' nrows <- NROW(predm)
#' ncols <- NCOL(predm)
#' covrespred <- respv[1, ]*predm[1, ]
#' covpred <- outer(predm[1, ], predm[1, ])
#' betas <- matrix(numeric(nrows*ncols), nc=ncols)
#' betas[1, ] <- covrespred %*% MASS::ginv(covpred)
#' resids <- numeric(nrows)
#' residm <- 0
#' residv <- 0
#' for (it in 2:nrows) {
#'  covrespred <- lambdaf*covrespred + lambda1*respv[it, ]*predm[it, ]
#'  covpred <- lambdaf*covpred + lambda1*outer(predm[it, ], predm[it, ])
#'  betas[it, ] <- covrespred %*% MASS::ginv(covpred)
#'  resids[it] <- respv[it, ] - (betas[it, ] %*% predm[it, ])
#'  residm <- lambdaf*residm + lambda1*resids[it]
#'  residv <- lambdaf*residv + lambda1*(resids[it] - residm)^2
#'  resids[it] <- (resids[it] - residm)/sqrt(residv)
#' } # end for
#' # Compare the coefficients and residuals, excluding the warmup period
#' all.equal(regs[-(1:1e3), 1:(ncols+1)], cbind(betas, resids)[-(1:1e3), ], check.attributes=FALSE)
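#' # Sketch (assumption, mirrors the forecast formula in the Details but
#' # without the scaling applied by run_reg()): the regression forecasts
#' # equal the lagged betas times the current predictors
#' fcasts <- rowSums(rbind(numeric(ncols), betas[-nrows, ])*predm)
#' fcerrs <- drop(respv) - fcasts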
#'  
#' }
#' 
#' @export
run_reg <- function(respv, predm, lambda, controlv) {
    .Call(`_HighFreq_run_reg`, respv, predm, lambda, controlv)
}

#' Calculate the mean (location) of the columns of a \emph{time series} or a
#' \emph{matrix} using \code{RcppArmadillo}.
#'
#' @param \code{tseries} A \emph{time series} or a \emph{matrix} of data.
#'
#' @param \code{method} A \emph{character string} specifying the type of the
#'   mean (location) model (the default is \code{method = "moment"} - see
#'   Details).
#'
#' @param \code{confl} The confidence level for calculating the quantiles of
#'   returns (the default is \code{confl = 0.75}).
#'
#' @return A single-row matrix with the mean (location) of the columns of
#'   \code{tseries}.
#'
#' @details
#'   The function \code{calc_mean()} calculates the mean (location) values of
#'   the columns of the \emph{time series} \code{tseries} using \code{C++}
#'   \code{RcppArmadillo} code.
#'
#'   If \code{method = "moment"} (the default) then \code{calc_mean()}
#'   calculates the location as the mean - the first moment of the data.
#'
#'   If \code{method = "quantile"} then it calculates the location
#'   \eqn{\bar{r}} as the average of the quantiles as follows:
#'   \deqn{
#'     \bar{r} = \frac{q_{\alpha} + q_{1-\alpha}}{2}
#'   }
#'   Where \eqn{\alpha} is the confidence level for calculating the quantiles
#'   (argument \code{confl}).
#'
#'   If \code{method = "nonparametric"} then it calculates the location as the
#'   median.
#'   
#'   The code examples below compare the function \code{calc_mean()} with the
#'   mean (location) calculated using \code{R} code.
#'
#' @examples
#' \dontrun{
#' # Calculate historical returns
#' retp <- na.omit(rutils::etfenv$returns[, c("XLP", "VTI")])
#' # Calculate the column means in RcppArmadillo
#' HighFreq::calc_mean(retp)
#' # Calculate the column means in R
#' sapply(retp, mean)
#' # Compare the values
#' all.equal(drop(HighFreq::calc_mean(retp)), 
#'   sapply(retp, mean), check.attributes=FALSE)
#' # Compare the speed of RcppArmadillo with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#'   Rcpp=HighFreq::calc_mean(retp),
#'   Rcode=sapply(retp, mean),
#'   times=10))[, c(1, 4, 5)]  # end microbenchmark summary
#' # Calculate the quantile mean (location)
#' HighFreq::calc_mean(retp, method="quantile", confl=0.9)
#' # Calculate the quantile mean (location) in R
#' colSums(sapply(retp, quantile, c(0.9, 0.1), type=5))/2
#' # Compare the values
#' all.equal(drop(HighFreq::calc_mean(retp, method="quantile", confl=0.9)), 
#'   colSums(sapply(retp, quantile, c(0.9, 0.1), type=5))/2, 
#'   check.attributes=FALSE)
#' # Compare the speed of RcppArmadillo with R code
#' summary(microbenchmark(
#'   Rcpp=HighFreq::calc_mean(retp, method="quantile", confl=0.9),
#'   Rcode=colSums(sapply(retp, quantile, c(0.9, 0.1), type=5))/2,
#'   times=10))[, c(1, 4, 5)]  # end microbenchmark summary
#' # Calculate the column medians in RcppArmadillo
#' HighFreq::calc_mean(retp, method="nonparametric")
#' # Calculate the column medians in R
#' sapply(retp, median)
#' # Compare the values
#' all.equal(drop(HighFreq::calc_mean(retp, method="nonparametric")), 
#'   sapply(retp, median), check.attributes=FALSE)
#' # Compare the speed of RcppArmadillo with R code
#' summary(microbenchmark(
#'   Rcpp=HighFreq::calc_mean(retp, method="nonparametric"),
#'   Rcode=sapply(retp, median),
#'   times=10))[, c(1, 4, 5)]  # end microbenchmark summary
#' }
#' 
#' @export
calc_mean <- function(tseries, method = "moment", confl = 0.75) {
    .Call(`_HighFreq_calc_mean`, tseries, method, confl)
}

#' Calculate the variance of a single-column \emph{time series} or a
#' \emph{vector} using \code{RcppArmadillo}.
#' 
#' @param \code{tseries} A single-column \emph{time series} or a \emph{vector}.
#'
#' @return A \emph{numeric} value equal to the variance of the \emph{vector}.
#'
#' @details
#'   The function \code{calc_varvec()} calculates the variance of a
#'   \emph{vector} using \code{RcppArmadillo} \code{C++} code, so it's
#'   significantly faster than the \code{R} function \code{var()}.
#'
#' @examples
#' \dontrun{
#' # Create a vector of random returns
#' retp <- rnorm(1e6)
#' # Compare calc_varvec() with standard var()
#' all.equal(HighFreq::calc_varvec(retp), var(retp))
#' # Compare the speed of RcppArmadillo with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#'   Rcpp=HighFreq::calc_varvec(retp),
#'   Rcode=var(retp),
#'   times=10))[, c(1, 4, 5)]  # end microbenchmark summary
#' }
#' 
#' @export
calc_varvec <- function(tseries) {
    .Call(`_HighFreq_calc_varvec`, tseries)
}

#' Calculate the dispersion (variance) of the columns of a \emph{time series}
#' or a \emph{matrix} using \code{RcppArmadillo}.
#' 
#' @param \code{tseries} A \emph{time series} or a \emph{matrix} of data.
#'   
#' @param \code{method} A \emph{character string} specifying the type of the
#'   dispersion model (the default is \code{method = "moment"} - see Details).
#'    
#' @param \code{confl} The confidence level for calculating the quantiles of
#'   returns (the default is \code{confl = 0.75}).
#'
#' @return A row vector equal to the dispersion of the columns of the matrix
#'   \code{tseries}.
#'
#' @details
#'   The function \code{calc_var()} calculates the dispersion of the
#'   columns of a \emph{time series} or a \emph{matrix} of data using
#'   \code{RcppArmadillo} \code{C++} code.
#'   
#'   The dispersion is a measure of the variability of the data.  Examples of
#'   dispersion are the variance and the Median Absolute Deviation (\emph{MAD}).
#'
#'   If \code{method = "moment"} (the default) then \code{calc_var()}
#'   calculates the dispersion as the second moment of the data (the variance).
#'   Then \code{calc_var()} performs the same calculation as the function
#'   \code{colVars()} from package
#'   \href{https://cran.r-project.org/web/packages/matrixStats/index.html}{matrixStats},
#'   but it's much faster because it uses \code{RcppArmadillo} \code{C++} code.
#'
#'   If \code{method = "quantile"} then it calculates the dispersion as the
#'   difference between the quantiles as follows:
#'   \deqn{
#'     \sigma = q_{\alpha} - q_{1-\alpha}
#'   }
#'   Where \eqn{\alpha} is the confidence level for calculating the quantiles.
#'   
#'   If \code{method = "nonparametric"} then it calculates the dispersion as the
#'   Median Absolute Deviation (\emph{MAD}):
#'   \deqn{
#'     MAD = median(abs(x - median(x)))
#'   }
#'   It also multiplies the \emph{MAD} by a factor of \code{1.4826}, to make it
#'   comparable to the standard deviation.
#'
#'   If \code{method = "nonparametric"} then \code{calc_var()} performs the
#'   same calculation as the function \code{stats::mad()}, but it's much faster
#'   because it uses \code{RcppArmadillo} \code{C++} code.
#'
#'   If the number of rows of \code{tseries} is less than \code{3} then it
#'   returns zeros.
#'   
#' @examples
#' \dontrun{
#' # Calculate VTI and XLF returns
#' retp <- na.omit(rutils::etfenv$returns[, c("VTI", "XLF")])
#' # Compare HighFreq::calc_var() with standard var()
#' all.equal(drop(HighFreq::calc_var(retp)), 
#'   apply(retp, 2, var), check.attributes=FALSE)
#' # Compare HighFreq::calc_var() with matrixStats
#' all.equal(drop(HighFreq::calc_var(retp)), 
#'   matrixStats::colVars(retp), check.attributes=FALSE)
#' # Compare the speed of RcppArmadillo with matrixStats and with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#'   Rcpp=HighFreq::calc_var(retp),
#'   matrixStats=matrixStats::colVars(retp),
#'   Rcode=apply(retp, 2, var),
#'   times=10))[, c(1, 4, 5)]  # end microbenchmark summary
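#' # Sketch of the quantile dispersion per the formula in the Details
#' # (assumption: the C++ quantile algorithm may not match type=5 exactly)
#' HighFreq::calc_var(retp, method="quantile", confl=0.9)
#' sapply(retp, function(x) diff(quantile(x, c(0.1, 0.9), type=5)))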
#' # Compare HighFreq::calc_var() with stats::mad()
#' all.equal(drop(HighFreq::calc_var(retp, method="nonparametric")), 
#'   sapply(retp, mad), check.attributes=FALSE)
#' # Compare the speed of RcppArmadillo with stats::mad()
#' summary(microbenchmark(
#'   Rcpp=HighFreq::calc_var(retp, method="nonparametric"),
#'   Rcode=sapply(retp, mad),
#'   times=10))[, c(1, 4, 5)]  # end microbenchmark summary
#' }
#' 
#' @export
calc_var <- function(tseries, method = "moment", confl = 0.75) {
    .Call(`_HighFreq_calc_var`, tseries, method, confl)
}

#' Calculate the covariance matrix of the columns of a \emph{time series}
#' using \code{RcppArmadillo}.
#' 
#' @param \code{tseries} A \emph{time series} or a \emph{matrix} of data.
#'   
#' @param \code{method} A \emph{character string} specifying the type of the
#'   covariance model (the default is \code{method = "moment"} - see Details).
#'    
#' @param \code{confl} The confidence level for calculating the quantiles of
#'   returns (the default is \code{confl = 0.75}).
#'
#' @return A square matrix with the covariance coefficients of the columns of
#'   the \emph{time series} \code{tseries}.
#'
#' @details
#'   The function \code{calc_covar()} calculates the covariance matrix of the
#'   columns of a \emph{time series} or a \emph{matrix} of data using
#'   \code{RcppArmadillo} \code{C++} code.
#'   The covariance is a measure of the codependency of the data.
#'
#'   If \code{method = "moment"} (the default) then \code{calc_covar()}
#'   calculates the covariance as the second co-moment:
#'   \deqn{
#'     \sigma_{xy} = \frac{1}{n-1} \sum_{i=1}^n (x_i - \bar{x}) (y_i - \bar{y})
#'   }
#'   Then \code{calc_covar()} performs the same calculation as the \code{R}
#'   function \code{stats::cov()}.
#'
#'   If \code{method = "quantile"} then it calculates the covariance as the
#'   difference between the quantiles as follows:
#'   \deqn{
#'     \mu = q_{\alpha} - q_{1-\alpha}
#'   }
#'   Where \eqn{\alpha} is the confidence level for calculating the quantiles.
#'   
#'   If \code{method = "nonparametric"} then it calculates the covariance as the
#'   Median Absolute Deviation (\emph{MAD}):
#'   \deqn{
#'     MAD = median(abs(x - median(x)))
#'   }
#'   It also multiplies the \emph{MAD} by a factor of \code{1.4826}, to make it
#'   comparable to the standard deviation.
#'
#'   If \code{method = "nonparametric"} then \code{calc_covar()} performs the
#'   same calculation as the function \code{stats::mad()}, but it's much faster
#'   because it uses \code{RcppArmadillo} \code{C++} code.
#'
#'   If the number of rows of \code{tseries} is less than \code{3} then it
#'   returns zeros.
#'   
#' @examples
#' \dontrun{
#' # Calculate VTI and XLF returns
#' retp <- na.omit(rutils::etfenv$returns[, c("VTI", "XLF")])
#' # Compare HighFreq::calc_covar() with standard cov()
#' all.equal(drop(HighFreq::calc_covar(retp)), 
#'   cov(retp), check.attributes=FALSE)
#' # Compare the speed of RcppArmadillo with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#'   Rcpp=HighFreq::calc_covar(retp),
#'   Rcode=cov(retp),
#'   times=10))[, c(1, 4, 5)]  # end microbenchmark summary
#' # Compare HighFreq::calc_covar() with stats::mad()
#' all.equal(drop(HighFreq::calc_covar(retp, method="nonparametric")), 
#'   sapply(retp, mad), check.attributes=FALSE)
#' # Compare the speed of RcppArmadillo with stats::mad()
#' summary(microbenchmark(
#'   Rcpp=HighFreq::calc_covar(retp, method="nonparametric"),
#'   Rcode=sapply(retp, mad),
#'   times=10))[, c(1, 4, 5)]  # end microbenchmark summary
#' }
#' 
#' @export
calc_covar <- function(tseries, method = "moment", confl = 0.75) {
    .Call(`_HighFreq_calc_covar`, tseries, method, confl)
}

#' Calculate the variance of returns aggregated over the end points. 
#'
#' @param \code{pricev} A \emph{time series} or a \emph{matrix} of prices.
#'
#' @param \code{step} The number of time periods in each interval between
#'   neighboring end points (the default is \code{step = 1}).
#' 
#' @return The variance of aggregated returns.
#'
#' @details
#'   The function \code{calc_var_ag()} calculates the variance of returns
#'   aggregated over the end points.
#'
#'   It first calculates the end points spaced apart by the number of periods
#'   equal to the argument \code{step}.  Then it calculates the aggregated
#'   returns by differencing the prices \code{pricev} calculated at the end
#'   points. Finally it calculates the variance of the returns.
#'
#'   The choice of the first end point is arbitrary, so \code{calc_var_ag()}
#'   calculates the different end points for all the possible starting points.
#'   It then calculates the variance values for all the different end points
#'   and averages them.
#'
#'   The aggregated volatility \eqn{\sigma_t} increases with the length of the
#'   aggregation interval \eqn{\Delta t}.
#'   The aggregated volatility increases as the length of the aggregation
#'   interval \eqn{\Delta t} raised to the power of the \emph{Hurst exponent}
#'   \eqn{H}:
#'     \deqn{
#'       \sigma_t = \sigma {\Delta t}^H
#'     }
#'   Where \eqn{\sigma} is the daily return volatility.
#' 
#'   The function \code{calc_var_ag()} can therefore be used to calculate the
#'   \emph{Hurst exponent} from the variance ratio.
#'
#' @examples
#' \dontrun{
#' # Calculate the prices
#' closep <- na.omit(rutils::etfenv$prices[, c("XLP", "VTI")])
#' closep <- log(closep)
#' # Calculate the variance of daily returns
#' calc_var_ag(closep, step=1)
#' # Calculate the variance using R
#' sapply(rutils::diffit(closep), var)
#' # Calculate the variance of returns aggregated over 21 days
#' calc_var_ag(closep, step=21)
#' # The variance over 21 days is approximately 21 times the daily variance
#' 21*calc_var_ag(closep, step=1)
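#' # Sketch: estimate the Hurst exponent from the variance ratio, using the
#' # relation that the variance scales as the interval to the power 2*H
#' 0.5*log(calc_var_ag(closep, step=21)/calc_var_ag(closep, step=1))/log(21)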
#' }
#' 
#' @export
calc_var_ag <- function(pricev, step = 1L) {
    .Call(`_HighFreq_calc_var_ag`, pricev, step)
}

#' Calculate the variance of returns from \emph{OHLC} prices using different
#' price range estimators.
#'
#' @param \code{ohlc} A \emph{time series} or a \emph{matrix} of \emph{OHLC}
#'   prices.
#'   
#' @param \code{method} A \emph{character string} representing the price range
#'   estimator for calculating the variance.  The estimators include:
#'   \itemize{
#'     \item "close" close-to-close estimator,
#'     \item "rogers_satchell" Rogers-Satchell estimator,
#'     \item "garman_klass" Garman-Klass estimator,
#'     \item "garman_klass_yz" Garman-Klass with account for close-to-open price jumps,
#'     \item "yang_zhang" Yang-Zhang estimator,
#'    }
#'    (The default is the \code{method = "yang_zhang"}.)
#'    
#' @param \code{closel} A \emph{vector} with the lagged \emph{close} prices
#'   of the \emph{OHLC time series}.  This is an optional argument. (The
#'   default is \code{closel = 0}).
#'   
#' @param \code{scale} \emph{Boolean} argument: Should the returns be divided
#'   by the time index, the number of seconds in each period? (The default is
#'   \code{scale = TRUE}).
#'
#' @param \code{index} A \emph{vector} with the time index of the \emph{time
#'   series}.  This is an optional argument (the default is \code{index = 0}).
#'   
#' @return A single \emph{numeric} value equal to the variance of the
#'   \emph{OHLC time series}.
#'
#' @details
#'   The function \code{calc_var_ohlc()} calculates the variance from all the
#'   different intra-day and day-over-day returns (defined as the differences
#'   of \emph{OHLC} prices), using several different variance estimation
#'   methods.
#'
#'   The function \code{calc_var_ohlc()} does not calculate the logarithm of
#'   the prices.
#'   So if the argument \code{ohlc} contains dollar prices then
#'   \code{calc_var_ohlc()} calculates the dollar variance.
#'   If the argument \code{ohlc} contains the log prices then
#'   \code{calc_var_ohlc()} calculates the percentage variance.
#'
#'   The default \code{method} is \emph{"yang_zhang"}, which theoretically
#'   has the lowest standard error among unbiased estimators.
#'   The methods \emph{"close"}, \emph{"garman_klass_yz"}, and
#'   \emph{"yang_zhang"} do account for \emph{close-to-open} price jumps, while
#'   the methods \emph{"garman_klass"} and \emph{"rogers_satchell"} do not
#'   account for \emph{close-to-open} price jumps.
#'
#'   If \code{scale} is \code{TRUE} (the default), then the returns are
#'   divided by the differences of the time index (which scales the variance to
#'   the units of variance per second squared). This is useful when calculating
#'   the variance from minute bar data, because dividing the returns by the
#'   number of seconds decreases the effect of overnight price jumps. If the
#'   time index is in days, then the variance is equal to the variance per day
#'   squared.
#'   
#'   If the number of rows of \code{ohlc} is less than \code{3} then it
#'   returns zero.
#'   
#'   The optional argument \code{index} is the time index of the \emph{time
#'   series} \code{ohlc}. If the time index is in seconds, then the
#'   differences of the index are equal to the number of seconds in each time
#'   period.  If the time index is in days, then the differences are equal to
#'   the number of days in each time period.
#'   
#'   The optional argument \code{closel} are the lagged \emph{close} prices
#'   of the \emph{OHLC time series}.  Passing in the lagged \emph{close} prices
#'   speeds up the calculation, so it's useful for rolling calculations.
#'   
#'   The function \code{calc_var_ohlc()} is implemented in \code{RcppArmadillo}
#'   \code{C++} code, and it's over \code{10} times faster than
#'   \code{calc_var_ohlc_r()}, which is implemented in \code{R} code.
#'
#' @examples
#' \dontrun{
#' # Extract the log OHLC prices of SPY
#' ohlc <- log(HighFreq::SPY)
#' # Extract the time index of SPY prices
#' indeks <- c(1, diff(xts::.index(ohlc)))
#' # Calculate the variance of SPY returns, with scaling of the returns
#' HighFreq::calc_var_ohlc(ohlc, 
#'  method="yang_zhang", scale=TRUE, index=indeks)
#' # Calculate variance without accounting for overnight jumps
#' HighFreq::calc_var_ohlc(ohlc, 
#'  method="rogers_satchell", scale=TRUE, index=indeks)
#' # Calculate the variance without scaling the returns
#' HighFreq::calc_var_ohlc(ohlc, scale=FALSE)
#' # Calculate the variance by passing in the lagged close prices
#' closel <- HighFreq::lagit(ohlc[, 4])
#' all.equal(HighFreq::calc_var_ohlc(ohlc), 
#'   HighFreq::calc_var_ohlc(ohlc, closel=closel))
#' # Compare with HighFreq::calc_var_ohlc_r()
#' all.equal(HighFreq::calc_var_ohlc(ohlc, index=indeks), 
#'   HighFreq::calc_var_ohlc_r(ohlc))
#' # Compare the speed of Rcpp with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#'   Rcpp=HighFreq::calc_var_ohlc(ohlc),
#'   Rcode=HighFreq::calc_var_ohlc_r(ohlc),
#'   times=10))[, c(1, 4, 5)]  # end microbenchmark summary
#' }
#' @export
calc_var_ohlc <- function(ohlc, method = "yang_zhang", closel = 0L, scale = TRUE, index = 0L) {
    .Call(`_HighFreq_calc_var_ohlc`, ohlc, method, closel, scale, index)
}

#' Calculate the variance of aggregated \emph{OHLC} prices using different
#' price range estimators.
#'
#' @param \code{ohlc} A \emph{time series} or a \emph{matrix} of \emph{OHLC}
#'   prices.
#'
#' @param \code{step} The number of time periods in each interval between
#'   neighboring end points.
#' 
#' @param \code{method} A \emph{character string} representing the price range
#'   estimator for calculating the variance.  The estimators include:
#'   \itemize{
#'     \item "close" close-to-close estimator,
#'     \item "rogers_satchell" Rogers-Satchell estimator,
#'     \item "garman_klass" Garman-Klass estimator,
#'     \item "garman_klass_yz" Garman-Klass with account for close-to-open price jumps,
#'     \item "yang_zhang" Yang-Zhang estimator,
#'    }
#'    (The default is the \code{method = "yang_zhang"}.)
#'    
#' @param \code{closel} A \emph{vector} with the lagged \emph{close} prices
#'   of the \emph{OHLC time series}.  This is an optional argument. (The
#'   default is \code{closel = 0}).
#'   
#' @param \code{scale} \emph{Boolean} argument: Should the returns be divided
#'   by the time index, the number of seconds in each period? (The default is
#'   \code{scale = TRUE}).
#'
#' @param \code{index} A \emph{vector} with the time index of the \emph{time
#'   series}.  This is an optional argument (the default is \code{index = 0}).
#'   
#' @return The variance of aggregated \emph{OHLC} prices.
#'
#' @details
#'   The function \code{calc_var_ohlc_ag()} calculates the variance of
#'   \emph{OHLC} prices aggregated over the end points.
#'
#'   It first calculates the end points spaced apart by the number of periods
#'   equal to the argument \code{step}.  Then it aggregates the \emph{OHLC}
#'   prices to the end points. Finally it calculates the variance of the
#'   aggregated \emph{OHLC} prices.
#'
#'   The choice of the first end point is arbitrary, so \code{calc_var_ohlc_ag()}
#'   calculates the different end points for all the possible starting points.
#'   It then calculates the variance values for all the different end points
#'   and averages them.
#'
#'   The aggregated volatility \eqn{\sigma_t} increases with the length of the
#'   aggregation interval \eqn{\Delta t}.
#'   The aggregated volatility increases as the length of the aggregation
#'   interval \eqn{\Delta t} raised to the power of the \emph{Hurst exponent}
#'   \eqn{H}:
#'     \deqn{
#'       \sigma_t = \sigma {\Delta t}^H
#'     }
#'   Where \eqn{\sigma} is the daily return volatility.
#' 
#'   The function \code{calc_var_ohlc_ag()} can therefore be used to calculate
#'   the \emph{Hurst exponent} from the variance ratio.
#'
#' @examples
#' \dontrun{
#' # Calculate the log ohlc prices
#' ohlc <- log(rutils::etfenv$VTI)
#' # Calculate the daily variance of percentage returns
#' calc_var_ohlc_ag(ohlc, step=1)
#' # Calculate the variance of returns aggregated over 21 days
#' calc_var_ohlc_ag(ohlc, step=21)
#' # The variance over 21 days is approximately 21 times the daily variance
#' 21*calc_var_ohlc_ag(ohlc, step=1)
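#' # Sketch: estimate the Hurst exponent from the variance ratio, using the
#' # relation that the variance scales as the interval to the power 2*H
#' 0.5*log(calc_var_ohlc_ag(ohlc, step=21)/calc_var_ohlc_ag(ohlc, step=1))/log(21)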
#' }
#' 
#' @export
calc_var_ohlc_ag <- function(ohlc, step, method = "yang_zhang", closel = 0L, scale = TRUE, index = 0L) {
    .Call(`_HighFreq_calc_var_ohlc_ag`, ohlc, step, method, closel, scale, index)
}

#' Calculate the skewness of the columns of a \emph{time series} or a
#' \emph{matrix} using \code{RcppArmadillo}.
#'
#' @param \code{tseries} A \emph{time series} or a \emph{matrix} of data.
#'
#' @param \code{method} A \emph{character string} specifying the type of the
#'   skewness model (the default is \code{method = "moment"} - see Details).
#'
#' @param \code{confl} The confidence level for calculating the quantiles of
#'   returns (the default is \code{confl = 0.75}).
#'
#' @return A single-row matrix with the skewness of the columns of
#'   \code{tseries}.
#'
#' @details
#'   The function \code{calc_skew()} calculates the skewness of the columns of
#'   a \emph{time series} or a \emph{matrix} of data using \code{C++}
#'   \code{RcppArmadillo} code.
#'
#'   If \code{method = "moment"} (the default) then \code{calc_skew()}
#'   calculates the skewness as the third moment of the data.
#'
#'   If \code{method = "quantile"} then it calculates the skewness
#'   \eqn{\varsigma} from the differences between the quantiles of the data as
#'   follows:
#'   \deqn{
#'     \varsigma = \frac{q_{\alpha} + q_{1-\alpha} - 2 q_{0.5}}{q_{\alpha} - q_{1-\alpha}}
#'   }
#'   Where \eqn{\alpha} is the confidence level for calculating the quantiles.
#'
#'   If \code{method = "nonparametric"} then it calculates the skewness as the
#'   difference between the mean of the data minus its median, divided by the
#'   standard deviation.
#'   
#'   If the number of rows of \code{tseries} is less than \code{3} then it
#'   returns zeros.
#'   
#'   The code examples below compare the function \code{calc_skew()} with the
#'   skewness calculated using \code{R} code.
#'
#' @examples
#' \dontrun{
#' # Define a single-column time series of returns
#' retp <- na.omit(rutils::etfenv$returns$VTI)
#' # Calculate the moment skewness
#' HighFreq::calc_skew(retp)
#' # Calculate the moment skewness in R
#' calc_skewr <- function(x) {
#'   x <- (x-mean(x))
#'   sum(x^3)/var(x)^1.5/NROW(x)
#' }  # end calc_skewr
#' all.equal(HighFreq::calc_skew(retp), 
#'   calc_skewr(retp), check.attributes=FALSE)
#' # Compare the speed of RcppArmadillo with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#'   Rcpp=HighFreq::calc_skew(retp),
#'   Rcode=calc_skewr(retp),
#'   times=10))[, c(1, 4, 5)]  # end microbenchmark summary
#' # Calculate the quantile skewness
#' HighFreq::calc_skew(retp, method="quantile", confl=0.9)
#' # Calculate the quantile skewness in R
#' calc_skewq <- function(x, a = 0.75) {
#'   quantiles <- quantile(x, c(1-a, 0.5, a), type=5)
#'   (quantiles[3] + quantiles[1] - 2*quantiles[2])/(quantiles[3] - quantiles[1])
#' }  # end calc_skewq
#' all.equal(drop(HighFreq::calc_skew(retp, method="quantile", confl=0.9)), 
#'   calc_skewq(retp, a=0.9), check.attributes=FALSE)
#' # Compare the speed of RcppArmadillo with R code
#' summary(microbenchmark(
#'   Rcpp=HighFreq::calc_skew(retp, method="quantile"),
#'   Rcode=calc_skewq(retp),
#'   times=10))[, c(1, 4, 5)]  # end microbenchmark summary
#' # Calculate the nonparametric skewness
#' HighFreq::calc_skew(retp, method="nonparametric")
#' # Compare HighFreq::calc_skew() with R nonparametric skewness
#' all.equal(drop(HighFreq::calc_skew(retp, method="nonparametric")), 
#'   (mean(retp)-median(retp))/sd(retp), 
#'   check.attributes=FALSE)
#' # Compare the speed of RcppArmadillo with R code
#' summary(microbenchmark(
#'   Rcpp=HighFreq::calc_skew(retp, method="nonparametric"),
#'   Rcode=(mean(retp)-median(retp))/sd(retp),
#'   times=10))[, c(1, 4, 5)]  # end microbenchmark summary
#' }
#' 
#' @export
calc_skew <- function(tseries, method = "moment", confl = 0.75) {
    .Call(`_HighFreq_calc_skew`, tseries, method, confl)
}

#' Calculate the kurtosis of the columns of a \emph{time series} or a
#' \emph{matrix} using \code{RcppArmadillo}.
#'
#' @param \code{tseries} A \emph{time series} or a \emph{matrix} of data.
#'
#' @param \code{method} A \emph{character string} specifying the type of the
#'   kurtosis model (the default is \code{method = "moment"} - see Details).
#'
#' @param \code{confl} The confidence level for calculating the quantiles of
#'   returns (the default is \code{confl = 0.75}).
#'
#' @return A single-row matrix with the kurtosis of the columns of
#'   \code{tseries}.
#'
#' @details
#'   The function \code{calc_kurtosis()} calculates the kurtosis of the columns
#'   of the \emph{matrix} \code{tseries} using \code{RcppArmadillo} \code{C++}
#'   code.
#'
#'   If \code{method = "moment"} (the default) then \code{calc_kurtosis()}
#'   calculates the fourth moment of the data.
#'   But it doesn't center the columns of \code{tseries} because that requires
#'   copying the matrix \code{tseries} in memory, so it's time-consuming.
#'
#'   If \code{method = "quantile"} then it calculates the skewness
#'   \eqn{\kappa} from the differences between the quantiles of the data as
#'   follows:
#'   \deqn{
#'     \kappa = \frac{q_{\alpha} - q_{1-\alpha}}{q_{0.75} - q_{0.25}}
#'   }
#'   Where \eqn{\alpha} is the confidence level for calculating the quantiles.
#'
#'   If \code{method = "nonparametric"} then it calculates the kurtosis as the
#'   difference between the mean of the data minus its median, divided by the
#'   standard deviation.
#'   
#'   If the number of rows of \code{tseries} is less than \code{3} then it
#'   returns zeros.
#'   
#'   The code examples below compare the function \code{calc_kurtosis()} with the
#'   kurtosis calculated using \code{R} code.
#'
#' @examples
#' \dontrun{
#' # Define a single-column time series of returns
#' retp <- na.omit(rutils::etfenv$returns$VTI)
#' # Calculate the moment kurtosis
#' HighFreq::calc_kurtosis(retp)
#' # Calculate the moment kurtosis in R
#' calc_kurtr <- function(x) {
#'   x <- (x-mean(x))
#'   sum(x^4)/var(x)^2/NROW(x)
#' }  # end calc_kurtr
#' all.equal(HighFreq::calc_kurtosis(retp), 
#'   calc_kurtr(retp), check.attributes=FALSE)
#' # Compare the speed of RcppArmadillo with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#'   Rcpp=HighFreq::calc_kurtosis(retp),
#'   Rcode=calc_kurtr(retp),
#'   times=10))[, c(1, 4, 5)]  # end microbenchmark summary
#' # Calculate the quantile kurtosis
#' HighFreq::calc_kurtosis(retp, method="quantile", confl=0.9)
#' # Calculate the quantile kurtosis in R
#' calc_kurtq <- function(x, a=0.9) {
#'   quantiles <- quantile(x, c(1-a, 0.25, 0.75, a), type=5)
#'   (quantiles[4] - quantiles[1])/(quantiles[3] - quantiles[2])
#' }  # end calc_kurtq
#' all.equal(drop(HighFreq::calc_kurtosis(retp, method="quantile", confl=0.9)), 
#'   calc_kurtq(retp, a=0.9), check.attributes=FALSE)
#' # Compare the speed of RcppArmadillo with R code
#' summary(microbenchmark(
#'   Rcpp=HighFreq::calc_kurtosis(retp, method="quantile"),
#'   Rcode=calc_kurtq(retp),
#'   times=10))[, c(1, 4, 5)]  # end microbenchmark summary
#' # Calculate the nonparametric kurtosis
#' HighFreq::calc_kurtosis(retp, method="nonparametric")
#' # Compare HighFreq::calc_kurtosis() with R nonparametric kurtosis
#' all.equal(drop(HighFreq::calc_kurtosis(retp, method="nonparametric")), 
#'   (mean(retp)-median(retp))/sd(retp), 
#'   check.attributes=FALSE)
#' # Compare the speed of RcppArmadillo with R code
#' summary(microbenchmark(
#'   Rcpp=HighFreq::calc_kurtosis(retp, method="nonparametric"),
#'   Rcode=(mean(retp)-median(retp))/sd(retp),
#'   times=10))[, c(1, 4, 5)]  # end microbenchmark summary
#' }
#' 
#' @export
calc_kurtosis <- function(tseries, method = "moment", confl = 0.75) {
    .Call(`_HighFreq_calc_kurtosis`, tseries, method, confl)
}

#' Calculate the Hurst exponent from the volatility ratio of aggregated returns.
#'
#' @param \code{tseries} A \emph{time series} or a \emph{matrix} of log prices.
#'
#' @param \code{aggv} A \emph{vector} of aggregation intervals.
#' 
#' @return The Hurst exponent calculated from the volatility ratio of
#'   aggregated returns.  If \code{tseries} contains multiple columns, then the
#'   function \code{calc_hurst()} returns a single-row matrix of Hurst
#'   exponents.
#'
#' @details
#'   The function \code{calc_hurst()} calculates the Hurst exponent from the
#'   ratios of the volatilities of aggregated returns.
#'   
#'   An aggregation interval is equal to the number of time periods between the
#'   neighboring aggregation end points.
#'
#'   The aggregated volatility \eqn{\sigma_t} increases with the length of the
#'   aggregation interval \eqn{\Delta t}.
#'   The aggregated volatility increases as the length of the aggregation
#'   interval \eqn{\Delta t} raised to the power of the \emph{Hurst exponent}
#'   \eqn{H}:
#'     \deqn{
#'       \sigma_t = \sigma {\Delta t}^H
#'     }
#'   Where \eqn{\sigma} is the daily return volatility.
#' 
#'   For a single aggregation interval \eqn{\Delta t}, the \emph{Hurst
#'   exponent} \eqn{H} is equal to the logarithm of the ratio of the
#'   volatilities divided by the logarithm of the aggregation interval
#'   \eqn{\Delta t}:
#'     \deqn{
#'       H = \frac{\log{\sigma_t} - \log{\sigma}}{\log{\Delta t}}
#'     }
#' 
#'   For a \emph{vector} of aggregation intervals \eqn{\Delta t_i}, the
#'   \emph{Hurst exponent} \eqn{H} is equal to the regression slope between the
#'   logarithms of the aggregated volatilities \eqn{\sigma_i} versus the
#'   logarithms of the aggregation intervals \eqn{\Delta t_i}:
#'     \deqn{
#'       H = \frac{\code{cov}(\log{\sigma_i}, \log{\Delta t_i})}{\code{var}(\log{\Delta t_i})}
#'     }
#' 
#'   The function \code{calc_hurst()} calls the function \code{calc_var_ag()}
#'   to calculate the variance of aggregated returns \eqn{\sigma^2_t}.
#' 
#' @examples
#' \dontrun{
#' # Calculate the log prices
#' closep <- na.omit(rutils::etfenv$prices[, c("XLP", "VTI")])
#' closep <- log(closep)
#' # Calculate the Hurst exponents for a 21 day aggregation interval
#' HighFreq::calc_hurst(closep, aggv=21)
#' # Calculate the Hurst exponents for a vector of aggregation intervals
#' aggv <- seq.int(from=3, to=35, length.out=9)^2
#' HighFreq::calc_hurst(closep, aggv=aggv)
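#' # Sketch (assumption, an R replication per the regression formula in the
#' # Details): the slope of log volatility versus log aggregation interval
#' volv <- sapply(aggv, function(agg) sqrt(HighFreq::calc_var_ag(closep[, 1], step=agg)))
#' lm(log(volv) ~ log(aggv))$coefficients[2]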
#' }
#' 
#' @export
calc_hurst <- function(tseries, aggv) {
    .Call(`_HighFreq_calc_hurst`, tseries, aggv)
}

#' Calculate the Hurst exponent from the volatility ratio of aggregated
#' \emph{OHLC} prices.
#'
#' @param \code{ohlc} A \emph{time series} or a \emph{matrix} of \emph{OHLC}
#'   prices.
#'
#' @param \code{step} The number of time periods in each interval between
#'   neighboring end points.
#' 
#' @param \code{method} A \emph{character string} representing the price range
#'   estimator for calculating the variance.  The estimators include:
#'   \itemize{
#'     \item "close" close-to-close estimator,
#'     \item "rogers_satchell" Rogers-Satchell estimator,
#'     \item "garman_klass" Garman-Klass estimator,
#'     \item "garman_klass_yz" Garman-Klass with account for close-to-open price jumps,
#'     \item "yang_zhang" Yang-Zhang estimator,
#'    }
#'    (The default is the \code{method = "yang_zhang"}.)
#'    
#' @param \code{closel} A \emph{vector} with the lagged \emph{close} prices
#'   of the \emph{OHLC time series}.  This is an optional argument. (The
#'   default is \code{closel = 0}).
#'   
#' @param \code{scale} \emph{Boolean} argument: Should the returns be divided
#'   by the time index, the number of seconds in each period? (The default is
#'   \code{scale = TRUE}).
#'
#' @param \code{index} A \emph{vector} with the time index of the \emph{time
#'   series}.  This is an optional argument (the default is \code{index = 0}).
#'   
#' @return The Hurst exponent calculated from the volatility ratio of
#'   aggregated \emph{OHLC} prices.
#'
#' @details
#'   The function \code{calc_hurst_ohlc()} calculates the Hurst exponent from
#'   the ratios of the variances of aggregated \emph{OHLC} prices.
#'
#'   The aggregated volatility \eqn{\sigma_t} increases with the length of the
#'   aggregation interval \eqn{\Delta t}.
#'   The aggregated volatility increases as the length of the aggregation
#'   interval \eqn{\Delta t} raised to the power of the \emph{Hurst exponent}
#'   \eqn{H}:
#'     \deqn{
#'       \sigma_t = \sigma {\Delta t}^H
#'     }
#'   Where \eqn{\sigma} is the daily return volatility.
#' 
#'   The \emph{Hurst exponent} \eqn{H} is equal to the logarithm of the ratio
#'   of the volatilities divided by the logarithm of the time interval
#'   \eqn{\Delta t}:
#'     \deqn{
#'       H = \frac{\log{\sigma_t} - \log{\sigma}}{\log{\Delta t}}
#'     }
#' 
#'   The function \code{calc_hurst_ohlc()} calls the function
#'   \code{calc_var_ohlc_ag()} to calculate the aggregated variance
#'   \eqn{\sigma^2_t}.
#' 
#' @examples
#' \dontrun{
#' # Calculate the log ohlc prices
#' ohlc <- log(rutils::etfenv$VTI)
#' # Calculate the Hurst exponent from 21 day aggregations
#' calc_hurst_ohlc(ohlc, step=21)
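#' # Sketch: the same estimate from the ratio of the aggregated variances,
#' # which scale as the interval to the power 2*H
#' 0.5*log(calc_var_ohlc_ag(ohlc, step=21)/calc_var_ohlc_ag(ohlc, step=1))/log(21)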
#' }
#' 
#' @export
calc_hurst_ohlc <- function(ohlc, step, method = "yang_zhang", closel = 0L, scale = TRUE, index = 0L) {
    .Call(`_HighFreq_calc_hurst_ohlc`, ohlc, step, method, closel, scale, index)
}

#' Perform multivariate linear regression using least squares and return a
#' named list of regression coefficients, their t-values, and p-values.
#' 
#' @param \code{respv} A single-column \emph{time series} or a \emph{vector}
#'   of response data.
#' 
#' @param \code{predm} A \emph{time series} or a \emph{matrix} of predictor
#'   data.
#' 
#' @return A named list with three elements: a \emph{matrix} of coefficients
#'   (named \emph{"coefficients"}), the \emph{z-score} of the last residual
#'   (named \emph{"zscore"}), and a \emph{vector} with the R-squared and
#'   F-statistic (named \emph{"stats"}). The numeric \emph{matrix} of
#'   coefficients named \emph{"coefficients"} contains the alpha and beta
#'   coefficients, and their \emph{t-values} and \emph{p-values}.
#'
#' @details
#'   The function \code{calc_lm()} performs the same calculations as the
#'   function \code{lm()} from package \emph{stats}. 
#'   It uses \code{RcppArmadillo} \code{C++} code so it's several times faster
#'   than \code{lm()}. The code was inspired by this
#'   \href{http://gallery.rcpp.org/articles/fast-linear-model-with-armadillo/}{Rcpp Gallery article}
#'   (but it's not identical to it).
#'
#' @examples
#' \dontrun{
#' # Calculate historical returns
#' retp <- na.omit(rutils::etfenv$returns[, c("XLF", "VTI", "IEF")])
#' # Response equals XLF returns
#' respv <- retp[, 1]
#' # Predictor matrix equals VTI and IEF returns
#' predm <- retp[, -1]
#' # Perform multivariate regression using lm()
#' regmod <- lm(respv ~ predm)
#' regsum <- summary(regmod)
#' # Add unit intercept column to the predictor matrix
#' predm <- cbind(rep(1, NROW(predm)), predm)
#' # Perform multivariate regression using calc_lm()
#' regarma <- HighFreq::calc_lm(respv=respv, predm=predm)
#' # Compare the outputs of both functions
#' all.equal(regarma$coefficients[, "coeff"], unname(coef(regmod)))
#' all.equal(unname(regarma$coefficients), unname(regsum$coefficients))
#' all.equal(unname(regarma$stats), c(regsum$r.squared, unname(regsum$fstatistic[1])))
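#' # The list element "zscore" holds the z-score of the last residual
#' regarma$zscore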
#' # Compare the speed of RcppArmadillo with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#'   Rcpp=HighFreq::calc_lm(respv=respv, predm=predm),
#'   Rcode=lm(respv ~ predm),
#'   times=10))[, c(1, 4, 5)]  # end microbenchmark summary
#' }
#' 
#' @export
calc_lm <- function(respv, predm) {
    .Call(`_HighFreq_calc_lm`, respv, predm)
}

#' Perform multivariate regression using different methods, and return a vector
#' of regression coefficients, their t-values, and the last residual z-score.
#' 
#' @param \code{respv} A single-column \emph{time series} or a \emph{vector}
#'   of response data.
#' 
#' @param \code{predm} A \emph{time series} or a \emph{matrix} of predictor
#'   data.
#' 
#' @param \code{controlv} A \emph{list} of model parameters (see Details).
#'
#' @return A single-row matrix with the regression coefficients, their
#'   t-values, and the last residual z-score.
#'
#' @details
#'   The function \code{calc_reg()} performs multivariate regression using
#'   different methods, and returns a vector of regression coefficients, their
#'   t-values, and the last residual z-score.
#'   
#'   The function \code{calc_reg()} accepts a list of regression model
#'   parameters through the argument \code{controlv}.
#'   The list of model parameters can be created using the function
#'   \code{param_reg()}.  Below is a description of the model parameters.
#'
#'   If \code{regmod = "least_squares"} (the default) then it performs the
#'   standard least squares regression, the same as the function
#'   \code{calc_lm()}, and the function \code{lm()} from the \code{R} package
#'   \emph{stats}.
#'   But it uses \code{RcppArmadillo} \code{C++} code so it's several times
#'   faster than \code{lm()}.
#'
#'   If \code{regmod = "regular"} then it performs shrinkage regression.  It
#'   calculates the \emph{reduced inverse} of the predictor matrix from its
#'   singular value decomposition.  It performs regularization by selecting
#'   only the largest \emph{singular values} equal in number to \code{dimax}.
#'   
#'   If \code{regmod = "quantile"} then it performs quantile regression (not
#'   implemented yet).
#' 
#'   The length of the return vector depends on the number of columns of the
#'   predictor matrix (including the intercept column, if it's been added in
#'   \code{R}).
#'   The number of regression coefficients is equal to the number of columns of
#'   the predictor matrix, and the number of t-values is equal to the number
#'   of coefficients.
#'   The length of the return vector is therefore equal to the number of
#'   regression coefficients, plus their t-values, plus one z-score.
#' 
#'   For example, if the number of columns of the predictor matrix is equal to
#'   \code{n}, then \code{calc_reg()} returns a vector with \code{2n+1}
#'   elements: \code{n} regression coefficients, \code{n} corresponding
#'   t-values, and \code{1} z-score value.
#'
#' @examples
#' \dontrun{
#' # Calculate historical returns
#' retp <- na.omit(rutils::etfenv$returns[, c("XLF", "VTI", "IEF")])
#' # Response equals XLF returns
#' respv <- retp[, 1]
#' # Predictor matrix equals VTI and IEF returns
#' predm <- retp[, -1]
#' # Perform multivariate regression using lm()
#' regmod <- lm(respv ~ predm)
#' regsum <- summary(regmod)
#' coeff <- regsum$coefficients
#' # Create a default list of regression parameters
#' controlv <- HighFreq::param_reg()
#' # Add unit intercept column to the predictor matrix
#' predm <- cbind(rep(1, NROW(predm)), predm)
#' # Perform multivariate regression using calc_reg()
#' regarma <- drop(HighFreq::calc_reg(respv=respv, predm=predm, controlv=controlv))
#' # Compare the outputs of both functions
#' all.equal(regarma[1:(2*NCOL(predm))], 
#'   c(coeff[, "Estimate"], coeff[, "t value"]), check.attributes=FALSE)
#' # Compare the speed of RcppArmadillo with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#'   Rcpp=HighFreq::calc_reg(respv=respv, predm=predm, controlv=controlv),
#'   Rcode=lm(respv ~ predm),
#'   times=10))[, c(1, 4, 5)]  # end microbenchmark summary
#' }
#' 
#' @export
calc_reg <- function(respv, predm, controlv) {
    .Call(`_HighFreq_calc_reg`, respv, predm, controlv)
}

#' Calculate a \emph{matrix} of mean (location) estimates over a rolling
#' look-back interval attached at the end points of a \emph{time series} or a
#' \emph{matrix}.
#'
#' @param \code{lookb} The number of end points in the look-back interval
#'   (the default is \code{lookb = 1}).
#'   
#' @param \code{tseries} A \emph{time series} or a \emph{matrix} of data.
#'
#' @param \code{startp} An \emph{integer} vector of start points (the default
#'   is \code{startp = 0}).
#' 
#' @param \code{endd} An \emph{integer} vector of end points (the default is
#'   \code{endd = 0}).
#' 
#' @param \code{step} The number of time periods between the end points (the
#'   default is \code{step = 1}).
#'
#' @param \code{stub} An \emph{integer} value equal to the first end point for
#'   calculating the end points (the default is \code{stub = 0}).
#' 
#' @param \code{method} A \emph{character string} representing the type of the
#'   mean (location) measure (the default is \code{method = "moment"}).
#'
#' @param \code{confl} The confidence level for calculating the quantiles of
#'   returns (the default is \code{confl = 0.75}).
#'
#' @return A \emph{matrix} of mean (location) estimates with the same number of
#'   columns as the input time series \code{tseries}, and the number of rows
#'   equal to the number of end points.
#'   
#' @details
#'   The function \code{roll_mean()} calculates a \emph{matrix} of mean
#'   (location) estimates over rolling look-back intervals attached at the end
#'   points of the \emph{time series} \code{tseries}.
#'   
#'   The function \code{roll_mean()} performs a loop over the end points, and at
#'   each end point it subsets the time series \code{tseries} over a look-back
#'   interval equal to \code{lookb} number of end points.
#'   
#'   It passes the subset time series to the function \code{calc_mean()}, which
#'   calculates the mean (location).
#'   See the function \code{calc_mean()} for a description of the mean methods.
#'   
#'   If the arguments \code{endd} and \code{startp} are not given then it
#'   first calculates a vector of end points separated by \code{step} time
#'   periods. It calculates the end points along the rows of \code{tseries}
#'   using the function \code{calc_endpoints()}, with the number of time
#'   periods between the end points equal to \code{step} time periods.
#' 
#'   For example, the rolling mean at \code{25} day end points, with a
#'   \code{75} day look-back, can be calculated using the parameters
#'   \code{step = 25} and \code{lookb = 3}.
#'
#'   The function \code{roll_mean()} with the parameter \code{step = 1}
#'   performs the same calculation as the function \code{roll_mean()} from
#'   package
#'   \href{https://cran.r-project.org/web/packages/RcppRoll/index.html}{RcppRoll},
#'   but it's several times faster because it uses \code{C++}
#'   \code{RcppArmadillo} code.
#'
#'   The function \code{roll_mean()} is implemented in \code{RcppArmadillo}
#'   \code{C++} code, which makes it several times faster than \code{R} code.
#'
#'   If only a simple rolling mean is required (not the median), then other
#'   functions like \code{roll_sum()} may be even faster.
#'
#' @examples
#' \dontrun{
#' # Define time series of returns using package rutils
#' retp <- na.omit(rutils::etfenv$returns$VTI)
#' # Calculate the rolling means at 25 day end points, with a 75 day look-back
#' meanv <- HighFreq::roll_mean(retp, lookb=3, step=25)
#' # Compare the mean estimates over 11-period look-back intervals
#' all.equal(HighFreq::roll_mean(retp, lookb=11)[-(1:10), ], 
#'   drop(RcppRoll::roll_mean(retp, n=11)), check.attributes=FALSE)
#' # Define end points and start points
#' endd <- HighFreq::calc_endpoints(NROW(retp), step=25)
#' startp <- HighFreq::calc_startpoints(endd, lookb=3)
#' # Calculate the rolling means using RcppArmadillo
#' meanv <- HighFreq::roll_mean(retp, startp=startp, endd=endd)
#' # Calculate the rolling medians using RcppArmadillo
#' medianscpp <- HighFreq::roll_mean(retp, startp=startp, endd=endd, method="nonparametric")
#' # Calculate the rolling medians using R
#' medians <- sapply(1:NROW(endd), function(i) {
#'   median(retp[startp[i]:endd[i] + 1])
#' })  # end sapply
#' all.equal(medians, drop(medianscpp))
#' # Compare the speed of RcppArmadillo with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#'   Rcpp=HighFreq::roll_mean(retp, startp=startp, endd=endd, method="nonparametric"),
#'   Rcode=sapply(1:NROW(endd), function(i) {median(retp[startp[i]:endd[i] + 1])}),
#'   times=10))[, c(1, 4, 5)]
#' }
#' @export
roll_mean <- function(tseries, lookb = 1L, startp = 0L, endd = 0L, step = 1L, stub = 0L, method = "moment", confl = 0.75) {
    .Call(`_HighFreq_roll_mean`, tseries, lookb, startp, endd, step, stub, method, confl)
}

#' Calculate a \emph{vector} of variance estimates over a rolling look-back
#' interval for a single-column \emph{time series} or a single-column
#' \emph{matrix}, using \code{RcppArmadillo}.
#'
#' @param \code{tseries} A single-column \emph{time series} or a single-column
#'   \emph{matrix}.
#' 
#' @param \code{lookb} The length of the look-back interval, equal to the
#'   number of \emph{vector} elements used for calculating a single variance
#'   estimate (the default is \code{lookb = 1}).
#'
#' @return A single-column \emph{matrix} with the same number of elements as
#'   the input argument \code{tseries}.
#'
#' @details
#'   The function \code{roll_varvec()} calculates a \emph{vector} of variance
#'   estimates over a rolling look-back interval for a single-column \emph{time
#'   series} or a single-column \emph{matrix}, using \code{RcppArmadillo} \code{C++}
#'   code.
#'   
#'   The function \code{roll_varvec()} uses an expanding look-back interval in
#'   the initial warmup period, to calculate the same number of elements as the
#'   input argument \code{tseries}.
#'
#'   The function \code{roll_varvec()} performs the same calculation as the
#'   function \code{roll_var()} from package
#'   \href{https://cran.r-project.org/web/packages/RcppRoll/index.html}{RcppRoll},
#'   but it's several times faster because it uses \code{RcppArmadillo} \code{C++}
#'   code.
#'
#' @examples
#' \dontrun{
#' # Create a vector of random returns
#' retp <- rnorm(1e6)
#' # Compare the variance estimates over 11-period look-back intervals
#' all.equal(drop(HighFreq::roll_varvec(retp, lookb=11))[-(1:10)], 
#'   RcppRoll::roll_var(retp, n=11))
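#' # The warmup period uses an expanding look-back, so the output has
#' # the same number of elements as the input
#' NROW(HighFreq::roll_varvec(retp, lookb=11)) == NROW(retp)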
#' # Compare the speed of RcppArmadillo with RcppRoll
#' library(microbenchmark)
#' summary(microbenchmark(
#'   Rcpp=HighFreq::roll_varvec(retp, lookb=11),
#'   RcppRoll=RcppRoll::roll_var(retp, n=11),
#'   times=10))[, c(1, 4, 5)]  # end microbenchmark summary
#' }
#' @export
roll_varvec <- function(tseries, lookb = 1L) {
    .Call(`_HighFreq_roll_varvec`, tseries, lookb)
}

#' Calculate a \emph{matrix} of dispersion (variance) estimates over a rolling
#' look-back interval attached at the end points of a \emph{time series} or a
#' \emph{matrix}.
#'
#' @param \code{tseries} A \emph{time series} or a \emph{matrix} of data.
#'
#' @param \code{lookb} The number of end points in the look-back interval
#'   (the default is \code{lookb = 1}).
#'   
#' @param \code{startp} An \emph{integer} vector of start points (the default
#'   is \code{startp = 0}).
#' 
#' @param \code{endd} An \emph{integer} vector of end points (the default is
#'   \code{endd = 0}).
#' 
#' @param \code{step} The number of time periods between the end points (the
#'   default is \code{step = 1}).
#'
#' @param \code{stub} An \emph{integer} value equal to the first end point for
#'   calculating the end points (the default is \code{stub = 0}).
#' 
#' @param \code{method} A \emph{character string} representing the type of the
#'   measure of dispersion (the default is \code{method = "moment"}).
#'
#' @param \code{confl} The confidence level for calculating the quantiles of
#'   returns (the default is \code{confl = 0.75}).
#'
#' @return A \emph{matrix} of dispersion (variance) estimates with the same number
#'   of columns as the input time series \code{tseries}, and the number of rows
#'   equal to the number of end points.
#'   
#' @details
#'   The function \code{roll_var()} calculates a \emph{matrix} of dispersion
#'   (variance) estimates over rolling look-back intervals attached at the end
#'   points of the \emph{time series} \code{tseries}.
#'   
#'   The function \code{roll_var()} performs a loop over the end points, and at
#'   each end point it subsets the time series \code{tseries} over a look-back
#'   interval equal to \code{lookb} number of end points.
#'   
#'   It passes the subset time series to the function \code{calc_var()}, which
#'   calculates the dispersion.
#'   See the function \code{calc_var()} for a description of the dispersion
#'   methods.
#'   
#'   If the arguments \code{endd} and \code{startp} are not given then it
#'   first calculates a vector of end points separated by \code{step} time
#'   periods. It calculates the end points along the rows of \code{tseries}
#'   using the function \code{calc_endpoints()}, with the number of time
#'   periods between the end points equal to \code{step} time periods.
#' 
#'   For example, the rolling variance at \code{25} day end points, with a
#'   \code{75} day look-back, can be calculated using the parameters
#'   \code{step = 25} and \code{lookb = 3}.
#'
#'   The function \code{roll_var()} with the parameter \code{step = 1}
#'   performs the same calculation as the function \code{roll_var()} from
#'   package
#'   \href{https://cran.r-project.org/web/packages/RcppRoll/index.html}{RcppRoll},
#'   but it's several times faster because it uses \code{RcppArmadillo} \code{C++}
#'   code.
#'
#'   The function \code{roll_var()} is implemented in \code{RcppArmadillo}
#'   \code{C++} code, which makes it several times faster than \code{R} code.
#'
#' @examples
#' \dontrun{
#' # Define time series of returns using package rutils
#' retp <- na.omit(rutils::etfenv$returns$VTI)
#' # Calculate the rolling variance at 25 day end points, with a 75 day look-back
#' varv <- HighFreq::roll_var(retp, lookb=3, step=25)
#' # Compare the variance estimates over 11-period look-back intervals
#' all.equal(HighFreq::roll_var(retp, lookb=11)[-(1:10), ], 
#'   drop(RcppRoll::roll_var(retp, n=11)), check.attributes=FALSE)
#' # Compare the speed of HighFreq::roll_var() with RcppRoll::roll_var()
#' library(microbenchmark)
#' summary(microbenchmark(
#'   Rcpp=HighFreq::roll_var(retp, lookb=11),
#'   RcppRoll=RcppRoll::roll_var(retp, n=11),
#'   times=10))[, c(1, 4, 5)]  # end microbenchmark summary
#' # Compare the speed of HighFreq::roll_var() with TTR::runMAD()
#' summary(microbenchmark(
#'     Rcpp=HighFreq::roll_var(retp, lookb=11, method="quantile"),
#'     TTR=TTR::runMAD(retp, n = 11),
#'     times=10))[, c(1, 4, 5)]  # end microbenchmark summary
#' }
#' @export
roll_var <- function(tseries, lookb = 1L, startp = 0L, endd = 0L, step = 1L, stub = 0L, method = "moment", confl = 0.75) {
    .Call(`_HighFreq_roll_var`, tseries, lookb, startp, endd, step, stub, method, confl)
}

#' Calculate a \emph{vector} of variance estimates over a rolling look-back
#' interval attached at the end points of a \emph{time series} or a
#' \emph{matrix} with \emph{OHLC} price data.
#' 
#' @param \code{ohlc} A \emph{time series} or a \emph{matrix} with \emph{OHLC}
#'   price data.
#'   
#' @param \code{startp} An \emph{integer} vector of start points (the default
#'   is \code{startp = 0}).
#' 
#' @param \code{endd} An \emph{integer} vector of end points (the default is
#'   \code{endd = 0}).
#' 
#' @param \code{step} The number of time periods between the end points (the
#'   default is \code{step = 1}).
#'
#' @param \code{lookb} The number of end points in the look-back interval
#'   (the default is \code{lookb = 1}).
#'   
#' @param \code{stub} An \emph{integer} value equal to the first end point for
#'   calculating the end points (the default is \code{stub = 0}).
#' 
#' @param \code{method} A \emph{character string} representing the price range
#'   estimator for calculating the variance.  The estimators include:
#'   \itemize{
#'     \item "close" close-to-close estimator,
#'     \item "rogers_satchell" Rogers-Satchell estimator,
#'     \item "garman_klass" Garman-Klass estimator,
#'     \item "garman_klass_yz" Garman-Klass with account for close-to-open price jumps,
#'     \item "yang_zhang" Yang-Zhang estimator,
#'    }
#'    (The default is the \emph{"yang_zhang"} estimator.)
#'    
#' @param \code{scale} \emph{Boolean} argument: Should the returns be divided
#'   by the time index, the number of seconds in each period?  (The default is
#'   \code{scale = TRUE}.)
#'   
#' @param \code{index} A \emph{vector} with the time index of the \emph{time
#'   series}.  This is an optional argument (the default is \code{index=0}).
#'
#' @return A column \emph{vector} of variance estimates, with the number of
#'   rows equal to the number of end points.
#'
#' @details
#'   The function \code{roll_var_ohlc()} calculates a \emph{vector} of variance
#'   estimates over a rolling look-back interval attached at the end points of
#'   the \emph{time series} \code{ohlc}.
#'   
#'   The input \emph{OHLC time series} \code{ohlc} is assumed to contain the
#'   log prices.
#'
#'   The function \code{roll_var_ohlc()} performs a loop over the end points,
#'   subsets the previous (past) rows of \code{ohlc}, and passes them into the
#'   function \code{calc_var_ohlc()}.
#' 
#'   At each end point, the variance is calculated over a look-back interval
#'   equal to \code{lookb} number of end points.
#'   In the initial warmup period, the variance is calculated over an expanding
#'   look-back interval.
#'   
#'   If the arguments \code{endd} and \code{startp} are not given then it
#'   first calculates a vector of end points separated by \code{step} time
#'   periods. It calculates the end points along the rows of \code{ohlc}
#'   using the function \code{calc_endpoints()}, with the number of time
#'   periods between the end points equal to \code{step} time periods.
#' 
#'   For example, the rolling variance at daily end points with an \code{11}
#'   day look-back, can be calculated using the parameters \code{step = 1} and
#'   \code{lookb = 11}, assuming the \code{ohlc} data has daily frequency.
#' 
#'   Similarly, the rolling variance at \code{25} day end points with a
#'   \code{75} day look-back, can be calculated using the parameters
#'   \code{step = 25} and \code{lookb = 3} (because \code{3*25 = 75}).
#' 
#'   The function \code{roll_var_ohlc()} calculates the variance from all the
#'   different intra-day and day-over-day returns (defined as the differences
#'   between \emph{OHLC} prices), using several different variance estimation
#'   methods.
#'   
#'   The default \code{method} is \emph{"yang_zhang"}, which theoretically
#'   has the lowest standard error among unbiased estimators.
#'   The methods \emph{"close"}, \emph{"garman_klass_yz"}, and
#'   \emph{"yang_zhang"} do account for \emph{close-to-open} price jumps, while
#'   the methods \emph{"garman_klass"} and \emph{"rogers_satchell"} do not
#'   account for \emph{close-to-open} price jumps.
#'
#'   If \code{scale} is \code{TRUE} (the default), then the returns are
#'   divided by the differences of the time index (which scales the variance
#'   to units of variance per second squared). This is useful when calculating
#'   the variance from minute bar data, because dividing the returns by the
#'   number of seconds decreases the effect of overnight price jumps. If the
#'   time index is in days, then the variance is scaled to units of variance
#'   per day squared.
#'   
#'   The optional argument \code{index} is the time index of the \emph{time
#'   series} \code{ohlc}. If the time index is in seconds, then the
#'   differences of the index are equal to the number of seconds in each time
#'   period.  If the time index is in days, then the differences are equal to
#'   the number of days in each time period.
#'   
#'   The function \code{roll_var_ohlc()} is implemented in \code{RcppArmadillo}
#'   \code{C++} code, which makes it several times faster than \code{R} code.
#'
#' @examples
#' \dontrun{
#' # Extract the log OHLC prices of SPY
#' ohlc <- log(HighFreq::SPY)
#' # Extract the time index of SPY prices
#' indeks <- c(1, diff(xts::.index(ohlc)))
#' # Rolling variance at minutes end points, with a 21 minute look-back
#' varoll <- HighFreq::roll_var_ohlc(ohlc, 
#'                               step=1, lookb=21, 
#'                               method="yang_zhang", 
#'                               index=indeks, scale=TRUE)
#' # Daily OHLC prices
#' ohlc <- rutils::etfenv$VTI
#' indeks <- c(1, diff(xts::.index(ohlc)))
#' # Rolling variance at 5 day end points, with a 20 day look-back (20=4*5)
#' varoll <- HighFreq::roll_var_ohlc(ohlc, 
#'                               step=5, lookb=4, 
#'                               method="yang_zhang", 
#'                               index=indeks, scale=TRUE)
#' # Same calculation in R
#' nrows <- NROW(ohlc)
#' closel <- HighFreq::lagit(ohlc[, 4])
#' endd <- drop(HighFreq::calc_endpoints(nrows, step=5)) + 1
#' startp <- drop(HighFreq::calc_startpoints(endd, lookb=4))
#' npts <- NROW(endd)
#' varollr <- sapply(2:npts, function(it) {
#'   rangev <- startp[it]:endd[it]
#'   sub_ohlc <- ohlc[rangev, ]
#'   sub_close <- closel[rangev]
#'   sub_index <- indeks[rangev]
#'   HighFreq::calc_var_ohlc(sub_ohlc, closel=sub_close, scale=TRUE, index=sub_index)
#' })  # end sapply
#' varollr <- c(0, varollr)
#' all.equal(drop(varoll), varollr)
#' }
#' @export
roll_var_ohlc <- function(ohlc, startp = 0L, endd = 0L, step = 1L, lookb = 1L, stub = 0L, method = "yang_zhang", scale = TRUE, index = 0L) {
    .Call(`_HighFreq_roll_var_ohlc`, ohlc, startp, endd, step, lookb, stub, method, scale, index)
}

#' Calculate a \emph{matrix} of skewness estimates over a rolling look-back
#' interval attached at the end points of a \emph{time series} or a
#' \emph{matrix}.
#'
#' @param \code{tseries} A \emph{time series} or a \emph{matrix} of data.
#'    
#' @param \code{startp} An \emph{integer} vector of start points (the default
#'   is \code{startp = 0}).
#' 
#' @param \code{endd} An \emph{integer} vector of end points (the default is 
#'   \code{endd = 0}).
#' 
#' @param \code{step} The number of time periods between the end points (the
#'   default is \code{step = 1}).
#'
#' @param \code{lookb} The number of end points in the look-back interval
#'   (the default is \code{lookb = 1}).
#'   
#' @param \code{stub} An \emph{integer} value equal to the first end point for
#'   calculating the end points (the default is \code{stub = 0}).
#' 
#' @param \code{method} A \emph{character string} specifying the type of the
#'   skewness model (the default is \code{method = "moment"} - see Details).
#'
#' @param \code{confl} The confidence level for calculating the quantiles of
#'   returns (the default is \code{confl = 0.75}).
#'
#' @return A \emph{matrix} of skewness estimates with the same number of
#'   columns as the input time series \code{tseries}, and the number of rows
#'   equal to the number of end points.
#'   
#' @details
#'   The function \code{roll_skew()} calculates a \emph{matrix} of skewness
#'   estimates over rolling look-back intervals attached at the end points of
#'   the \emph{time series} \code{tseries}.
#'   
#'   The function \code{roll_skew()} performs a loop over the end points, and
#'   at each end point it subsets the time series \code{tseries} over a
#'   look-back interval equal to \code{lookb} number of end points.
#'   
#'   It passes the subset time series to the function \code{calc_skew()}, which
#'   calculates the skewness.
#'   See the function \code{calc_skew()} for a description of the skewness
#'   methods.
#'   
#'   If the arguments \code{endd} and \code{startp} are not given then it
#'   first calculates a vector of end points separated by \code{step} time
#'   periods. It calculates the end points along the rows of \code{tseries}
#'   using the function \code{calc_endpoints()}, with the number of time
#'   periods between the end points equal to \code{step} time periods.
#' 
#'   For example, the rolling skewness at \code{25} day end points, with a
#'   \code{75} day look-back, can be calculated using the parameters
#'   \code{step = 25} and \code{lookb = 3}.
#'
#'   The function \code{roll_skew()} is implemented in \code{RcppArmadillo}
#'   \code{C++} code, which makes it several times faster than \code{R} code.
#'
#' @examples
#' \dontrun{
#' # Define time series of returns using package rutils
#' retp <- na.omit(rutils::etfenv$returns$VTI)
#' # Define end points and start points
#' endd <- 1 + HighFreq::calc_endpoints(NROW(retp), step=25)
#' startp <- HighFreq::calc_startpoints(endd, lookb=3)
#' # Calculate the rolling skewness at 25 day end points, with a 75 day look-back
#' skewv <- HighFreq::roll_skew(retp, step=25, lookb=3)
#' # Calculate the rolling skewness using R code
#' skewr <- sapply(1:NROW(endd), function(it) {
#'   HighFreq::calc_skew(retp[startp[it]:endd[it], ])
#' })  # end sapply
#' # Compare the skewness estimates
#' all.equal(drop(skewv), skewr, check.attributes=FALSE)
#' # Compare the speed of RcppArmadillo with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#'   Rcpp=HighFreq::roll_skew(retp, step=25, lookb=3),
#'   Rcode=sapply(1:NROW(endd), function(it) {
#'     HighFreq::calc_skew(retp[startp[it]:endd[it], ])
#'   }),
#'   times=10))[, c(1, 4, 5)]  # end microbenchmark summary
#' }
#' @export
roll_skew <- function(tseries, startp = 0L, endd = 0L, step = 1L, lookb = 1L, stub = 0L, method = "moment", confl = 0.75) {
    .Call(`_HighFreq_roll_skew`, tseries, startp, endd, step, lookb, stub, method, confl)
}

#' Calculate a \emph{matrix} of kurtosis estimates over a rolling look-back
#' interval attached at the end points of a \emph{time series} or a
#' \emph{matrix}.
#'
#' @param \code{tseries} A \emph{time series} or a \emph{matrix} of data.
#'    
#' @param \code{startp} An \emph{integer} vector of start points (the default
#'   is \code{startp = 0}).
#' 
#' @param \code{endd} An \emph{integer} vector of end points (the default is 
#'   \code{endd = 0}).
#' 
#' @param \code{step} The number of time periods between the end points (the
#'   default is \code{step = 1}).
#'
#' @param \code{lookb} The number of end points in the look-back interval
#'   (the default is \code{lookb = 1}).
#'   
#' @param \code{stub} An \emph{integer} value equal to the first end point for
#'   calculating the end points (the default is \code{stub = 0}).
#' 
#' @param \code{method} A \emph{character string} specifying the type of the
#'   kurtosis model (the default is \code{method = "moment"} - see Details).
#'
#' @param \code{confl} The confidence level for calculating the quantiles of
#'   returns (the default is \code{confl = 0.75}).
#'
#' @return A \emph{matrix} of kurtosis estimates with the same number of
#'   columns as the input time series \code{tseries}, and the number of rows
#'   equal to the number of end points.
#'   
#' @details
#'   The function \code{roll_kurtosis()} calculates a \emph{matrix} of kurtosis
#'   estimates over rolling look-back intervals attached at the end points of
#'   the \emph{time series} \code{tseries}.
#'   
#'   The function \code{roll_kurtosis()} performs a loop over the end points,
#'   and at each end point it subsets the time series \code{tseries} over a
#'   look-back interval equal to \code{lookb} number of end points.
#'   
#'   It passes the subset time series to the function \code{calc_kurtosis()},
#'   which calculates the kurtosis. See the function \code{calc_kurtosis()} for
#'   a description of the kurtosis methods.
#'   
#'   If the arguments \code{endd} and \code{startp} are not given then it
#'   first calculates a vector of end points separated by \code{step} time
#'   periods. It calculates the end points along the rows of \code{tseries}
#'   using the function \code{calc_endpoints()}, with the number of time
#'   periods between the end points equal to \code{step} time periods.
#' 
#'   For example, the rolling kurtosis at \code{25} day end points, with a
#'   \code{75} day look-back, can be calculated using the parameters
#'   \code{step = 25} and \code{lookb = 3}.
#'
#'   The function \code{roll_kurtosis()} is implemented in \code{RcppArmadillo}
#'   \code{C++} code, which makes it several times faster than \code{R} code.
#'
#' @examples
#' \dontrun{
#' # Define time series of returns using package rutils
#' retp <- na.omit(rutils::etfenv$returns$VTI)
#' # Define end points and start points
#' endd <- 1 + HighFreq::calc_endpoints(NROW(retp), step=25)
#' startp <- HighFreq::calc_startpoints(endd, lookb=3)
#' # Calculate the rolling kurtosis at 25 day end points, with a 75 day look-back
#' kurtosisv <- HighFreq::roll_kurtosis(retp, step=25, lookb=3)
#' # Calculate the rolling kurtosis using R code
#' kurtosisr <- sapply(1:NROW(endd), function(it) {
#'   HighFreq::calc_kurtosis(retp[startp[it]:endd[it], ])
#' })  # end sapply
#' # Compare the kurtosis estimates
#' all.equal(drop(kurtosisv), kurtosisr, check.attributes=FALSE)
#' # Compare the speed of RcppArmadillo with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#'   Rcpp=HighFreq::roll_kurtosis(retp, step=25, lookb=3),
#'   Rcode=sapply(1:NROW(endd), function(it) {
#'     HighFreq::calc_kurtosis(retp[startp[it]:endd[it], ])
#'   }),
#'   times=10))[, c(1, 4, 5)]  # end microbenchmark summary
#' }
#' @export
roll_kurtosis <- function(tseries, startp = 0L, endd = 0L, step = 1L, lookb = 1L, stub = 0L, method = "moment", confl = 0.75) {
    .Call(`_HighFreq_roll_kurtosis`, tseries, startp, endd, step, lookb, stub, method, confl)
}

#' Perform a rolling regression and calculate a matrix of regression
#' coefficients, their t-values, and z-scores.
#' 
#' @param \code{respv} A single-column \emph{time series} or a \emph{vector}
#'   of response data.
#' 
#' @param \code{predm} A \emph{time series} or a \emph{matrix} of predictor
#'   data.
#'   
#' @param \code{controlv} A \emph{list} of model parameters (see Details).
#'
#' @param \code{startp} An \emph{integer} vector of start points (the default
#'   is \code{startp = 0}).
#' 
#' @param \code{endd} An \emph{integer} vector of end points (the default is 
#'   \code{endd = 0}).
#' 
#' @param \code{step} The number of time periods between the end points (the
#'   default is \code{step = 1}).
#'
#' @param \code{lookb} The number of end points in the look-back interval
#'   (the default is \code{lookb = 1}).
#'   
#' @param \code{stub} An \emph{integer} value equal to the first end point for
#'   calculating the end points (the default is \code{stub = 0}).
#' 
#' @return A \emph{matrix} with the regression coefficients, their t-values,
#'   and z-scores, with the same number of rows as \code{predm} and a
#'   number of columns equal to \code{2n+1}, where \code{n} is the number of
#'   columns of \code{predm}.
#'
#' @details
#'   The function \code{roll_reg()} performs a rolling regression over the end
#'   points of the predictor matrix, and calculates a \emph{matrix} of
#'   regression coefficients, their t-values, and z-scores.
#'   
#'   The function \code{roll_reg()} performs a loop over the end points, and at
#'   each end point it subsets the time series \code{predm} over a look-back
#'   interval equal to \code{lookb} number of end points.
#'   
#'   If the arguments \code{endd} and \code{startp} are not given then it
#'   first calculates a vector of end points separated by \code{step} time
#'   periods. It calculates the end points along the rows of \code{predm}
#'   using the function \code{calc_endpoints()}, with the number of time
#'   periods between the end points equal to \code{step} time periods.
#'   
#'   For example, the rolling regression at \code{25} day end points, with a
#'   \code{75} day look-back, can be calculated using the parameters
#'   \code{step = 25} and \code{lookb = 3}.
#'
#'   It passes the subset time series to the function \code{calc_reg()}, which
#'   calculates the regression coefficients, their t-values, and the z-score.
#'   The function \code{roll_reg()} accepts a list of model parameters
#'   through the argument \code{controlv}, and passes it to the function
#'   \code{calc_reg()}.
#'   The list of model parameters can be created using the function
#'   \code{param_reg()}.  See the function \code{param_reg()} for a
#'   description of the model parameters.
#'   
#'   The number of columns of the return matrix depends on the number of
#'   columns of the predictor matrix (including the intercept column, if it's
#'   been added in \code{R}).
#'   The number of regression coefficients is equal to the number of columns of
#'   the predictor matrix.
#'   If the predictor matrix contains an intercept column then the first
#'   regression coefficient is equal to the intercept value \eqn{\alpha}.
#'   
#'   The number of columns of the return matrix is equal to the number of
#'   regression coefficients, plus their t-values, plus the z-score column.
#'   The number of t-values is equal to the number of coefficients.
#'   If the number of columns of the predictor matrix is equal to \code{n},
#'   then \code{roll_reg()} returns a matrix with \code{2n+1} columns: \code{n}
#'   regression coefficients, \code{n} corresponding t-values, and \code{1}
#'   z-score column.
#' 
#' @examples
#' \dontrun{
#' # Calculate historical returns
#' predm <- na.omit(rutils::etfenv$returns[, c("XLP", "VTI")])
#' # Add unit intercept column to the predictor matrix
#' predm <- cbind(rep(1, NROW(predm)), predm)
#' # Define monthly end points and start points
#' endd <- xts::endpoints(predm, on="months")[-1]
#' lookb <- 12
#' startp <- c(rep(1, lookb), endd[1:(NROW(endd)-lookb)])
#' # Create a default list of regression parameters
#' controlv <- HighFreq::param_reg()
#' # Calculate rolling betas using RcppArmadillo
#' regroll <- HighFreq::roll_reg(respv=predm[, 2], predm=predm[, -2], 
#'   endd=(endd-1), startp=(startp-1), controlv=controlv)
#' betas <- regroll[, 2]
#' # Calculate rolling betas in R
#' betar <- sapply(1:NROW(endd), FUN=function(ep) {
#'   datav <- predm[startp[ep]:endd[ep], ]
#'   # HighFreq::calc_reg(datav[, 2], datav[, -2], controlv)
#'   drop(cov(datav[, 2], datav[, 3])/var(datav[, 3]))
#' })  # end sapply
#' # Compare the outputs of both functions
#' all.equal(betas, betar, check.attributes=FALSE)
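#' # Unpack the rolling regression output, following the documented
#' # 2n+1 column layout (here n = 2 predictor columns)
#' coefm <- regroll[, 1:2]
#' tvalm <- regroll[, 3:4]
#' zscores <- regroll[, 5]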
#' }
#' 
#' @export
roll_reg <- function(respv, predm, controlv, startp = 0L, endd = 0L, step = 1L, lookb = 1L, stub = 0L) {
    .Call(`_HighFreq_roll_reg`, respv, predm, controlv, startp, endd, step, lookb, stub)
}

#' Perform a rolling standardization (centering and scaling) of the columns of
#' a \emph{time series} of data using \code{RcppArmadillo}.
#' 
#' @param \code{tseries} A \emph{time series} or \emph{matrix} of data.
#' 
#' @param \code{lookb} The length of the look-back interval, equal to the
#'   number of rows of data used in the scaling.
#'   
#' @param \code{center} A \emph{Boolean} argument: if \code{TRUE} then center
#'   the columns so that they have zero mean or median (the default is
#'   \code{TRUE}).
#' 
#' @param \code{scale} A \emph{Boolean} argument: if \code{TRUE} then scale the
#'   columns so that they have unit standard deviation or MAD (the default is
#'   \code{TRUE}).
#' 
#' @param \code{use_median} A \emph{Boolean} argument: if \code{TRUE} then the 
#'   centrality (central tendency) is calculated as the \emph{median} and the 
#'   dispersion is calculated as the \emph{median absolute deviation}
#'   (\emph{MAD}) (the default is \code{FALSE}).
#'   If \code{use_median = FALSE} then the centrality is calculated as the
#'   \emph{mean} and the dispersion is calculated as the \emph{standard
#'   deviation}.
#'
#' @return A \emph{matrix} with the same dimensions as the input argument
#'   \code{tseries}.
#'
#' @details
#'   The function \code{roll_scale()} performs a rolling standardization
#'   (centering and scaling) of the columns of the \code{tseries} argument
#'   using \code{RcppArmadillo}.
#'   The function \code{roll_scale()} performs a loop over the rows of
#'   \code{tseries}, subsets a number of previous (past) rows equal to
#'   \code{lookb}, and standardizes the subset matrix by calling the
#'   function \code{calc_scale()}.  It assigns the last row of the standardized
#'   subset \emph{matrix} to the return matrix.
#'   
#'   If the arguments \code{center} and \code{scale} are both \code{TRUE} and
#'   \code{use_median} is \code{FALSE} (the defaults), then
#'   \code{roll_scale()} performs the same calculation as the function
#'   \code{roll::roll_scale()}.
#'   
#'   If the arguments \code{center} and \code{scale} are both \code{TRUE} (the
#'   defaults), then \code{calc_scale()} standardizes the data.
#'   If the argument \code{center} is \code{FALSE} then \code{calc_scale()}
#'   only scales the data (divides it by the standard deviations).
#'   If the argument \code{scale} is \code{FALSE} then \code{calc_scale()}
#'   only demeans the data (subtracts the means).
#'   
#'   If the argument \code{use_median} is \code{TRUE}, then it calculates the
#'   centrality as the \emph{median} and the dispersion as the \emph{median
#'   absolute deviation} (\emph{MAD}).
#'   
#' @examples
#' \dontrun{
#' # Calculate a time series of returns
#' retp <- zoo::coredata(na.omit(rutils::etfenv$returns[, c("IEF", "VTI")]))
#' lookb <- 11
#' rolled_scaled <- roll::roll_scale(retp, width=lookb, min_obs=1)
#' rolled_scaled2 <- HighFreq::roll_scale(retp, lookb=lookb)
#' all.equal(rolled_scaled[-(1:2), ], rolled_scaled2[-(1:2), ],
#'   check.attributes=FALSE)
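#' # Replicate the rolling scaling in R (a sketch of the last-row logic;
#' # assumes calc_scale() uses the same variance denominator as scale())
#' scaledr <- t(sapply(1:NROW(retp), function(it) {
#'   startpoint <- max(1, it-lookb+1)
#'   subm <- scale(retp[startpoint:it, , drop=FALSE])
#'   subm[NROW(subm), ]
#' }))  # end sapply
#' all.equal(rolled_scaled2[-(1:lookb), ], scaledr[-(1:lookb), ],
#'   check.attributes=FALSE)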
#' }
#' 
#' @export
roll_scale <- function(tseries, lookb, center = TRUE, scale = TRUE, use_median = FALSE) {
    .Call(`_HighFreq_roll_scale`, tseries, lookb, center, scale, use_median)
}

#' Standardize (center and scale) the columns of a \emph{time series} of data
#' over time and in place, without copying the data in memory, using
#' \code{RcppArmadillo}.
#' 
#' @param \code{tseries} A \emph{time series} or \emph{matrix} of data.
#' 
#' @param \code{lambda} A decay factor which multiplies past estimates.
#' 
#' @param \code{center} A \emph{Boolean} argument: if \code{TRUE} then center
#'   the columns so that they have zero mean or median (the default is
#'   \code{TRUE}).
#' 
#' @param \code{scale} A \emph{Boolean} argument: if \code{TRUE} then scale the
#'   columns so that they have unit standard deviation or MAD (the default is
#'   \code{TRUE}).
#' 
#' @return Void (no return value - modifies the data in place).
#'
#' @details
#'   The function \code{run_scale()} performs a trailing standardization
#'   (centering and scaling) of the columns of the \code{tseries} argument
#'   using \code{RcppArmadillo}.
#' 
#'   The function \code{run_scale()} accepts a \emph{pointer} to the argument
#'   \code{tseries}, and it overwrites the old data with the standardized
#'   data. It performs the calculation in place, without copying the data in
#'   memory, which can significantly increase the computation speed for large
#'   time series.
#'
#'   The function \code{run_scale()} performs a loop over the rows of
#'   \code{tseries}, and standardizes the data using its trailing means and
#'   standard deviations.
#'
#'   The function \code{run_scale()} calculates the trailing mean and variance
#'   of streaming \emph{time series} data \eqn{r_t}, by recursively weighting
#'   the past estimates with the new data, using the decay factor \eqn{\lambda}:
#'   \deqn{
#'     \bar{r}_t = \lambda \bar{r}_{t-1} + (1-\lambda) r_t
#'   }
#'   \deqn{
#'     \sigma^2_t = \lambda \sigma^2_{t-1} + (1-\lambda) (r_t - \bar{r}_t)^2
#'   }
#'   Where \eqn{\bar{r}_t} is the trailing mean and \eqn{\sigma^2_t} is the
#'   trailing variance.
#'   
#'   It then calculates the standardized data as follows:
#'   \deqn{
#'     r^{\prime}_t = \frac{r_t - \bar{r}_t}{\sigma_t}
#'   }
#'
#'   If the arguments \code{center} and \code{scale} are both \code{TRUE} (the
#'   defaults), then \code{run_scale()} standardizes the data.
#'   If the argument \code{center} is \code{FALSE} then \code{run_scale()}
#'   only scales the data (divides it by the standard deviations).
#'   If the argument \code{scale} is \code{FALSE} then \code{run_scale()}
#'   only demeans the data (subtracts the means).
#'   
#'   The value of the decay factor \eqn{\lambda} must be in the range between
#'   \code{0} and \code{1}.  
#'   If \eqn{\lambda} is close to \code{1} then the decay is weak and past
#'   values have a greater weight, and the trailing variance values have a
#'   stronger dependence on past data.  This is equivalent to a long
#'   look-back interval.
#'   If \eqn{\lambda} is much less than \code{1} then the decay is strong and
#'   past values have a smaller weight, and the trailing variance values have a
#'   weaker dependence on past data.  This is equivalent to a short look-back
#'   interval.
#' 
#'   The above online recursive formulas are convenient for processing live
#'   streaming data because they don't require maintaining a buffer of past
#'   data.
#'   The formulas are equivalent to a convolution with exponentially decaying
#'   weights, but they're much faster to calculate.
#'   Using exponentially decaying weights is more natural than using a sliding
#'   look-back interval, because it gradually "forgets" about the past data.
#' 
#'   The function \code{run_scale()} uses \code{RcppArmadillo} \code{C++} code,
#'   so it can be over \code{100} times faster than the equivalent \code{R}
#'   code.
#'   
#' @examples
#' \dontrun{
#' # Calculate historical returns
#' retp <- na.omit(rutils::etfenv$returns[, c("XLF", "VTI")])
#' # Calculate the trailing standardized returns using R code
#' lambdaf <- 0.9
#' lambda1 <- 1 - lambdaf
#' scaled <- zoo::coredata(retp)
#' meanm <- scaled[1, ];
#' vars <- scaled[1, ]^2;
#' for (it in 2:NROW(retp)) {
#'   meanm <- lambdaf*meanm + lambda1*scaled[it, ];
#'   vars <- lambdaf*vars + lambda1*(scaled[it, ] - meanm)^2;
#'   scaled[it, ] <- (scaled[it, ] - meanm)/sqrt(vars)
#' }  # end for
#' # Calculate the trailing standardized returns using C++ code
#' HighFreq::run_scale(retp, lambda=lambdaf)
#' all.equal(zoo::coredata(retp), scaled, check.attributes=FALSE)
#' # Compare the speed of RcppArmadillo with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#'   Rcpp=HighFreq::run_scale(retp, lambda=lambdaf),
#'   Rcode={for (it in 2:NROW(retp)) {
#'    meanm <- lambdaf*meanm + lambda1*scaled[it, ];
#'    vars <- lambdaf*vars + lambda1*(scaled[it, ] - meanm)^2;
#'    scaled[it, ] <- (scaled[it, ] - meanm)/sqrt(vars)
#'   }},  # end for
#'   times=10))[, c(1, 4, 5)]  # end microbenchmark summary
#' }
#' 
#' @export
run_scale <- function(tseries, lambda, center = TRUE, scale = TRUE) {
    invisible(.Call(`_HighFreq_run_scale`, tseries, lambda, center, scale))
}

#' Calculate a \emph{vector} of z-scores of the residuals of rolling
#' regressions at the end points of the predictor matrix.
#' 
#' @param \code{respv} A single-column \emph{time series} or a \emph{vector}
#'   of response data.
#' 
#' @param \code{predm} A \emph{time series} or a \emph{matrix} of predictor
#'   data.
#'   
#' @param \code{startp} An \emph{integer} vector of start points (the default
#'   is \code{startp = 0}).
#' 
#' @param \code{endd} An \emph{integer} vector of end points (the default is
#'   \code{endd = 0}).
#' 
#' @param \code{step} The number of time periods between the end points (the
#'   default is \code{step = 1}).
#'
#' @param \code{lookb} The number of end points in the look-back interval
#'   (the default is \code{lookb = 1}).
#'   
#' @param \code{stub} An \emph{integer} value equal to the first end point for
#'   calculating the end points (the default is \code{stub = 0}).
#' 
#' @return A column \emph{vector} of the same length as the number of rows of
#'   \code{predm}.
#'
#' @details
#'   The function \code{roll_zscores()} calculates a \emph{vector} of z-scores
#'   of the residuals of rolling regressions at the end points of the
#'   \emph{time series} \code{predm}.
#'   
#'   The function \code{roll_zscores()} performs a loop over the end points,
#'   and at each end point it subsets the time series \code{predm} over a
#'   look-back interval equal to \code{lookb} number of end points.
#'   
#'   It passes the subset time series to the function \code{calc_lm()}, which
#'   calculates the regression data.
#'   
#'   If the arguments \code{endd} and \code{startp} are not given then it
#'   first calculates a vector of end points separated by \code{step} time
#'   periods. It calculates the end points along the rows of \code{predm}
#'   using the function \code{calc_endpoints()}, with the number of time
#'   periods between the end points equal to \code{step} time periods.
#'   
#'   For example, the rolling z-scores at \code{25} day end points, with a
#'   \code{75} day look-back, can be calculated using the parameters
#'   \code{step = 25} and \code{lookb = 3}.
#'
#' @examples
#' \dontrun{
#' # Calculate historical returns
#' retp <- na.omit(rutils::etfenv$returns[, c("XLF", "VTI", "IEF")])
#' # Response equals XLF returns
#' respv <- retp[, 1]
#' # Predictor matrix equals VTI and IEF returns
#' predm <- retp[, -1]
#' # Calculate Z-scores from rolling time series regression using RcppArmadillo
#' lookb <- 11
#' zscores <- HighFreq::roll_zscores(respv=respv, predm=predm, lookb=lookb)
#' # Calculate z-scores in R from rolling multivariate regression using lm()
#' zscoresr <- sapply(1:NROW(predm), function(ro_w) {
#'   if (ro_w == 1) return(0)
#'   startpoint <- max(1, ro_w-lookb+1)
#'   responsi <- respv[startpoint:ro_w]
#'   predicti <- predm[startpoint:ro_w, ]
#'   regmod <- lm(responsi ~ predicti)
#'   residuals <- regmod$residuals
#'   residuals[NROW(residuals)]/sd(residuals)
#' })  # end sapply
#' # Compare the outputs of both functions
#' all.equal(zscores[-(1:lookb)], zscoresr[-(1:lookb)], 
#'   check.attributes=FALSE)
#' }
#' 
#' @export
roll_zscores <- function(respv, predm, startp = 0L, endd = 0L, step = 1L, lookb = 1L, stub = 0L) {
    .Call(`_HighFreq_roll_zscores`, respv, predm, startp, endd, step, lookb, stub)
}

#' Calculate a \emph{matrix} of moment values over a rolling look-back
#' interval attached at the end points of a \emph{time series} or a
#' \emph{matrix}.
#'
#' @param \code{tseries} A \emph{time series} or a \emph{matrix} of data.
#'    
#' @param \code{funame} A \emph{character string} specifying the moment
#'   function (the default is \code{funame = "calc_mean"}).
#' 
#' @param \code{method} A \emph{character string} specifying the type of the
#'   model for the moment (the default is \code{method = "moment"}).
#'
#' @param \code{confl} The confidence level for calculating the quantiles of
#'   returns (the default is \code{confl = 0.75}).
#'
#' @param \code{startp} An \emph{integer} vector of start points (the default
#'   is \code{startp = 0}).
#' 
#' @param \code{endd} An \emph{integer} vector of end points (the default is 
#'   \code{endd = 0}).
#' 
#' @param \code{step} The number of time periods between the end points (the
#'   default is \code{step = 1}).
#'
#' @param \code{lookb} The number of end points in the look-back interval
#'   (the default is \code{lookb = 1}).
#'   
#' @param \code{stub} An \emph{integer} value equal to the first end point for
#'   calculating the end points (the default is \code{stub = 0}).
#'
#' @return A \emph{matrix} with the same number of columns as the input time
#'   series \code{tseries}, and the number of rows equal to the number of end
#'   points.
#'   
#' @details
#'   The function \code{roll_moment()} calculates a \emph{matrix} of moment
#'   values, over rolling look-back intervals attached at the end points of the
#'   \emph{time series} \code{tseries}.
#'   
#'   The function \code{roll_moment()} performs a loop over the end points, and
#'   at each end point it subsets the time series \code{tseries} over a
#'   look-back interval equal to \code{lookb} number of end points.
#'   
#'   It passes the subset time series to the function specified by the argument
#'   \code{funame}, which calculates the statistic.
#'   See the functions \code{calc_*()} for a description of the different
#'   moments.
#'   The function name must be one of the following:
#'   \itemize{
#'     \item "calc_mean" for the estimator of the mean (location),
#'     \item "calc_var" for the estimator of the dispersion (variance),
#'     \item "calc_skew" for the estimator of the skewness,
#'     \item "calc_kurtosis" for the estimator of the kurtosis.
#'    }
#'    (The default is \code{funame = "calc_mean"}.)
#'   
#'   If the arguments \code{endd} and \code{startp} are not given then it
#'   first calculates a vector of end points separated by \code{step} time
#'   periods. It calculates the end points along the rows of \code{tseries}
#'   using the function \code{calc_endpoints()}, with the number of time
#'   periods between the end points equal to \code{step} time periods.
#' 
#'   For example, the rolling moment at \code{25} day end points, with a
#'   \code{75} day look-back, can be calculated using the parameters
#'   \code{step = 25} and \code{lookb = 3}.
#'
#'   The function \code{roll_moment()} calls the function \code{calc_momptr()}
#'   to calculate a pointer to a moment function from the function name
#'   \code{funame} (string). The function pointer is used internally in the
#'   \code{C++} code, but the function \code{calc_momptr()} is not exported to
#'   \code{R}.
#'   
#'   The function \code{roll_moment()} is implemented in \code{RcppArmadillo}
#'   \code{C++} code, which makes it several times faster than \code{R} code.
#'
#' @examples
#' \dontrun{
#' # Define time series of returns using package rutils
#' retp <- na.omit(rutils::etfenv$returns$VTI)
#' # Calculate the rolling variance at 25 day end points, with a 75 day look-back
#' var_rollfun <- HighFreq::roll_moment(retp, fun="calc_var", step=25, lookb=3)
#' # Calculate the rolling variance using roll_var()
#' var_roll <- HighFreq::roll_var(retp, step=25, lookb=3)
#' # Compare the two methods
#' all.equal(var_rollfun, var_roll, check.attributes=FALSE)
#' # Define end points and start points
#' endd <- HighFreq::calc_endpoints(NROW(retp), step=25)
#' startp <- HighFreq::calc_startpoints(endd, lookb=3)
#' # Calculate the rolling variance using RcppArmadillo
#' var_rollfun <- HighFreq::roll_moment(retp, fun="calc_var", startp=startp, endd=endd)
#' # Calculate the rolling variance using R code
#' var_roll <- sapply(1:NROW(endd), function(it) {
#'   var(retp[startp[it]:endd[it]+1, ])
#' })  # end sapply
#' var_roll[1] <- 0
#' # Compare the two methods
#' all.equal(drop(var_rollfun), var_roll, check.attributes=FALSE)
#' # Compare the speed of RcppArmadillo with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#'   Rcpp=HighFreq::roll_moment(retp, funame="calc_var", startp=startp, endd=endd),
#'   Rcode=sapply(1:NROW(endd), function(it) {
#'     var(retp[startp[it]:endd[it]+1, ])
#'   }),
#'   times=10))[, c(1, 4, 5)]  # end microbenchmark summary
#' }
#' @export
roll_moment <- function(tseries, funame = "calc_mean", method = "moment", confl = 0.75, startp = 0L, endd = 0L, step = 1L, lookb = 1L, stub = 0L) {
    .Call(`_HighFreq_roll_moment`, tseries, funame, method, confl, startp, endd, step, lookb, stub)
}

#' Simulate or estimate the rolling variance under a \emph{GARCH(1,1)} process
#' using \emph{Rcpp}.
#' 
#' @param \code{omega} Parameter proportional to the long-term average level
#'   of variance.
#' 
#' @param \code{alpha} The weight associated with recent realized variance
#'   updates.
#' 
#' @param \code{beta} The weight associated with the past variance estimates.
#' 
#' @param \code{innov} A single-column \emph{matrix} of innovations.
#' 
#' @param \code{is_random} \emph{Boolean} argument: Are the innovations random
#'   numbers or historical returns? (The default is \code{is_random = TRUE}.)
#'
#' @return A \emph{matrix} with two columns and with the same number of rows as
#'   the argument \code{innov}.  The first column contains the simulated
#'   returns and the second column contains the variance.
#'
#' @details
#'   The function \code{sim_garch()} simulates or estimates the rolling variance
#'   under a \emph{GARCH(1,1)} process using \emph{Rcpp}.
#'
#'   If \code{is_random = TRUE} (the default) then the innovations \code{innov}
#'   are treated as random numbers \eqn{\xi_i} and the \emph{GARCH(1,1)}
#'   process is given by:
#'   \deqn{
#'     r_i = \sigma_{i-1} \xi_i
#'   }
#'   \deqn{
#'     \sigma^2_i = \omega + \alpha r^2_i + \beta \sigma_{i-1}^2
#'   }
#'   Where \eqn{r_i} and \eqn{\sigma^2_i} are the simulated returns and
#'   variance, and \eqn{\omega}, \eqn{\alpha}, and \eqn{\beta} are the
#'   \emph{GARCH} parameters, and \eqn{\xi_i} are standard normal
#'   \emph{innovations}.
#'
#'   The long-term equilibrium level of the simulated variance is proportional
#'   to the parameter \eqn{\omega}:
#'   \deqn{
#'     \sigma^2 = \frac{\omega}{1 - \alpha - \beta}
#'   }
#'   So the sum of \eqn{\alpha} plus \eqn{\beta} should be less than \eqn{1},
#'   otherwise the volatility becomes explosive.
#'   
#'   If \code{is_random = FALSE} then the function \code{sim_garch()}
#'   \emph{estimates} the rolling variance from the historical returns. The
#'   innovations \code{innov} are equal to the historical returns \eqn{r_i} and
#'   the \emph{GARCH(1,1)} process is simply:
#'   \deqn{
#'     \sigma^2_i = \omega + \alpha r^2_i + \beta \sigma_{i-1}^2
#'   }
#'   Where \eqn{\sigma^2_i} is the rolling variance.
#'   
#'   The above should be viewed as a formula for \emph{estimating} the rolling
#'   variance from the historical returns, rather than simulating them. It
#'   represents exponential smoothing of the squared returns with a decay
#'   factor equal to \eqn{\beta}.
#'
#'   The function \code{sim_garch()} simulates the \emph{GARCH} process using
#'   fast \emph{Rcpp} \code{C++} code.
#'
#' @examples
#' \dontrun{
#' # Define the GARCH model parameters
#' alpha <- 0.79
#' betav <- 0.2
#' omegav <- 1e-4*(1-alpha-betav)
#' # Calculate matrix of standard normal innovations
#' innov <- matrix(rnorm(1e3))
#' # Simulate the GARCH process using Rcpp
#' garch_data <- HighFreq::sim_garch(omega=omegav, alpha=alpha, beta=betav, innov=innov)
#' # Plot the GARCH rolling volatility and cumulative returns
#' plot(sqrt(garch_data[, 2]), t="l", main="Simulated GARCH Volatility", ylab="volatility")
#' plot(cumsum(garch_data[, 1]), t="l", main="Simulated GARCH Cumulative Returns", ylab="cumulative returns")
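#' # Compare the average simulated variance with its documented equilibrium
#' # level omega/(1-alpha-beta) (a rough check; a single path's mean is noisy)
#' c(mean(garch_data[, 2]), omegav/(1-alpha-betav))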
#' # Calculate historical VTI returns
#' retp <- na.omit(rutils::etfenv$returns$VTI)
#' # Estimate the GARCH volatility of VTI returns
#' garch_data <- HighFreq::sim_garch(omega=omegav, alpha=alpha, beta=betav, 
#'   innov=retp, is_random=FALSE)
#' # Plot dygraph of the estimated GARCH volatility
#' dygraphs::dygraph(xts::xts(sqrt(garch_data[, 2]), index(retp)), 
#'   main="Estimated GARCH Volatility of VTI")
#' }
#' 
#' @export
sim_garch <- function(omega, alpha, beta, innov, is_random = TRUE) {
    .Call(`_HighFreq_sim_garch`, omega, alpha, beta, innov, is_random)
}

#' Simulate an \emph{Ornstein-Uhlenbeck} process using \emph{Rcpp}.
#' 
#' @param \code{init_price} The initial price. 
#' 
#' @param \code{eq_price} The equilibrium price. 
#' 
#' @param \code{theta} The strength of mean reversion.
#' 
#' @param \code{innov} A single-column \emph{matrix} of innovations (random
#'   numbers).
#' 
#' @return A single-column \emph{matrix} of simulated prices, with the same
#'   number of rows as the argument \code{innov}.
#'
#' @details
#'   The function \code{sim_ou()} simulates the following
#'   \emph{Ornstein-Uhlenbeck} process:
#'   \deqn{
#'     r_i = p_i - p_{i-1} = \theta \, (\mu - p_{i-1}) + \xi_i
#'   }
#'   \deqn{
#'     p_i = p_{i-1} + r_i
#'   }
#'   Where \eqn{r_i} and \eqn{p_i} are the simulated returns and prices,
#'   \eqn{\theta} and \eqn{\mu} are the \emph{Ornstein-Uhlenbeck} parameters,
#'   and \eqn{\xi_i} are the standard \emph{innovations}.
#'   The recursion starts with: \eqn{r_1 = \xi_1} and \eqn{p_1 = init\_price}.
#'
#'   The function \code{sim_ou()} simulates the percentage returns as equal to
#'   the difference between the equilibrium price \eqn{\mu} and the latest
#'   price \eqn{p_{i-1}}, times the mean reversion parameter \eqn{\theta}, plus
#'   a random normal innovation. The log prices are calculated as the sum of
#'   the returns (not compounded), so they can become negative.
#'
#'   The function \code{sim_ou()} simulates the \emph{Ornstein-Uhlenbeck}
#'   process using fast \emph{Rcpp} \code{C++} code.
#'
#'   The function \code{sim_ou()} returns a single-column \emph{matrix}
#'   representing the \emph{time series} of simulated prices.
#'
#' @examples
#' \dontrun{
#' # Define the Ornstein-Uhlenbeck model parameters
#' init_price <- 0.0
#' eq_price <- 1.0
#' sigmav <- 0.01
#' thetav <- 0.01
#' innov <- matrix(rnorm(1e3, sd=sigmav))
#' # Simulate Ornstein-Uhlenbeck process using Rcpp
#' prices <- HighFreq::sim_ou(init_price=init_price, eq_price=eq_price, theta=thetav, innov=innov)
#' plot(prices, t="l", main="Simulated Ornstein-Uhlenbeck Prices", ylab="prices")
#' }
#' 
#' @export
sim_ou <- function(init_price, eq_price, theta, innov) {
    .Call(`_HighFreq_sim_ou`, init_price, eq_price, theta, innov)
}

#' Simulate a \emph{Schwartz} process using \emph{Rcpp}.
#' 
#' @param \code{init_price} The initial price. 
#' 
#' @param \code{eq_price} The equilibrium price. 
#' 
#' @param \code{theta} The strength of mean reversion.
#' 
#' @param \code{innov} A single-column \emph{matrix} of innovations (random
#'   numbers).
#' 
#' @return A single-column \emph{matrix} of simulated prices, with the same
#'   number of rows as the argument \code{innov}.
#'
#' @details
#'   The function \code{sim_schwartz()} simulates a \emph{Schwartz} process
#'   using fast \emph{Rcpp} \code{C++} code.
#'   
#'   The \emph{Schwartz} process is the exponential of the
#'   \emph{Ornstein-Uhlenbeck} process, and similar comments apply to it.
#'   The prices are calculated as the exponentially compounded returns, so they
#'   are never negative. The log prices can be obtained by taking the logarithm
#'   of the prices.
#'   
#'   The function \code{sim_schwartz()} simulates the percentage returns as
#'   equal to the difference between the equilibrium price \eqn{\mu} and the
#'   latest price \eqn{p_{i-1}}, times the mean reversion parameter
#'   \eqn{\theta}, plus a random normal innovation.
#'
#'   The function \code{sim_schwartz()} returns a single-column \emph{matrix}
#'   representing the \emph{time series} of simulated prices.
#'
#' @examples
#' \dontrun{
#' # Define the Schwartz model parameters
#' init_price <- 1.0
#' eq_price <- 2.0
#' thetav <- 0.01
#' innov <- matrix(rnorm(1e3, sd=0.01))
#' # Simulate Schwartz process using Rcpp
#' prices <- HighFreq::sim_schwartz(init_price=init_price, eq_price=eq_price, theta=thetav, innov=innov)
#' plot(prices, t="l", main="Simulated Schwartz Prices", ylab="prices")
#' }
#' 
#' @export
sim_schwartz <- function(init_price, eq_price, theta, innov) {
    .Call(`_HighFreq_sim_schwartz`, init_price, eq_price, theta, innov)
}

#' Simulate \emph{autoregressive} returns by recursively filtering a
#' \emph{matrix} of innovations through a \emph{matrix} of
#' \emph{autoregressive} coefficients.
#' 
#' @param \code{innov} A single-column \emph{matrix} of innovations.
#' 
#' @param \code{coeff} A single-column \emph{matrix} of \emph{autoregressive}
#'   coefficients.
#'
#' @return A single-column \emph{matrix} of simulated returns, with the same
#'   number of rows as the argument \code{innov}.
#'
#' @details
#'   The function \code{sim_ar()} recursively filters the \emph{matrix} of
#'   innovations \code{innov} through the \emph{matrix} of
#'   \emph{autoregressive} coefficients \code{coeff}, using fast
#'   \code{RcppArmadillo} \code{C++} code.
#'
#'   The function \code{sim_ar()} simulates an \emph{autoregressive} process
#'   \eqn{AR(n)} of order \eqn{n}:
#'   \deqn{
#'     r_i = \varphi_1 r_{i-1} + \varphi_2 r_{i-2} + \ldots + \varphi_n r_{i-n} + \xi_i
#'   }
#'   Where \eqn{r_i} is the simulated output time series, \eqn{\varphi_i} are
#'   the \emph{autoregressive} coefficients, and \eqn{\xi_i} are the standard
#'   normal \emph{innovations}.
#'
#'   The order \eqn{n} of the \emph{autoregressive} process \eqn{AR(n)} is
#'   equal to the number of rows of the \emph{autoregressive} coefficients
#'   \code{coeff}.
#'
#'   The function \code{sim_ar()} performs the same calculation as the standard
#'   \code{R} function \cr\code{filter(x=innov, filter=coeff,
#'   method="recursive")}, but it's several times faster.
#'   
#' @examples
#' \dontrun{
#' # Define AR coefficients
#' coeff <- matrix(c(0.1, 0.3, 0.5))
#' # Calculate matrix of innovations
#' innov <- matrix(rnorm(1e4, sd=0.01))
#' # Calculate recursive filter using filter()
#' innof <- filter(innov, filter=coeff, method="recursive")
#' # Calculate recursive filter using RcppArmadillo
#' retp <- HighFreq::sim_ar(coeff, innov)
#' # Compare the two methods
#' all.equal(as.numeric(retp), as.numeric(innof))
#' # Compare the speed of RcppArmadillo with R code
#' library(microbenchmark)
#' summary(microbenchmark(
#'   Rcpp=HighFreq::sim_ar(coeff, innov),
#'   Rcode=filter(innov, filter=coeff, method="recursive"),
#'   times=10))[, c(1, 4, 5)]  # end microbenchmark summary
#' }
#' 
#' @export
sim_ar <- function(coeff, innov) {
    .Call(`_HighFreq_sim_ar`, coeff, innov)
}

#' Simulate a \emph{Dickey-Fuller} process using \emph{Rcpp}.
#' 
#' @param \code{init_price} The initial price. 
#' 
#' @param \code{eq_price} The equilibrium price. 
#' 
#' @param \code{theta} The strength of mean reversion.
#' 
#' @param \code{coeff} A single-column \emph{matrix} of \emph{autoregressive}
#'   coefficients.
#'
#' @param \code{innov} A single-column \emph{matrix} of innovations (random
#'   numbers).
#' 
#' @return A single-column \emph{matrix} of simulated prices, with the same
#'   number of rows as the argument \code{innov}.
#'
#' @details
#'   The function \code{sim_df()} simulates the following \emph{Dickey-Fuller}
#'   process:
#'   \deqn{
#'     r_i = \theta \, (\mu - p_{i-1}) + \varphi_1 r_{i-1} + \ldots + \varphi_n r_{i-n} + \xi_i
#'   }
#'   \deqn{
#'     p_i = p_{i-1} + r_i
#'   }
#'   Where \eqn{r_i} and \eqn{p_i} are the simulated returns and prices,
#'   \eqn{\theta} and \eqn{\mu} are the \emph{Ornstein-Uhlenbeck} parameters,
#'   \eqn{\varphi_i} are the \emph{autoregressive} coefficients, and
#'   \eqn{\xi_i} are the normal \emph{innovations}.
#'   The recursion starts with: \eqn{r_1 = \xi_1} and \eqn{p_1 = init\_price}.
#'
#'   The \emph{Dickey-Fuller} process is a combination of an
#'   \emph{Ornstein-Uhlenbeck} process and an \emph{autoregressive} process.
#'   The order \eqn{n} of the \emph{autoregressive} process \eqn{AR(n)} is
#'   equal to the number of rows of the \emph{autoregressive} coefficients
#'   \code{coeff}.
#'
#'   The function \code{sim_df()} simulates the \emph{Dickey-Fuller}
#'   process using fast \emph{Rcpp} \code{C++} code.
#'
#'   The function \code{sim_df()} returns a single-column \emph{matrix}
#'   representing the \emph{time series} of prices.
#'
#' @examples
#' \dontrun{
#' # Define the Ornstein-Uhlenbeck model parameters
#' init_price <- 1.0
#' eq_price <- 2.0
#' thetav <- 0.01
#' # Define AR coefficients
#' coeff <- matrix(c(0.1, 0.3, 0.5))
#' # Calculate matrix of standard normal innovations
#' innov <- matrix(rnorm(1e3, sd=0.01))
#' # Simulate Dickey-Fuller process using Rcpp
#' prices <- HighFreq::sim_df(init_price=init_price, eq_price=eq_price, theta=thetav, coeff=coeff, innov=innov)
#' plot(prices, t="l", main="Simulated Dickey-Fuller Prices")
#' }
#' 
#' @export
sim_df <- function(init_price, eq_price, theta, coeff, innov) {
    .Call(`_HighFreq_sim_df`, init_price, eq_price, theta, coeff, innov)
}

#' Calculate the log-likelihood of a time series of returns assuming a
#' \emph{GARCH(1,1)} process.
#' 
#' @param \code{omega} Parameter proportional to the long-term average level
#'   of variance.
#' 
#' @param \code{alpha} The weight associated with recent realized variance
#'   updates.
#' 
#' @param \code{beta} The weight associated with the past variance estimates.
#' 
#' @param \code{returns} A single-column \emph{matrix} of returns.
#' 
#' @param \code{minval} The floor value applied to the variance, to avoid zero
#'   values. (The default is \code{minval = 0.000001}.)
#' 
#' @return The log-likelihood value.
#'
#' @details
#'   The function \code{lik_garch()} calculates the log-likelihood of a time
#'   series of returns assuming a \emph{GARCH(1,1)} process.
#'   
#'   It first estimates the rolling variance of the \code{returns} argument
#'   using the function \code{sim_garch()}:
#'   \deqn{
#'     \sigma^2_i = \omega + \alpha r^2_i + \beta \sigma_{i-1}^2
#'   }
#'   Where \eqn{r_i} is the time series of returns, \eqn{\sigma^2_i} is the
#'   estimated rolling variance, and \eqn{\omega}, \eqn{\alpha}, and
#'   \eqn{\beta} are the \emph{GARCH} parameters.
#'   It applies the floor value \code{minval} to the variance to avoid zero
#'   values, so the minimum value of the variance is equal to \code{minval}.
#'
#'   The function \code{lik_garch()} calculates the log-likelihood assuming a
#'   normal distribution of returns conditional on the variance
#'   \eqn{\sigma^2_{i-1}} in the previous period, as follows:
#'   \deqn{
#'     likelihood = - \sum_{i=1}^n (\frac{r^2_i}{\sigma^2_{i-1}} + \log(\sigma^2_{i-1}))
#'   }
#'
#' @examples
#' \dontrun{
#' # Define the GARCH model parameters
#' alpha <- 0.79
#' betav <- 0.2
#' om_ega <- 1e-4*(1-alpha-betav)
#' # Calculate historical VTI returns
#' retp <- na.omit(rutils::etfenv$returns$VTI)
#' # Calculate the log-likelihood of VTI returns assuming GARCH(1,1)
#' HighFreq::lik_garch(omega=om_ega, alpha=alpha, beta=betav, returns=retp)
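#' # A minimal pure-R sketch of the likelihood calculation in Details
#' # (the initialization of the variance is an assumption)
#' retv <- drop(zoo::coredata(retp))
#' nrows <- NROW(retv)
#' varv <- numeric(nrows)
#' varv[1] <- max(om_ega/(1-alpha-betav), 1e-6)  # assumed: long-run variance
#' for (i in 2:nrows) {
#'   varv[i] <- max(om_ega + alpha*retv[i]^2 + betav*varv[i-1], 1e-6)
#' }
#' -sum(retv[-1]^2/varv[-nrows] + log(varv[-nrows]))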
#' }
#' 
#' @export
lik_garch <- function(omega, alpha, beta, returns, minval = 0.000001) {
    .Call(`_HighFreq_lik_garch`, omega, alpha, beta, returns, minval)
}

#' Simulate a portfolio optimization strategy using online (recursive) updating
#' of the covariance matrix.
#' 
#' @param \code{rets} A \emph{time series} or \emph{matrix} of asset returns.
#' 
#' @param \code{dimax} An \emph{integer} equal to the number of \emph{eigen
#'   values} used for calculating the reduced inverse of the covariance
#'   matrix (the default is \code{dimax = 0} - standard matrix inverse using
#'   all the \emph{eigen values}).
#'   
#' @param \code{lambda} A decay factor which multiplies the past asset returns.
#'   
#' @param \code{lambdacov} A decay factor which multiplies the past covariance.
#'   
#' @param \code{lambdaw} A decay factor which multiplies the past portfolio
#'   weights.
#'   
#' @return A \emph{matrix} of strategy returns and the portfolio weights, with
#'   the same number of rows as the argument \code{rets}.
#'   
#' @details
#'   The function \code{sim_portfoptim()} simulates a portfolio optimization
#'   strategy. The strategy calculates the maximum Sharpe portfolio weights
#'   \emph{in-sample} at every point in time, and applies them in the
#'   \emph{out-of-sample} time interval.  It updates the trailing covariance
#'   matrix recursively, instead of using past batches of data. The function
#'   \code{sim_portfoptim()} uses three different decay factors for averaging past
#'   values, to reduce the variance of its forecasts.
#'   
#'   The function \code{sim_portfoptim()} first scales the returns by their
#'   trailing volatilities:
#'   \deqn{
#'     r^s_t = \frac{r_t}{\sigma_{t-1}}
#'   }
#'   Returns scaled by their volatility are more stationary so they're easier
#'   to model.
#'   
#'   Then at every point in time, the function \code{sim_portfoptim()} calls
#'   the function \code{HighFreq::push_covar()} to update the trailing
#'   covariance matrix of the returns:
#'   \deqn{
#'     \bar{r}_t = \lambda_c \bar{r}_{t-1} + (1-\lambda_c) r^s_t
#'   }
#'   \deqn{
#'     \hat{r}_t = r^s_t - \bar{r}_t
#'   }
#'   \deqn{
#'     {cov}_t = \lambda_c {cov}_{t-1} + (1-\lambda_c) \hat{r}^T_t \hat{r}_t
#'   }
#'   Where \eqn{\lambda_c} is the decay factor which multiplies the past mean
#'   and covariance.
#'   
#'   It then calls the function \code{HighFreq::calc_inv()} to calculate the
#'   \emph{reduced inverse} of the covariance matrix using its eigen
#'   decomposition:
#'   \deqn{
#'     \strong{C}^{-1} = \strong{O}_{dimax} \, \Sigma^{-1}_{dimax} \, \strong{O}^T_{dimax}
#'   }
#'   See the function \code{HighFreq::calc_inv()} for details.
#'   
#'   It then calculates the \emph{in-sample} weights of the maximum Sharpe
#'   portfolio, by multiplying the inverse covariance matrix times the trailing
#'   means of the asset returns:
#'   \deqn{
#'     \bar{r}_t = \lambda \bar{r}_{t-1} + (1-\lambda) r^s_t
#'   }
#'   \deqn{
#'     \strong{w}_t = \strong{C}^{-1} \bar{r}_t
#'   }
#'   Note that the decay factor \eqn{\lambda} is different from the decay
#'   factor \eqn{\lambda_c} used for updating the trailing covariance
#'   matrix.
#'   
#'   It then scales the weights so their sum of squares is equal to one:
#'   \deqn{
#'     \strong{w}_t = \frac{\strong{w}_t}{\sqrt{\sum{\strong{w}^2_t}}}
#'   }
#'   
#'   It then calculates the trailing mean of the weights:
#'   \deqn{
#'     \bar{\strong{w}}_t = \lambda_w \bar{\strong{w}}_{t-1} + (1-\lambda_w) \strong{w}_t
#'   }
#'   Note that the decay factor \eqn{\lambda_w} is different from the decay
#'   factor \eqn{\lambda} used for updating the trailing means.
#'   
#'   It finally calculates the \emph{out-of-sample} portfolio returns by
#'   multiplying the trailing mean weights times the scaled asset returns:
#'   \deqn{
#'     r^p_t = \bar{\strong{w}}_{t-1} r^s_t
#'   }
#'   Applying weights to scaled returns means trading stock amounts with unit
#'   dollar volatility.  So if the weight is equal to \code{2} then we should
#'   purchase an amount of stock with dollar volatility equal to \code{2}
#'   dollars.  Trading stock amounts with unit dollar volatility improves
#'   portfolio diversification.
#'   
#'   The values of the three decay factors \eqn{\lambda}, \eqn{\lambda_c}, and
#'   \eqn{\lambda_w} must be in the range between \code{0} and \code{1}.
#'   If \eqn{\lambda} is close to \code{1} then the decay is weak and past
#'   values have a greater weight, so the trailing values have a greater
#'   dependence on past data.  This is equivalent to a long look-back
#'   interval.
#'   If \eqn{\lambda} is much less than \code{1} then the decay is strong and
#'   past values have a smaller weight, so the trailing values have a weaker
#'   dependence on past data.  This is equivalent to a short look-back
#'   interval.
#' 
#'   The function \code{sim_portfoptim()} returns multiple columns of data,
#'   with the same number of rows as the input argument \code{rets}. The first
#'   column contains the strategy returns and the remaining columns contain the
#'   portfolio weights.
#'   
#' @examples
#' \dontrun{
#' # Load ETF returns
#' retp <- rutils::etfenv$returns[, c("VTI", "TLT", "DBC", "USO", "XLF", "XLK")]
#' retp <- na.omit(retp)
#' datev <- zoo::index(retp) # dates
#' # Simulate a portfolio optimization strategy
#' dimax <- 6
#' lambdaf <- 0.978
#' lambdacov <- 0.995
#' lambdaw <- 0.9
#' pnls <- HighFreq::sim_portfoptim(retp, dimax, lambdaf, lambdacov, lambdaw)
#' colnames(pnls) <- c("pnls", "VTI", "TLT", "DBC", "USO", "XLF", "XLK")
#' pnls <- xts::xts(pnls, order.by=datev)
#' # Plot dygraph of strategy
#' wealthv <- cbind(retp$VTI, pnls$pnls*sd(retp$VTI)/sd(pnls$pnls))
#' colnames(wealthv) <- c("VTI", "Strategy")
#' endd <- rutils::calc_endpoints(wealthv, interval="weeks")
#' dygraphs::dygraph(cumsum(wealthv)[endd], main="Portfolio Optimization Strategy Returns") %>%
#'  dyOptions(colors=c("blue", "red"), strokeWidth=2) %>%
#'  dyLegend(width=300)
#' # Plot dygraph of weights
#' symbolv <- "VTI"
#' stockweights <- cbind(cumsum(retp[, symbolv]), pnls[, symbolv])
#' colnames(stockweights)[2] <- "Weight"
#' colnamev <- colnames(stockweights)
#' endd <- rutils::calc_endpoints(pnls, interval="weeks")
#' dygraphs::dygraph(stockweights[endd], main="Returns and Weight") %>%
#'   dyAxis("y", label=colnamev[1], independentTicks=TRUE) %>%
#'   dyAxis("y2", label=colnamev[2], independentTicks=TRUE) %>%
#'   dySeries(axis="y", label=colnamev[1], strokeWidth=2, col="blue") %>%
#'   dySeries(axis="y2", label=colnamev[2], strokeWidth=2, col="red")
#' }
#' 
#' @export
sim_portfoptim <- function(rets, dimax, lambda, lambdacov, lambdaw) {
    .Call(`_HighFreq_sim_portfoptim`, rets, dimax, lambda, lambdacov, lambdaw)
}

#' Calculate the optimal portfolio weights using a variety of different
#' objective functions.
#' 
#' @param \code{returns} A \emph{time series} or a \emph{matrix} of returns
#'   data (the returns in excess of the risk-free rate).
#'   
#' @param \code{controlv} A \emph{list} of portfolio optimization model
#'   parameters (see Details).
#'
#' @return A column \emph{vector} of the same length as the number of columns
#'   of \code{returns}.
#'
#' @details
#'   The function \code{calc_weights()} calculates the optimal portfolio
#'   weights using a variety of different objective functions.
#' 
#'   The function \code{calc_weights()} accepts a list of portfolio
#'   optimization parameters through the argument \code{controlv}.
#'   The list of portfolio optimization parameters can be created using
#'   the function \code{param_portf()}.  Below is a description of the
#'   parameters.
#'
#'   If \code{method = "maxsharpe"} (the default) then \code{calc_weights()}
#'   calculates the weights of the maximum Sharpe portfolio, by multiplying the
#'   \emph{reduced inverse} of the \emph{covariance matrix}
#'   \eqn{\strong{C}^{-1}} times the mean column returns \eqn{\bar{r}}:
#'   \deqn{
#'     \strong{w} = \strong{C}^{-1} \bar{r}
#'   }
#'   
#'   If \code{method = "maxsharpemed"} then \code{calc_weights()} uses the
#'   medians instead of the means.
#'   
#'   If \code{method = "minvarlin"} then it calculates the weights of the
#'   minimum variance portfolio under linear constraint, by multiplying the
#'   \emph{reduced inverse} of the \emph{covariance matrix} times the unit
#'   vector:
#'   \deqn{
#'     \strong{w} = \strong{C}^{-1} \strong{1}
#'   }
#'   
#'   If \code{method = "minvarquad"} then it calculates the weights of the
#'   minimum variance portfolio under quadratic constraint (which is the
#'   highest order principal component).
#'
#'   If \code{method = "sharpem"} then it calculates the momentum weights equal
#'   to the Sharpe ratios (the \code{returns} divided by their standard
#'   deviations):
#'   \deqn{
#'     \strong{w} = \frac{\bar{r}}{\sigma}
#'   }
#'
#'   If \code{method = "kellym"} then it calculates the momentum weights equal
#'   to the Kelly ratios (the \code{returns} divided by their variance):
#'   \deqn{
#'     \strong{w} = \frac{\bar{r}}{\sigma^2}
#'   }
#'
#'   \code{calc_weights()} calls the function \code{calc_inv()} to calculate
#'   the \emph{reduced inverse} of the \emph{covariance matrix} of
#'   \code{returns}. It regularizes the inverse by keeping only the
#'   \code{dimax} largest \emph{eigenvalues}.
#'   
#'   In addition, \code{calc_weights()} applies shrinkage to the columns of
#'   \code{returns}, by shrinking their means to their common mean value:
#'   \deqn{
#'     r^{\prime}_i = (1 - \alpha) \, \bar{r}_i + \alpha \, \mu
#'   }
#'   Where \eqn{\bar{r}_i} is the mean of column \eqn{i} and \eqn{\mu} is the
#'   average of all the column means.
#'   The shrinkage intensity \code{alpha} determines the amount of shrinkage
#'   that is applied, with \code{alpha = 0} representing no shrinkage (with the
#'   column means \eqn{\bar{r}_i} unchanged), and \code{alpha = 1} representing
#'   complete shrinkage (with the column means all equal to the single mean of
#'   all the columns: \eqn{\bar{r}_i = \mu}).
#'
#'   After the weights are calculated, they are scaled, depending on several
#'   arguments.
#'
#'   If \code{rankw = TRUE} then the weights are converted into their ranks.
#'   The default is \code{rankw = FALSE}.
#'
#'   If \code{centerw = TRUE} then the weights are centered so that their sum
#'   is equal to \code{0}.  The default is \code{centerw = FALSE}.
#'
#'   If \code{scalew = "voltarget"} (the default) then the weights are
#'   scaled (multiplied by a factor) so that the weighted portfolio has an
#'   in-sample volatility equal to \code{voltarget}.
#'   
#'   If \code{scalew = "voleqw"} then the weights are scaled so that the
#'   weighted portfolio has the same volatility as the equal weight portfolio.
#'   
#'   If \code{scalew = "sumone"} then the weights are scaled so that their
#'   sum is equal to \code{1}.
#'   If \code{scalew = "sumsq"} then the weights are scaled so that their
#'   sum of squares is equal to \code{1}.
#'   If \code{scalew = "none"} then the weights are not scaled.
#' 
#'   The function \code{calc_weights()} is written in \code{C++}
#'   \code{RcppArmadillo} code.
#'   
#' @examples
#' \dontrun{
#' # Calculate covariance matrix and eigen decomposition of ETF returns
#' retp <- na.omit(rutils::etfenv$returns[, 1:16])
#' ncols <- NCOL(retp)
#' eigend <- eigen(cov(retp))
#' # Calculate reduced inverse of covariance matrix
#' dimax <- 3
#' eigenvec <- eigend$vectors[, 1:dimax]
#' eigenval <- eigend$values[1:dimax]
#' invmat <- eigenvec %*% (t(eigenvec) / eigenval)
#' # Define shrinkage intensity and apply shrinkage to the mean returns
#' alpha <- 0.5
#' colmeans <- colMeans(retp)
#' colmeans <- ((1-alpha)*colmeans + alpha*mean(colmeans))
#' # Calculate weights using R
#' weightr <- drop(invmat %*% colmeans)
#' # Apply the weight scalings in turn (the final sum-of-squares scaling
#' # matches scalew="sumsq" and supersedes the previous scalings)
#' weightr <- weightr*sd(rowMeans(retp))/sd(retp %*% weightr)  # equal weight volatility
#' weightr <- 0.01*weightr/sd(retp %*% weightr)  # volatility target
#' weightr <- weightr/sqrt(sum(weightr^2))  # unit sum of squares
#' # Create a list of portfolio optimization parameters
#' controlv <- HighFreq::param_portf(method="maxsharpe", dimax=dimax, alpha=alpha, scalew="sumsq")
#' # Calculate weights using RcppArmadillo
#' weightcpp <- drop(HighFreq::calc_weights(retp, controlv=controlv))
#' all.equal(weightcpp, weightr)
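#' # A sketch of the minimum variance weights under linear constraint
#' # (assumes the Details formula w = C^{-1} 1, with the reduced inverse above)
#' weightmv <- drop(invmat %*% rep(1, ncols))
#' weightmv <- weightmv/sqrt(sum(weightmv^2))  # scale to unit sum of squares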
#' }
#' 
#' @export
calc_weights <- function(returns, controlv) {
    .Call(`_HighFreq_calc_weights`, returns, controlv)
}

#' Simulate (backtest) a rolling portfolio optimization strategy, using
#' \code{RcppArmadillo}.
#' 
#' @param \code{retx} A \emph{time series} or a \emph{matrix} of excess
#'   returns data (the returns in excess of the risk-free rate).
#'   
#' @param \code{retp} A \emph{time series} or a \emph{matrix} of asset
#'   returns data.
#'   
#' @param \code{controlv} A \emph{list} of portfolio optimization model
#'   parameters (see Details).
#'
#' @param \code{startp} An \emph{integer vector} of start points.
#' 
#' @param \code{endd} An \emph{integer vector} of end points.
#' 
#' @param \code{lambda} A decay factor which multiplies the past portfolio
#'   weights.  (The default is \code{lambda = 0} - no memory.)
#'   
#' @param \code{coeff} A \emph{numeric} multiplier of the weights (the
#'   default is \code{coeff = 1}).
#'   
#' @param \code{bidask} A \emph{numeric} bid-ask spread (the default is
#'   \code{bidask = 0}).
#'   
#' @return A column \emph{vector} of strategy returns, with the same length as
#'   the number of rows of \code{retp}.
#'
#' @details
#'   The function \code{back_test()} performs a backtest simulation of a
#'   rolling portfolio optimization strategy over a \emph{vector} of the end
#'   points \code{endd}.
#'   
#'   It performs a loop over the end points \code{endd}, and subsets the
#'   \emph{matrix} of the excess asset returns \code{retx} along its rows,
#'   between the corresponding \emph{start point} and the \emph{end point}. 
#'   
#'   The function \code{back_test()} passes the subset matrix of excess returns
#'   into the function \code{calc_weights()}, which calculates the optimal
#'   portfolio weights at each \emph{end point}.
#'   It also passes to \code{calc_weights()} the argument \code{controlv},
#'   which is the list of portfolio optimization parameters.
#'   See the function \code{calc_weights()} for more details.
#'   The list of portfolio optimization parameters can be created using the
#'   function \code{param_portf()}.
#'   
#'   The function \code{back_test()} then recursively averages the weights
#'   \eqn{w_i} at the \emph{end point = i} with the weights \eqn{w_{i-1}} from
#'   the previous \emph{end point = (i-1)}, using the decay factor \code{lambda
#'   = \eqn{\lambda}}:
#'   \deqn{
#'     w_i = (1-\lambda) w_i + \lambda w_{i-1}
#'   }
#'   The purpose of averaging the weights is to reduce their variance, and
#'   improve their out-of-sample performance.  It is equivalent to extending
#'   the portfolio holding period beyond the time interval between neighboring
#'   \emph{end points}.
#'   
#'   The function \code{back_test()} then calculates the out-of-sample strategy
#'   returns by multiplying the average weights times the future asset returns.
#'   
#'   The function \code{back_test()} multiplies the out-of-sample strategy
#'   returns by the coefficient \code{coeff} (with default equal to \code{1}),
#'   which allows simulating either a trending strategy (if \code{coeff = 1}),
#'   or a reverting strategy (if \code{coeff = -1}).
#'   
#'   The function \code{back_test()} calculates the transaction costs by
#'   multiplying the bid-ask spread \code{bidask} by the absolute difference
#'   between the current weights and the weights from the previous period.
#'   It then subtracts the transaction costs from the out-of-sample strategy
#'   returns.
#'   
#'   The function \code{back_test()} returns a \emph{time series} (column
#'   \emph{vector}) of strategy returns, of the same length as the number of
#'   rows of \code{retp}.
#'
#' @examples
#' \dontrun{
#' # Calculate the ETF daily excess returns
#' retp <- na.omit(rutils::etfenv$returns[, 1:16])
#' # riskf is the daily risk-free rate
#' riskf <- 0.03/260
#' retx <- retp - riskf
#' # Define monthly end points without initial warmup period
#' endd <- rutils::calc_endpoints(retp, interval="months")
#' endd <- endd[endd > 0]
#' nrows <- NROW(endd)
#' # Define 12-month look-back interval and start points over sliding window
#' lookb <- 12
#' startp <- c(rep_len(1, lookb-1), endd[1:(nrows-lookb+1)])
#' # Define return shrinkage and dimension reduction
#' alpha <- 0.5
#' dimax <- 3
#' # Create a list of portfolio optimization parameters
#' controlv <- HighFreq::param_portf(method="maxsharpe", dimax=dimax, alpha=alpha, scalew="sumsq")
#' # Simulate a monthly rolling portfolio optimization strategy
#' pnls <- HighFreq::back_test(retx, retp, controlv=controlv, startp=(startp-1), endd=(endd-1))
#' pnls <- xts::xts(pnls, index(retp))
#' colnames(pnls) <- "strategy"
#' # Plot dygraph of strategy
#' dygraphs::dygraph(cumsum(pnls), 
#'   main="Cumulative Returns of Max Sharpe Portfolio Strategy")
#' }
#' 
#' @export
back_test <- function(retx, retp, controlv, startp, endd, lambda = 0.0, coeff = 1.0, bidask = 0.0) {
    .Call(`_HighFreq_back_test`, retx, retp, controlv, startp, endd, lambda, coeff, bidask)
}