Nothing
#' @importFrom stats lm formula
#' @importFrom xts xts
#' @importFrom zoo index
#' @keywords internal
estimhar <- function(y, x, externalRegressor){ #Potentially add stuff here
colnames(y) <- "y"
if(is.xts(y)){
output <- lm(formula(y ~ .), data = xts(cbind(y, x, externalRegressor), order.by = index(y))) ##Changed this to y~. because I got an error that the data was a list, even though class(data) was data.frame (only happened when using leverage)
} else {
## Shouldn't actually ever get reached
output <- lm(formula(y ~ .), data = cbind(y, x, externalRegressor))
}
return(output)
}
# Help function to get nicely formatted formula's for print/summary methods.
#' @keywords internal
getHarmodelformula <- function(x) {
modelnames = colnames(x$model)[-1] ##Changed because the formula is now y~. which makes model a single matrix-like object
if(grepl("-X", x$type)){ ## We have external regressors
externals <- grepl("external", modelnames)
modelnames[externals] <- paste0("X", 1:sum(externals))
}
if (!is.null(x$transform)) {
modelnames <- paste(x$transform,"(",modelnames,")",sep="")
} #Added visual tingie for plotting transformed RV
betas <- paste("beta", (1:length(modelnames)),"",sep="")
betas2 <- paste(" + ", betas,"*")
rightside <- paste(betas2, modelnames,collapse="")
h <- x$h
left <- paste("RV", h, sep = "")
if (!is.null(x$transform)) {
left <- paste(x$transform,"(",left,")",sep="" )
}
modeldescription <- paste(left, "= beta0", rightside)
return(list(modeldescription,betas))
}
#Insanity filter of BPQ, not exported
#' @keywords internal
harInsanityFilter <- function(fittedValues, lower, upper, replacement) {
fittedValues[(fittedValues < lower | fittedValues > upper)] <- replacement
return(fittedValues)
}
#' Heterogeneous autoregressive (HAR) model for realized volatility model estimation
#'
#' @description Function returns the estimates for the heterogeneous autoregressive model (HAR)
#' for realized volatility discussed in Andersen et al. (2007) and Corsi (2009).
#' This model is mainly used to forecast the next day's volatility based on the high-frequency returns of the past.
#'
#' @param data an \code{xts} object containing either: intraday (log-)returns or realized measures already computed from such returns.
#' In case more than one realized measure is needed, the object should have the as many columns as realized measures needed.
#' The first column should always be the realized variance proxy.
#' In case type is either \code{"HARQJ"} or \code{"CHARQ"} the order should be
#' \code{"RV"}, \code{"BPV"}, \code{"RQ"}, or the relevant proxies.
#' @param periods a vector of integers indicating over how days the realized measures in the model should be aggregated.
#' By default \code{periods = c(1,5,22)}, which corresponds to one day, one week and one month respectively. This default is in line with Andersen et al. (2007).
#' @param periodsJ a vector of integers indicating over what time periods the jump components in the model should be aggregated.
#' By default \code{periodsJ = c(1,5,22)}, which corresponds to one day, one week and one month respectively.
#' @param periodsQ a vector of integers indicating over what time periods the realized quarticity in the model should be aggregated.
#' By default \code{periodsQ = c(1,5,22)}, which corresponds to one day, one week and one month respectively.
#' @param leverage a vector of integers indicating over what periods the negative returns should be aggregated.
#' See Corsi and Reno (2012) for more information. By default \code{leverage = NULL} and the model assumes the absence of a leverage effect.
#' Set \code{leverage = c(1,5,22)} to mimic the analysis in Corsi and Reno (2012).
#' @param RVest a character vector with one, two, or three elements. The first element always refers to the name of the function to estimate the daily integrated variance (non-jump-robust).
#' The second and third element depends on which type of model is estimated:
#' If \code{type = "HARJ"}, \code{type = "HARCJ"}, \code{type = "HARQJ"} the second element refers to the name of the function to estimate the continuous component of daily volatility (jump robust).
#' If type = "HARQ", the second element refers to the name of the function used to estimate the integrated quarticity.
#' If \code{type = "HARQJ"} the third element always refers to the name of the function used to estimate integrated quarticity.
#' By default \code{RVest = c("rCov","rBPCov","rQuar")}, i.e. using the realized volatility, realized bi-power variance, and realized quarticity.
#' @param type a string referring to the type of HAR model you would like to estimate. By default \code{type = "HAR"}, the most basic model.
#' Other valid options for type are \code{"HARJ"}, \code{"HARCJ"}, \code{"HARQ"}, \code{"HARQJ"}, \code{"CHAR"}, or \code{"CHARQ"}.
#' @param inputType a string denoting if the input data consists of realized measures or high-frequency returns.
#' Default "RM" is the only way to denote realized measures and everything else denotes returns.
#' @param jumpTest the function name of a function used to test whether the test statistic which determines whether the jump variability is significant that day. By default \code{jumpTest = "ABDJumptest"}, hence using the test statistic in Equation or Equation (18) of Andersen et al. (2007).
#' It is also possible to provide pre-computed test statistics for jump tests by setting \code{jumpTest} to \code{"testStat"}. These test statistics should still be passed as the third column.
#' @param alpha a real indicating the confidence level used in testing for jumps. By default \code{alpha = 0.05}.
#' @param h an integer indicating the number over how many days the dependent variable should be aggregated.
#' By default, \code{h = 1}, i.e. no aggregation takes place, you just model the daily realized volatility.
#' @param transform optionally a string referring to a function that transforms both the dependent and explanatory variables in the model. By default \code{transform = NULL}, so no transformation is done. Typical other choices in this context would be "log" or "sqrt".
#' @param externalRegressor an \code{xts} object of same number of rows as \code{data}, and one column. This is used as an external regressor. Default is \code{NULL}.
#' @param periodsExternal a vector of integers indicating over how days \code{externalRegressor} should be aggregated.
#' @param ... extra arguments for jump test.
#'
#' @return The function outputs an object of class \code{HARmodel} and \code{\link{lm}} (so \code{HARmodel} is a subclass of \code{\link{lm}}). Objects
#' of class \code{HARmodel} has the following methods \code{\link{plot.HARmodel}}, \code{\link{predict.HARmodel}}, \code{\link{print.HARmodel}}, and \code{\link{summary.HARmodel}}.
#'
#' @details The basic specification in Corsi (2009) is as follows.
#' Let \eqn{RV_{t}} be the realized variances at day \eqn{t} and \eqn{RV_{t-k:t}} the average
#' realized variance in between \eqn{t-k} and \eqn{t}, \eqn{k \geq 0}.
#'
#' The dynamics of the model are given by
#' \deqn{
#' RV_{t+1} = \beta_0 + \beta_1 \ RV_{t} + \beta_2 \ RV_{t-4:t} + \beta_3 \ RV_{t-21:t} + \varepsilon_{t+1}
#' }
#' which is estimated by ordinary least squares under the assumption that at time \eqn{t},
#' the conditional mean of \eqn{\varepsilon_{t+1}} is equal to zero.
#'
#' For other specifications, please refer to the cited papers.
#'
#' The standard errors reporting in the \code{print} and \code{summary} methods are Newey-West standard errors calculated with the \pkg{sandwich} package.
#'
#' @references
#' Andersen, T. G., Bollerslev, T., and Diebold, F. (2007). Roughing it up: Including jump components in the measurement, modelling and forecasting of return volatility. \emph{The Review of Economics and Statistics}, 89, 701-720.
#'
#' Corsi, F. (2009). A simple approximate long memory model of realized volatility. \emph{Journal of Financial Econometrics}, 7, 174-196.
#'
#' Corsi, F. and Reno R. (2012). Discrete-time volatility forecasting with persistent leverage effect and the link with continuous-time volatility modeling. \emph{Journal of Business & Economic Statistics}, 30, 368-380.
#'
#' Bollerslev, T., Patton, A., and Quaedvlieg, R. (2016). Exploiting the errors: A simple approach for improved volatility forecasting, \emph{Journal of Econometrics}, 192, 1-18.
#'
#' @keywords forecasting
#'
#' @examplesIf !grepl("debian", sessionInfo()["platform"], fixed = TRUE)
#' \dontshow{data.table::setDTthreads(2)}
#' # Example 1: HAR
#' # Forecasting daily Realized volatility for the S&P 500 using the basic HARmodel: HAR
#' library(xts)
#' RVSPY <- as.xts(SPYRM$RV5, order.by = SPYRM$DT)
#'
#' x <- HARmodel(data = RVSPY , periods = c(1,5,22), RVest = c("rCov"),
#' type = "HAR", h = 1, transform = NULL, inputType = "RM")
#' class(x)
#' x
#' summary(x)
#' plot(x)
#' predict(x)
#'
#'
#' # Example 2: HARQ
#' # Get the highfrequency returns
#' dat <- as.xts(sampleOneMinuteData[, makeReturns(STOCK), by = list(DATE = as.Date(DT))])
#' x <- HARmodel(dat, periods = c(1,5,10), periodsJ = c(1,5,10),
#' periodsQ = c(1), RVest = c("rCov", "rQuar"),
#' type="HARQ", inputType = "returns")
#' # Estimate the HAR model of type HARQ
#' class(x)
#' x
#' # plot(x)
#' # predict(x)
#'
#' # Example 3: HARQJ with already computed realized measures
#' dat <- SPYRM[, list(DT, RV5, BPV5, RQ5)]
#' x <- HARmodel(as.xts(dat), periods = c(1,5,22), periodsJ = c(1),
#' periodsQ = c(1), type = "HARQJ")
#' # Estimate the HAR model of type HARQJ
#' class(x)
#' x
#' # plot(x)
#' predict(x)
#'
#' # Example 4: CHAR with already computed realized measures
#' dat <- SPYRM[, list(DT, RV5, BPV5)]
#'
#' x <- HARmodel(as.xts(dat), periods = c(1, 5, 22), type = "CHAR")
#' # Estimate the HAR model of type CHAR
#' class(x)
#' x
#' # plot(x)
#' predict(x)
#'
#' # Example 5: CHARQ with already computed realized measures
#' dat <- SPYRM[, list(DT, RV5, BPV5, RQ5)]
#'
#' x <- HARmodel(as.xts(dat), periods = c(1,5,22), periodsQ = c(1), type = "CHARQ")
#' # Estimate the HAR model of type CHARQ
#' class(x)
#' x
#' # plot(x)
#' predict(x)
#'
#' # Example 6: HARCJ with pre-computed test-statistics
#' ## BNSJumptest manually calculated.
#' testStats <- sqrt(390) * (SPYRM$RV1 - SPYRM$BPV1)/sqrt((pi^2/4+pi-3 - 2) * SPYRM$medRQ1)
#' model <- HARmodel(cbind(as.xts(SPYRM[, list(DT, RV5, BPV5)]), testStats), type = "HARCJ")
#'
#'
#' @author Jonathan Cornelissen, Kris Boudt, Onno Kleen, and Emil Sjoerup.
#' @export
HARmodel <- function(data, periods = c(1, 5, 22), periodsJ = c(1, 5, 22), periodsQ = c(1),
leverage = NULL, RVest = c("rCov","rBPCov", "rQuar"), type = "HAR", inputType = "RM",
jumpTest = "ABDJumptest", alpha = 0.05, h = 1, transform = NULL, externalRegressor = NULL, periodsExternal = c(1), ...){
nperiods <- length(periods) # Number of periods to aggregate over
nest <- length(RVest) # Number of RV estimators
nperiodsQ <- length(periodsQ) #Number of periods to aggregate realized quarticity over
jumpModels <- c("HARJ", "HARCJ", "HARQJ", "CHAR", "CHARQ")
quarticityModels <- c("HARQ", "HARQJ", "CHARQ")
bpvModels <- c("CHAR", "CHARQ")
if(!is.xts(data)){
stop("The data in the HARmodel function must be of xts format.")
}
if (!is.null(transform)) {
Ftransform = match.fun(transform)
}
if (!(type %in% c("HAR", jumpModels, quarticityModels))) {
warning("Please provide a valid argument for type, see documentation.")
}
if(!is.null(externalRegressor)){
if(!is.xts(externalRegressor) && !(nrow(externalRegressor) != nrow(data) | ncol(externalRegressor) != 1)){
stop("When using the externalRegressor argument, the input must be of type xts and have same number of rows as the data input. externalRegressor must have one column")
}
}
if (inputType != "RM") { #If it is returns as input
# Get the daily RMs
RV1 <- match.fun(RVest[1])
RM1 <- apply.daily(data, RV1 )
# save dates:
alldates = index(RM1)
if (type %in% jumpModels ) {
RV2 <- match.fun( RVest[2])
RM2 <- apply.daily(data, RV2)
}
if( type %in% quarticityModels ) {
if(type %in% c("HARQJ","CHARQ")){##HARQJ and CHARQ are the only models that need both BPV and realized quarticity.
RV2 <- match.fun(RVest[2])
RM2 <- apply.daily(data, RV2)
RV3 <- match.fun(RVest[3])
RM3 <- apply.daily(data, RV3)
}
if(type == "HARQ"){
RV3 <- match.fun( RVest[2])
RM3 <- apply.daily( data, RV3 )
periodsJ <- periods
}
}
}
if (inputType == "RM") { #The input is already realized measures
dimdata <- dim(data)[2]
alldates <- index(data)
RM1 <- data[,1]
if (type %in% jumpModels) {
RM2 <- data[,2]
}
if (type == "HARQ") {
RM3 <- data[,2]
}
if (type %in% c("HARQJ", "CHARQ", "HARCJ")) {
RM2 <- data[, 2]
RM3 <- data[, 3]
}
}
# Get the matrix for estimation of linear model:
# This looks weird, but we need to make sure that we don't consider periodsJ in maxp when type == "HAR"
# and in other similar cases.
maxp <- max(
c(periods,periodsJ[type %in% jumpModels],periodsQ[type %in% quarticityModels])
) # Max number of aggregation levels
if (!is.null(leverage)) {
maxp <- max(maxp,leverage)
}
n <- length(RM1) #Number of Days
# Aggregate RV:
#RVmatrix1 <- aggRV(RM1,periods)
RVmatrix1 <- har_agg(RM1, periods, nperiods)
colnames(RVmatrix1) <- paste0("RV", periods)
# Aggregate and subselect y:
y <- xts(har_agg(RM1, c(h), 1L)[(maxp+h):(n)], order.by = index(RM1)[(maxp+h):(n)])
colnames(y) <- "y"
# Only keep useful parts:
x1 <- RVmatrix1[(maxp:(n-h)), ]
if (type %in% jumpModels) {
RVmatrix2 <- har_agg(RM2,periods, nperiods)
colnames(RVmatrix2) <- paste0("J", periods)
x2 <- RVmatrix2[(maxp:(n-h)),]
} # In case a jumprobust estimator is supplied
if (type %in% quarticityModels) { #in case realized quarticity estimator is supplied
RQmatrix <- as.matrix(har_agg(RM3,periodsQ, nperiodsQ)[(maxp:(n-h)),])
colnames(RQmatrix) <- paste0("RQ", periodsQ)
if (nperiodsQ == 1){
RQmatrix <- as.matrix(sqrt(RQmatrix) - sqrt(mean(RM3)))
} else {
RQmatrix <- sqrt(RQmatrix) - sqrt(mean(RM3)) #Demeaned realized quarticity estimator as in BPQ(2016)
}
}
# Jumps:
if (type %in% jumpModels && !(type %in% bpvModels)) { # If model type is as such that you need jump component, don't spend time on computing jumps for CHAR.. models
if (any(RM2 == 0) && inputType == "RM") { #The jump contributions were provided
J <- RM2
} else { #compute jump contributions
J <- pmax(RM1 - RM2, 0) # Jump contributions should be positive
}
J <- as.matrix(har_agg(J, periodsJ, length(periodsJ)))
colnames(J) <- paste0("J", periodsJ)
}
if (!is.null(leverage)) {
if (sum(data < 0) == 0) {
stop("You cannot use leverage variables in the model in case your input consists of Realized Measures")
}
# Get close-to-close returns
e <- apply.daily(data,sum) #Sum logreturns daily
# Get the rmins:
rmintemp <- pmin(e,0)
# Aggregate everything:
rmin <- har_agg(rmintemp, leverage, length(leverage))[(maxp:(n-h)),]
colnames(rmin) <- paste0("Rmin", leverage)
# Select:
#rmin <- rmin[(maxp:(n-h)),]
} else {
rmin <- matrix(ncol=0,nrow=dim(x1)[1])
}
if(!is.null(externalRegressor)){
## Aggregate external regressor as mandated by periodsExternal
externalRegressor <- har_agg(externalRegressor, periodsExternal, length(periodsExternal))[(maxp:(n-h)), ,drop = FALSE]
}
# Estimate the model parameters, according to type of model :
# First model type: traditional HAR-RV:
if (type == "HAR") {
if (!is.null(transform)) {
y <- Ftransform(y)
x1 <- Ftransform(x1)
}
x1 <- cbind(x1,rmin)
model <- estimhar(y = y, x = x1, externalRegressor = externalRegressor)
if(!is.null(externalRegressor)){
type <- paste0(type, "-X")
}
model$RVest <- RVest[1]
} #End HAR-RV if cond
if (type == "HARJ") {
if (!is.null(transform) && transform == "log") {
J <- J + 1 # will be transformed below in x <- Ftransform(x)
}
J <- J[(maxp:(n-h)),,drop = FALSE]
x <- cbind(x1,J) # bind jumps to RV data
if (!is.null(transform)) {
y <- Ftransform(y)
x <- Ftransform(x)
}
x <- cbind(x, rmin)
model <- estimhar(y = y, x = x, externalRegressor = externalRegressor)
if(!is.null(externalRegressor)){
type <- paste0(type, "-X")
}
model$RVest <- RVest
}#End HAR-RV-J if cond
if (type == "HARCJ") {
# Are the jumps significant? if not set to zero:
if (jumpTest == "ABDJumptest") {
if(inputType != "RM"){
TQ <- apply.daily(data, rTPQuar)
} else {
TQ <- RM3
}
J <- J[,1]
teststats <- ABDJumptest(RV=RM1,BPV=RM2,TQ=TQ )
} else if(jumpTest == "testStats") {
teststats <- RM3
} else {
jtest <- match.fun(jumpTest)
teststats <- jtest(data,...)
}
Jindicators <- teststats > qnorm(1-alpha)
J[!Jindicators] <- 0
# Get continuous components if necessary RV measures if necessary:
Cmatrix <- matrix(nrow = dim(RVmatrix1)[1], ncol = 1)
Cmatrix[Jindicators,] <- RVmatrix2[Jindicators, 1] #Fill with robust one in case of jump
Cmatrix[(!Jindicators)] <- RVmatrix1[(!Jindicators), 1] #Fill with non-robust one in case of no-jump
# Aggregate again:
Cmatrix <- har_agg(Cmatrix, periods, nperiods)
colnames(Cmatrix) <- paste0("C", periods)
Jmatrix <- har_agg(J, periodsJ, length(periodsJ))
colnames(Jmatrix) <- paste0("J", periodsJ)
# subset again:
Cmatrix <- Cmatrix[(maxp:(n-h)), ]
Jmatrix <- Jmatrix[(maxp:(n-h)), ]
if (!is.null(transform) && transform=="log") {
Jmatrix <- Jmatrix + 1 # Will be transformed later
}
x <- cbind(Cmatrix, Jmatrix) # bind jumps to RV data
if (!is.null(transform)) {
y <- Ftransform(y)
x <- Ftransform(x)
}
x <- cbind(x,rmin)
model <- estimhar(y = y, x = x, externalRegressor = externalRegressor)
model$RVest <- RVest
model$jumpTest <- jumpTest
model$alpha_jumps <- alpha
if(!is.null(externalRegressor)){
type <- paste0(type, "-X")
}
}
if (type == "HARQ") {
if (!is.null(transform)) {
y <- Ftransform(y)
x1 <- Ftransform(x1)
warning("The realized quarticity is already transformed with sqrt() thus only realized variance is transformed")
}
x1 <- cbind(x1, RQmatrix[,1:nperiodsQ] * x1[,1:nperiodsQ])
if (is.null(colnames(RQmatrix))) { #special case for 1 aggregation period of realized quarticity. This appends the RQ1 name
colnames(x1) <- c(colnames(x1[,1:nperiods]),"RQ1")
}
if(all(colnames(x1) == c(paste0("RV", periods), rep("", nperiodsQ)))){
colnames(x1) <- c(colnames(x1[,1:nperiods]), paste0("RQ", periodsQ))
}
x1 <- cbind(x1,rmin)
model <- estimhar(y = y, x = x1, externalRegressor = externalRegressor)
model$RVest <- RVest[1]
if(!is.null(externalRegressor)){
type <- paste0(type, "-X")
}
}
if (type == "HARQJ") {
if (!is.null(transform) && transform == "log") {
J <- log(J + 1)
}
J <- J[(maxp:(n-h)),, drop = FALSE]
if (!is.null(transform)) {
y <- Ftransform(y); x1 = Ftransform(x1)
warning("The realized quarticity is already transformed with sqrt() thus only realized variance is transformed")
}
x1 <- cbind(x1, J, RQmatrix[,1:nperiodsQ] * x1[,1:nperiodsQ])
if(is.null(colnames(RQmatrix))){ #special case for 1 aggregation period of realized quarticity. This appends the RQ1 name
colnames(x1) <- c(colnames(x1[,1:(dim(x1)[2]-1)]), "RQ1")
}
if(all(colnames(x1) == c(paste0("RV", periods), paste0("J", periodsJ), rep("", nperiodsQ)))){
colnames(x1) <- c(colnames(x1[,1:(nperiods+length(periodsJ))]), paste0("RQ", periodsQ))
}
x1 <- cbind(x1,rmin);
model <- estimhar(y = y, x = x1, externalRegressor = externalRegressor)
model$RVest <- RVest[1]
if(!is.null(externalRegressor)){
type <- paste0(type, "-X")
}
}
if (type == "CHAR") {
if (!is.null(transform)) {
y <- Ftransform(y)
x2 <- Ftransform(x2)
}
x2 <- cbind(x2,rmin);
model <- estimhar(y = y, x = x2, externalRegressor = externalRegressor)
model$transform <- transform
model$RVest <- RVest[1]
if(!is.null(externalRegressor)){
type <- paste0(type, "-X")
}
} #End CHAR-RV if cond
if (type == "CHARQ") {
if (!is.null(transform)) {
y <- Ftransform(y)
x2 <- Ftransform(x2)
warning("The realized quarticity is already transformed with sqrt() thus only realized variance and bipower variation is transformed")
}
x2 <- cbind(x2, RQmatrix[,1:nperiodsQ] * x2[,1:nperiodsQ])
if (is.null(colnames(RQmatrix))) { #special case for 1 aggregation period of realized quarticity. This appends the RQ1 name
colnames(x2) <- c(colnames(x2[,1:nperiods]), "RQ1")
}
x2 <- cbind(x2,rmin)
model <- estimhar(y = y, x = x2, externalRegressor = externalRegressor)
model$RVest <- RVest[1]
if(!is.null(externalRegressor)){
type <- paste0(type, "-X")
}
} #End CHAR-RVQ if cond
model$fitted.values <- harInsanityFilter(fittedValues = model$fitted.values, lower = min(y, 0), upper = max(y), replacement = mean(y))
model$residuals <- model$model[, "y"] - model$fitted.values
model$dates <- alldates[(maxp+h):n]
model$type <- type
model$transform <- transform
model$inputType <- inputType
model$h <- h
model$leverage <- leverage
model$maxp <- maxp
class(model) <- c("HARmodel", "lm")
return (model)
}
#' Plotting method for HARmodel objects
#' @param x an object of class \code{HARmodel}
#' @param ... extra arguments, see details
#' @details The plotting method has the following optional parameter:
#' \itemize{
#' \item{\code{legend.loc}}{ A string denoting the location of the legend passed on to \code{addLegend} of the \pkg{xts} package}
#' }
#'
#' @importFrom grDevices dev.interactive
#' @importFrom graphics plot panel.smooth points par
#' @importFrom stats residuals
#' @importFrom xts addLegend
#' @export
plot.HARmodel <- function(x, ...){
options <- list(...)
#### List of standard options
opt <- list(legend.loc = "topleft")
#### Override standard options where user passed new options
opt[names(options)] <- options
observed <- x$model$y
fitted <- x$fitted.values
dates <- x$dates
dates <- as.POSIXct(dates)
observed <- xts(observed, order.by=dates)
fitted <- xts(fitted, order.by=dates)
type <- x$type
legend.loc <- opt$legend.loc
g_range <- range(fitted,observed)
g_range[1] <- 0.95 * g_range[1]
g_range[2] <- 1.05 * g_range[2]
#ind = seq(1,length(fitted),length.out=5);
title <- paste("Observed and forecasted RV based on HAR Model:",type);
plot(cbind(fitted,observed), col = c('blue', 'red'), main = title, ylab = "Realized Volatility", lty = c(1,2), yaxis.right = FALSE)
#plot.zoo(observed,col="red",lwd=2,main=title, ylim=g_range,xlab="Time",ylab="Realized Volatility");
# axis(1,time(b)[ind], format(time(b)[ind],), las=2, cex.axis=0.8); not used anymore
# axis(2);
#lines(fitted,col="blue",lwd=2);
#legend("topleft", c("Observed RV","Forecasted RV"), cex=1.1, col=c("red","blue"),lty=1, lwd=2, bty="n");
addLegend(legend.loc = legend.loc, on = 1,
legend.names = c("Forecasted RV", "Observed RV"),
lty = c(1, 2), lwd = c(2, 2),
col = c("blue", "red"))
}
#' Predict method for objects of type \code{HARmodel}
#' @param object an object of class \code{HARmodel}
#' @param ... extra arguments. See details
#' @details
#' The print method has the following optional parameters:
#' \itemize{
#' \item{\code{newdata}}{ new data to use for forecasting}
#' \item{\code{warnings}}{ A logical denoting whether to display warnings, detault is \code{TRUE}}
#' \item{\code{backtransform}}{ A string. If the model is estimated with transformation this parameter can be set to transform the prediction back into variance
#' The possible values are \code{"simple"} which means inverse of transformation, i.e. \code{exp} when log-transformation is applied. If using log transformation,
#' the option \code{"parametric"} can also be used to transform back. The parametric method adds a correction that stems from using the log-transformation }
#' }
#' @importFrom stats var
#' @export
predict.HARmodel <- function(object, ... ){
options <- list(...)
#### List of standard options
opt <- list(newdata = NULL, warnings = TRUE, backtransform = "simple")
#### Override standard options where user passed new options
opt[names(options)] <- options
newdata <- opt$newdata
warnings <- opt$warnings
backtransform <- opt$backtransform
# If no new data is provided - just forecast on the last day of your estimation sample
# If new data with colnames as in object$model$x is provided, i.e. right measures for that model, just use that data
##### These 4 lines are added to make adding new models (hopefully) easier
jumpModels <- c("HARJ", "HARCJ", "HARQJ", "CHAR", "CHARQ")
quarticityModels <- c("HARQ", "HARQJ", "CHARQ")
bpvModels <- c("CHAR", "CHARQ")
#### Initialization
type <- object$type
inputType <- object$inputType
if (is.null(newdata)) {
if (is.null(object$transform)) {
return(as.numeric(as.numeric(c(1, xts::last(object$model[,-1]))) %*% object$coefficients))
}
if (object$transform == "log") {
if(backtransform == "parametric"){
return(as.numeric(exp(as.numeric(c(1, xts::last(object$model[,-1]))) %*% object$coefficients + 1/2 * var(object$residuals))))
} else if(backtransform == "simple"){
return(exp(as.numeric(as.numeric(c(1, xts::last(object$model[,-1]))) %*% object$coefficients)))
} else {
return(as.numeric(as.numeric(c(1, xts::last(object$model[,-1]))) %*% object$coefficients))
}
}
if (object$transform == "sqrt") {
if(backtransform == "simple"){
return(as.numeric(as.numeric(c(1, xts::last(object$model[,-1]))) %*% object$coefficients)^2)
} else if(backtransform == "parametric"){
stop("parametric backtransform is not available for the sqrt transformation")
} else {
return(as.numeric(as.numeric(c(1, xts::last(object$model[,-1]))) %*% object$coefficients))
}
}
}
# Check whether newdata is in right format
if (sum(colnames(newdata) == colnames(object$model[,-1])) == length(colnames(object$model[,-1]))) {
if (is.null(object$transform)) {
return(as.numeric(cbind(1, newdata) %*% object$coefficients))
}
if (object$transform == "log") {
warning("Due to log-transform, forecast of RV is derived under assumption of log-normality.")
return(as.numeric(exp(cbind(1, newdata) %*% object$coefficients + 1/2 * var(object$residuals))))
}
if (object$transform == "sqrt") {
warning("Forecast for sqrt(RV) due to transform == \"sqrt\".")
return(as.numeric(cbind(1, newdata) %*% object$coefficients))
}
} else {
# Aggregate price data as in HARmodel function
# Extract periods from coefficient names
if (type == "HAR") {
# RV component
periods <- as.numeric(substring(names(object$coefficients[-1])[grep("RV", names(object$coefficients[-1]))], first = 4))
periodsJ <- 0
periodsQ <- 0
nperiodsQ <- length(periodsQ)
}
if (type == "HARJ") {
# RV component
periods <- as.numeric(substring(names(object$coefficients[-1])[grep("RV", names(object$coefficients[-1]))], first = 4))
# Jump components
periodsJ <- as.numeric(substring(names(object$coefficients[-1])[grep("J", names(object$coefficients[-1]))], first = 3))
# RQ component
periodsQ <- 0
nperiodsJ <- length(periodsJ)
}
if (type == "HARCJ") {
# Continuous component
periods <- as.numeric(substring(names(object$coefficients[-1])[grep("C", names(object$coefficients[-1]))], first = 3))
# Jump component
periodsJ <- as.numeric(substring(names(object$coefficients[-1])[grep("J", names(object$coefficients[-1]))], first = 3))
# RQ component
periodsQ <- 0
nperiodsJ <- length(periodsJ)
}
if (type == "HARQ") {
# RV component
periods <- as.numeric(substring(names(object$coefficients[-1])[grep("RV", names(object$coefficients[-1]))], first = 4))
# Jump component
periodsJ <- 0
# RQ component
periodsQ <- as.numeric(substring(names(object$coefficients[-1])[grep("RQ", names(object$coefficients[-1]))], first = 4))
nperiodsQ <- length(periodsQ)
}
if (type == "HARQJ") {
# RV component
periods <- as.numeric(substring(names(object$coefficients[-1])[grep("RV", names(object$coefficients[-1]))], first = 4))
# Jump component
periodsJ <- as.numeric(substring(names(object$coefficients[-1])[grep("J", names(object$coefficients[-1]))], first = 3))
# RQ component
periodsQ <- as.numeric(substring(names(object$coefficients[-1])[grep("RQ", names(object$coefficients[-1]))], first = 4))
nperiodsQ <- length(periodsQ)
nperiodsJ <- length(periodsJ)
}
if (type == "CHAR") {
# Continuous component
periods <- as.numeric(substring(names(object$coefficients[-1])[grep("RV", names(object$coefficients[-1]))], first = 4))
# Jump component
periodsJ <- 0
# RQ component
periodsQ <- 0
nperiodsQ <- length(periodsQ)
}
if (type == "CHARQ") {
# Continuous component
periods <- as.numeric(substring(names(object$coefficients[-1])[grep("RV", names(object$coefficients[-1]))], first = 4))
# Jump component
periodsJ <- 0
# RQ component
periodsQ <- as.numeric(substring(names(object$coefficients[-1])[grep("RQ", names(object$coefficients[-1]))], first = 4))
nperiodsQ <- length(periodsQ)
}
RVest <- object$RVest
nperiods <- length(periods) # Number of periods to aggregate over
nest <- length(RVest) # Number of RV estimators
if (!is.null(object$transform)) {
Ftransform <- match.fun(transform)
}
if (inputType != "RM") { #If it are returns as input
# Get the daily RMs
RV1 <- match.fun(RVest[1])
RM1 <- apply.daily(newdata, RV1)
if (type %in% jumpModels) {
RV2 <- match.fun(RVest[2])
RM2 <- apply.daily(newdata, RV2)
}
if (type %in% quarticityModels ) {
if(type %in% c("HARQJ","CHARQ")){##HARQJ and CHARQ are the only models that need both BPV and realized quarticity.
RV2 <- match.fun( RVest[2])
RM2 <- apply.daily(newdata, RV2 )
RV3 <- match.fun( RVest[3])
RM3 <- apply.daily(newdata, RV3 )
}
if(type == "HARQ"){
RV3 <- match.fun(RVest[2])
RM3 <- apply.daily(newdata, RV3)
}
}
}
if (inputType == "RM") { #The input is already realized measures
stop(paste0(c("If your data is already aggregated, newdata column names should be", colnames(object$model$x),
"as harModel-type is", type, "."),
collapse = " "))
}
leverage <- object$leverage
maxp <- max(periods,periodsJ,periodsQ); # Max number of aggregation levels
if (!is.null(leverage)) {
maxp <- max(maxp,leverage)
}
n <- length(RM1) #Number of Days
# Aggregate RV:
h <- object$h
RVmatrix1 <- har_agg(RM1, periods, nperiods)
colnames(RVmatrix1) <- paste0("RV", periods)
# Aggregate and subselect y:
y <- as.data.frame(har_agg(RM1, c(h), 1L)[(maxp+h):(n), , drop = FALSE])
colnames(y) <- "y"
# Only keep useful parts:
x1 <- RVmatrix1[(maxp:(n-h)), ]
if (type %in% jumpModels ){
RVmatrix2 <- har_agg(RM2, periodsJ, nperiodsJ)
colnames(RVmatrix2) <- paste0("J", periods)
x2 <- RVmatrix2[(maxp:(n-h)), ]
} # In case a jumprobust estimator is supplied
if (type %in% quarticityModels) { #in case realized quarticity estimator is supplied
# RQmatrix <- aggRQ(RM3,periodsQ)[(maxp:(n - h)), ]
RQmatrix <- har_agg(RM3, periodsQ, nperiodsQ)
colnames(RVmatrix1) <- paste0("RV", periods)
if(nperiodsQ == 1){
RQmatrix <- as.matrix(sqrt(RQmatrix) - sqrt(mean(RM3)))
} else {
RQmatrix <- sqrt(RQmatrix) - sqrt(mean(RM3)) #Demeaned realized quarticity estimator as in BPQ(2016)
}
}
# Jumps:
if (type %in% jumpModels && !(type %in% bpvModels)) { # If model type is as such that you need jump component, don't spend time on computing jumps for CHAR.. models
if (any(RM2 == 0) && inputType == "RM") { #The jump contributions were provided
J <- RM2
} else { #compute jump contributions
J <- pmax(RM1 - RM2, 0) # Jump contributions should be positive
}
J <- as.data.frame(har_agg(J, periodsJ, length(periodsJ)))
colnames(J) = paste0("J", periodsJ)
}
if (!is.null(leverage)) {
if (inputType == "RM") {
warning("You cannot use leverage variables in the model in case your input consists of Realized Measures")
}
# Get close-to-close returns
e <- apply.daily(newdata, sum) #Sum logreturns daily
# Get the rmins:
rmintemp <- pmin(e, 0)
# Aggregate everything:
rmin <- as.data.frame(har_agg(rmintemp, leverage, length(leverage))[(maxp:(n-h)), ,drop = FALSE])
colnames(rmin) <- paste0("Rmin", leverage)
#rmin <- aggRV(rmintemp, periods = leverage, type = "Rmin")
# Select:
#rmin <- rmin[(maxp:n),]
} else {
rmin <- matrix(ncol=0,nrow=dim(x1)[1])
}
if (type == "HAR") {
if (!is.null(object$transform)) {
x1 <- Ftransform(x1)
}
x <- cbind(x1, rmin)
}
if (type == "HARJ") {
if (!is.null(object$transform) && transform=="log") {
J <- J + 1
}
J <- J[(maxp:(n)),]
x <- cbind(x1,J) # bind jumps to RV data
if (!is.null(transform)) {
x <- Ftransform(x)
}
x <- cbind(x,rmin)
}
if (type == "HARCJ") {
if (object$jumpTest=="ABDJumptest") {
TQ <- apply.daily(newdata, rTPQuar)
J <- J[, 1]
teststats <- ABDJumptest(RV = RM1, BPV = RM2,TQ = TQ)
} else {
jtest <- match.fun(object$jumpTest)
teststats <- jtest(newdata, ...)
}
Jindicators <- teststats > qnorm(1 - object$alpha_jumps)
J[!Jindicators] <- 0
# Get continuous components if necessary RV measures if necessary:
Cmatrix <- matrix(nrow = dim(RVmatrix1)[1], ncol = 1)
Cmatrix[Jindicators,] <- RVmatrix2[Jindicators, 1] #Fill with robust one in case of jump
Cmatrix[(!Jindicators)] <- RVmatrix1[(!Jindicators), 1] #Fill with non-robust one in case of no-jump
# Aggregate again:
Cmatrix <- har_agg(Cmatrix, periods, nperiods)
colnames(Cmatrix) <- paste0("C", periods)
Jmatrix <- har_agg(J, periodsJ, nperiods)
colnames(Jmatrix) <- paste0("J", periodsJ)
# subset again:
Cmatrix <- Cmatrix[(maxp:(n-h)), ]
Jmatrix <- Jmatrix[(maxp:(n-h)), ]
if (!is.null(object$transform) && object$transform == "log") {
Jmatrix <- Jmatrix + 1
}
x <- cbind(Cmatrix, Jmatrix) # bind jumps to RV data
if (!is.null(transform)) {
x <- Ftransform(x)
}
x <- cbind(x, rmin)
}
if (type == "HARQ") {
if (!is.null(transform)) {
y <- Ftransform(y)
x1 <- Ftransform(x1)
warning("The realized quarticity is already transformed with sqrt() thus only realized variance is transformed")
}
x1 <- cbind(x1, RQmatrix[,1:nperiodsQ] * x1[,1:nperiodsQ])
if (is.null(colnames(RQmatrix))) { #special case for 1 aggregation period of realized quarticity. This appends the RQ1 name
colnames(x1) <- c(colnames(x1[, 1:nperiods]),"RQ1")
}
x <- cbind(x1,rmin)
}
if (type == "HARQJ") {
if (!is.null(transform) && transform=="log") {
J <- J + 1
}
J <- J[(maxp:(n-h)), ]
if (!is.null(transform)) {
y <- Ftransform(y)
x1 <- Ftransform(x1)
warning("The realized quarticity is already transformed with sqrt() thus only realized variance is transformed")
}
x1 <- cbind(x1, J, RQmatrix[,1:nperiodsQ] * x1[,1:nperiodsQ])
if(is.null(colnames(RQmatrix))){ #special case for 1 aggregation period of realized quarticity. This appends the RQ1 name
colnames(x1) <- c(colnames(x1[,1:(dim(x1)[2]-1)]), "RQ1")
}
x <- cbind(x1,rmin)
}
if (type == "CHAR") {
if (!is.null(transform)) {
y <- Ftransform(y)
x2 <- Ftransform(x2)
}
x <- cbind(x2,rmin)
} #End CHAR-RV if cond
if (type == "CHARQ") {
if (!is.null(transform)) {
y <- Ftransform(y)
x2 <- Ftransform(x2)
warning("The realized quarticity is already transformed with sqrt() thus only realized variance and bipower variation is transformed")
}
x2 = cbind(x2, RQmatrix[,1:nperiodsQ] * x2[,1:nperiodsQ])
if(is.null(colnames(RQmatrix))){ #special case for 1 aggregation period of realized quarticity. This appends the RQ1 name
colnames(x2) = c(colnames(x2[,1:nperiods]), "RQ1")
}
x = cbind(x2,rmin)
} #End CHAR-RVQ if cond
if (is.null(object$transform)) {
return(as.numeric(as.matrix(cbind(1, x)) %*% object$coefficients))
}
if (object$transform == "log") {
if(backtransform == "parametric"){
return(as.numeric(exp(as.matrix(cbind(1, x)) %*% object$coefficients + 1/2 * var(object$residuals))))
} else if(backtransform == "simple"){
return(exp(as.numeric(as.matrix(cbind(1, x)) %*% object$coefficients)))
} else {
return(as.numeric(as.matrix(cbind(1, x)) %*% object$coefficients))
}
}
if (object$transform == "sqrt") {
if(backtransform == "simple"){
return(as.numeric(as.matrix(cbind(1, x)) %*% object$coefficients)^2)
} else if(backtransform == "parametric"){
stop("parametric backtransform is not available for the sqrt transformation")
} else {
return(as.numeric(as.matrix(cbind(1, x)) %*% object$coefficients))
}
}
}
}
#' Printing method for \code{HARmodel} objects
#' @param x object of type \code{HARmodel}
#' @param ... extra options
#' @details The printing method has the extra option \code{digits} which can be used to set the number of digits for printing pass \code{lag} to determine the maximum order of the Newey West estimator. Default is \code{22}
#' @importFrom stats coef
#' @importFrom sandwich NeweyWest
#' @export
print.HARmodel <- function(x, ...){
options <- list(...)
opt <- list(digits = max(3, getOption("digits") - 3), lag = x$maxp)
opt[names(options)] <- options
digits <- opt$digits
formula <- getHarmodelformula(x); modeldescription = formula[[1]]; betas = formula[[2]];
cat("\nModel:\n", paste(modeldescription, sep = "\n", collapse = "\n"),
"\n\n", sep = "")
coefs <- coef(x);
x$NeweyWestSE <- sandwich::NeweyWest(x, lag = opt$lag)
NeweyWestSE <- x$NeweyWestSE
names(coefs) <- c("beta0",betas)
colnames(NeweyWestSE) <- rownames(NeweyWestSE) <- c("beta0",betas)
if (length(coef(x))){
cat("Coefficients:\n")
print.default(format(coefs, digits = digits), print.gap = 2,quote = FALSE);
cat("Newey-West Standard Errors:\n");
print.default(format(sqrt(diag(NeweyWestSE)), digits = digits), print.gap = 2,quote = FALSE);
cat("\n\n");
Rs <- summary(x)[c("r.squared", "adj.r.squared")]
zz <- c(Rs$r.squared,Rs$adj.r.squared);
names(zz) <- c("r.squared","adj.r.squared")
print.default((format(zz,digits=digits) ),print.gap = 2,quote=FALSE)
}
else cat("No coefficients\n")
cat("\n")
invisible(x)
}
#' Summary for \code{HARmodel} objects
#' @param object An object of class \code{HARmodel}
#' @param ... pass \code{lag} to determine the maximum order of the Newey West estimator. Default is \code{22}
#' @return A modified \code{summary.lm}
#' @importFrom stats summary.lm pt
#' @importFrom sandwich NeweyWest
#' @export
summary.HARmodel <- function(object, ...){
op <- list(lag = 22)
op[names(options)] <- list(...)
dd <- summary.lm(object)
dd$coefficients[,"Std. Error"] <- sqrt(diag(NeweyWest(object, lag = op$lag)))
dd$coefficients[,"t value"] <- dd$coefficients[,"Estimate"] / dd$coefficients[,"Std. Error"]
dd$coefficients[,"Pr(>|t|)"] <- 2 * pt(abs(dd$coefficients[, "t value"]), object$df.residual, lower.tail = FALSE)
formula <- getHarmodelformula(object)
modeldescription <- formula[[1]]
betas <- formula[[2]]
dd$call <- modeldescription
rownames(dd$coefficients) <- c("beta0", betas)
return(dd)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.