# R/rgarch-tests.R

#################################################################################
##
##   R package rgarch by Alexios Ghalanos Copyright (C) 2008, 2009, 2010, 2011
##   This file is part of the R package rgarch.
##
##   The R package rgarch is free software: you can redistribute it and/or modify
##   it under the terms of the GNU General Public License as published by
##   the Free Software Foundation, either version 3 of the License, or
##   (at your option) any later version.
##
##   The R package rgarch is distributed in the hope that it will be useful,
##   but WITHOUT ANY WARRANTY; without even the implied warranty of
##   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##   GNU General Public License for more details.
##
#################################################################################
.information.test = function(LLH, nObs, nPars)
{
	AIC  = (-2*LLH)/nObs + 2 * nPars/nObs
	BIC  = (-2*LLH)/nObs + nPars * log(nObs)/nObs
	SIC  = (-2*LLH)/nObs + log((nObs+2*nPars)/nObs)
	HQIC = (-2*LLH)/nObs + (2*nPars*log(log(nObs)))/nObs
	informationTests = list(AIC = AIC, BIC = BIC, SIC = SIC, HQIC = HQIC)
	return(informationTests)
}
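
# Illustrative usage (a sketch, not run when this file is sourced): the log-likelihood,
# sample size and parameter count below are made-up values chosen only to show the call.
if(FALSE){
	ic = .information.test(LLH = -1450.3, nObs = 500, nPars = 4)
	ic$AIC; ic$BIC; ic$SIC; ic$HQIC
}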

# Q-Statistics on Standardized Residuals
.box.test = function(stdresid, p=1, df = 0)
{
	if(any(!is.finite(stdresid))) stdresid[!is.finite(stdresid)]=0
	# p = 1: standardized residuals (levels), p = 2: squared standardized residuals
	# Q-Statistics on Standardized Residuals
	# H0: no serial correlation ==> fail to reject H0 when the p-value is high [Q < Chisq(lag)]
	box10 = Box.test(stdresid^p, lag = 10, type = "Ljung-Box", fitdf = df)
	box15 = Box.test(stdresid^p, lag = 15, type = "Ljung-Box", fitdf = df)
	box20 = Box.test(stdresid^p, lag = 20, type = "Ljung-Box", fitdf = df)
	LBSR<-matrix(NA,ncol=2,nrow=3)
	LBSR[1:3,1] = c(box10$statistic[[1]],box15$statistic[[1]],box20$statistic[[1]])
	LBSR[1:3,2] = c(box10$p.value[[1]],box15$p.value[[1]],box20$p.value[[1]])
	rownames(LBSR) = c("Lag10","Lag15","Lag20")
	#LBSR<-as.data.frame(cbind(LBSR,.stars(as.vector(1-LBSR[,2]))))
	colnames(LBSR) = c("statistic","p-value")
	return(LBSR)
}
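
# Illustrative usage (a sketch, not run on source): Ljung-Box tests on simulated
# standardized residuals. p = 1 tests the levels, p = 2 the squares; 'df' would
# normally be the number of fitted ARMA parameters. The simulated input is a placeholder.
if(FALSE){
	set.seed(100)
	zres = rnorm(1000)
	.box.test(zres, p = 1, df = 0)   # serial correlation in the levels
	.box.test(zres, p = 2, df = 0)   # remaining ARCH effects in the squares
}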


.archlmtest = function (x, lags, demean = FALSE)
{
	if(any(!is.finite(x))) x[!is.finite(x)] = 0
	x = as.vector(x)
	if(demean) x = scale(x, center = TRUE, scale = FALSE)
	lags = lags + 1
	mat = embed(x^2, lags)
	arch.lm = summary(lm(mat[, 1] ~ mat[, -1]))
	STATISTIC = arch.lm$r.squared * length(resid(arch.lm))
	names(STATISTIC) = "Chi-squared"
	PARAMETER = lags - 1
	names(PARAMETER) = "df"
	PVAL = 1 - pchisq(STATISTIC, df = PARAMETER)
	METHOD = "ARCH LM-test"
	result = list(statistic = STATISTIC, parameter = PARAMETER,
			p.value = PVAL, method = METHOD)
	class(result) = "htest"
	return(result)
}
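
# Illustrative usage (a sketch, not run on source): Engle's ARCH LM test on simulated
# residuals. Under the null of no ARCH effects the statistic is chi-squared with 'lags' df.
if(FALSE){
	set.seed(101)
	res = rnorm(1000)
	.archlmtest(res, lags = 12)
}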

.nyblomTest = function(object)
{
	fixid = object@model$fixid
	if(!is.null(fixid)) Names = names(object@fit$coef[-fixid]) else Names = names(object@fit$coef)
	res= object@fit$residuals/object@fit$sigma
	if(any(!is.finite(res))) res[!is.finite(res)]=0
	grad = object@fit$scores
	nn = length(res)
	hes = t(grad)%*%(grad)
	shes = try(solve(hes), silent = TRUE)
	if(inherits(shes,"try-error")){
		shes = solve(object@fit$hessian)
	}
	zx = matrix(cbind(res, res^2, res^3, res^4), ncol = 4)
	zs = apply(zx, 2, FUN = function(x) scale(x, center = TRUE, scale = FALSE))
	x = as.matrix(apply(grad, 2, FUN = function(x) cumsum(x)))
	xx = t(x)%*%x
	nyblomj = sum(diag(xx%*%shes))/nn
	nyblomt = diag(xx)/(diag(hes)*nn)
	IndividualStat = matrix(nyblomt, ncol=1)
	IndividualCritical = .nyblomCritical(1)
	JointCritical = .nyblomCritical(length(Names))
	rownames(IndividualStat) = Names
	JointStat = nyblomj
	return(list(IndividualStat=IndividualStat,JointStat=JointStat,
					IndividualCritical=IndividualCritical,JointCritical=JointCritical))
}
.nyblomCritical = function(n){
	# Test for Constancy of Parameters: Hansen Tests (1990 mimeo paper)
	# Null Hypothesis: constant parameters
	cval = matrix(c(0.353, 0.470, 0.748,
					0.610, 0.749, 1.07,
					0.846, 1.01, 1.35,
					1.07,  1.24, 1.60,
					1.28,  1.47, 1.88,
					1.49,  1.68, 2.12,
					1.69,  1.90, 2.35,
					1.89,  2.11, 2.59,
					2.10,  2.32, 2.82,
					2.29,  2.54, 3.05,
					2.49,  2.75, 3.27,
					2.69,  2.96, 3.51,
					2.89,  3.15, 3.69,
					3.08,  3.34, 3.90,
					3.26,  3.54, 4.07,
					3.46,  3.75, 4.30,
					3.64,  3.95, 4.51,
					3.83,  4.14, 4.73,
					4.03,  4.33, 4.92,
					4.22,  4.52, 5.13), byrow=T, ncol=3)
	colnames(cval)=c("10%","5%","1%")
	if(n<=20){
		ans = cval[n,]
		names(ans)=c("10%","5%","1%")
	} else {
		ans="too many parameters"
	}
	return(ans)
}
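
# Illustrative usage (a sketch, not run on source): critical values against which the
# individual (n = 1) and joint statistics from .nyblomTest would be compared, here for
# a hypothetical model with 4 estimated parameters.
if(FALSE){
	.nyblomCritical(1)   # compare with each row of IndividualStat
	.nyblomCritical(4)   # compare with JointStat
}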

.signbiasTest = function(object)
{
	if(is(object, "uGARCHfilter")) z = object@filter$z else z = object@fit$z
	res = residuals(object)
	z2 = z^2
	n = length(z)
	zminus = as.integer(z<0)
	zplus = 1-zminus
	czminus = zminus*res
	czplus = zplus*res
	cz = cbind(rep(1, n), zminus, czminus, czplus)
	cz = cz[1:(n-1),]
	z2 = matrix(z2[2:n],ncol = 1)
	cm = data.frame(y = z2, const = cz[,1], zminus = cz[,2], czminus = cz[,3], czplus = cz[,4])
	fitA = lm(y~const+zminus+czminus+czplus-1, data = cm)
	resA = residuals(fitA)
	sgmatrix = matrix(ncol = 3, nrow = 4)
	rownames(sgmatrix) = c("Sign Bias","Negative Sign Bias","Positive Sign Bias","Joint Effect")
	colnames(sgmatrix) = c("t-value","prob","sig")
	sgmatrix[1:3,1:2] = abs(summary(fitA)$coef[2:4,3:4])
	jeffect = .linear.hypothesis(fitA, c("zminus = 0", "czminus = 0", "czplus = 0"), test = "Chisq")
	sgmatrix[4,1] = jeffect[5]$Chisq[2]
	sgmatrix[4,2] = jeffect[6][2,]
	sgmatrix = as.data.frame(sgmatrix)
	sgmatrix[,3] = .stars(sgmatrix[,2])
	return(sgmatrix)
}

.gofTest = function(object, groups)
{
	if(is(object, "uGARCHfilter")){
		pars = coef(object)
		z = object@filter$z
	} else{
		fixid = object@model$fixid
		if(!is.null(fixid)) pars = object@fit$coef[-fixid] else pars = object@fit$coef
		z = object@fit$z
	}
	n = length(pars)
	# must remove fixed parameters
	dist = object@model$distribution
	cdf = .getDistribution(dist)
	skew = shape = dlambda = 0
	include.dlambda = object@model$include.dlambda
	include.skew = object@model$include.skew
	include.shape = object@model$include.shape
	if(include.dlambda) dlambda = object@fit$coef["dlambda"]
	if(include.skew) skew = object@fit$coef["skew"]
	if(include.shape) shape = object@fit$coef["shape"]
	cdfv = cdf(z = sort(z), hh = 0, dlambda, skew, shape)
	j = length(groups)
	gofmat = matrix(NA, ncol = 3, nrow = j)
	gofmat[,1] = groups
	for(i in 1:j){
		sq = seq(1/groups[i], 1, by = 1/groups[i])
		ni = tabulate(findInterval(cdfv, c(0, sq), rightmost.closed = TRUE, 
						all.inside = FALSE)) 
		ExpValue = length(cdfv)/groups[i]
		gofmat[i,2] = sum( ( ( ni - ExpValue ) ^2 )/ExpValue )
		gofmat[i,3] = pchisq(q=gofmat[i,2], df = groups[i]-1, lower.tail = FALSE)
	}
	colnames(gofmat) = c("group", "statistic", "p-value(g-1)")
	rownames(gofmat) = 1:j
	return(gofmat)
}
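
# Illustrative sketch (not run on source) of the chi-squared goodness-of-fit logic used
# above, applied to simulated PIT values (uniform by construction). The names cdfv and g
# are placeholders for this example only.
if(FALSE){
	set.seed(102)
	cdfv = pnorm(rnorm(1000))           # probability integral transform of the sample
	g = 20                              # number of groups
	sq = seq(1/g, 1, by = 1/g)
	ni = tabulate(findInterval(cdfv, c(0, sq), rightmost.closed = TRUE))
	ExpValue = length(cdfv)/g
	stat = sum((ni - ExpValue)^2/ExpValue)
	pchisq(q = stat, df = g - 1, lower.tail = FALSE)
}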

# returns various loss functions (vectors)
lossfn.var = function(realized, forecast)
{
	x = cbind(realized, forecast)
	#exc = which(is.na(x))
	#if(length(exc)>0) x = x[-exc,]
	mse  = (x[,1] - x[,2])^2
	# qlike: quasi likelihood (implied by gaussian likelihood)
	qlike = log(x[,2]) + x[,1]/x[,2]
	# r2log: penalizes forecasts asymmetrically in low and high volatility periods
	r2log = ( log(x[,1]/x[,2]) )^2
	mad = abs(x[,1] - x[,2])
	#hmse = (x[,1]*(1/x[,2]))^2
	lossdf = data.frame(cbind(mse, mad, qlike, r2log))
	return(lossdf)
}

lossfn.ret = function(realized, forecast)
{
	x = cbind(realized, forecast)
	#exc = which(is.na(x), arr.ind = TRUE)
	#if(length(exc)>0) x = x[-exc[,1],]
	mse  = (x[,1] - x[,2])^2
	mad = abs(x[,1] - x[,2])
	#hmse = (x[,1]*(1/x[,2]))^2
	# we count zero as positive for investment purposes
	dac = as.integer(.signpluszero(x[,1])==.signpluszero(x[,2]))
	lossdf = data.frame(cbind(mse, mad, dac))
	return(lossdf)
}
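
# Illustrative usage (a sketch, not run on source): lossfn.var takes realized and forecast
# variances, lossfn.ret realized and forecast returns. The objects rvar, fvar, ract and
# fret are simulated placeholders.
if(FALSE){
	set.seed(103)
	rvar = rchisq(50, df = 1) * 1e-4            # 'realized' variance proxy
	fvar = rep(1e-4, 50)                        # constant variance forecast
	head(lossfn.var(rvar, fvar))                # per-point mse, mad, qlike, r2log
	ract = rnorm(50, 0, 0.01)                   # realized returns
	fret = rep(0.0001, 50)                      # forecast returns
	colMeans(lossfn.ret(ract, fret), na.rm = TRUE)
}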

.meanlossfn = function(lossdf)
{
	n = dim(lossdf)[1]
	nm = colnames(lossdf)
	ans = apply(lossdf, 2, FUN = function(x) 1/(n-1) * sum(x[!is.na(x)]))
	names(ans) = nm
	return(ans)
}

.medianlossfn = function(lossdf)
{
	n = dim(lossdf)[1]
	xn = dim(lossdf)[2]
	nm = colnames(lossdf)
	ans = apply(lossdf[,-xn], 2, FUN = function(x) median(x, na.rm = TRUE))
	# the median of the 0/1 directional accuracy series is not meaningful, so use its mean instead.
	ans = c(ans, .meanlossfn(lossdf[, xn, drop = FALSE]))
	names(ans) = nm
	return(ans)
}

# Diebold Mariano Test (JBES 1995)
.DM.test = function(x, lagq)
{
	# x = vector of differences between the loss function of the benchmark model and that of the competing model
	# lagq = lag used to calculate the Newey-West var-cov matrix
	# Returns:
	# an anova class object from the linear.hypothesis test of the car package
	# e.g. x = rnorm(100, 0, 0.01) - rnorm(100, 0.0001, 0.01)
	exc = which(is.na(x))
	if(length(exc)>0) x = x[-exc]
	dm = lm(x~1)
	dm.test = .linear.hypothesis(dm,  c("(Intercept) = 0"), test = "Chisq", vcov  =  NeweyWest(dm, lag = lagq, 
					order.by = NULL, prewhite = TRUE, adjust = TRUE))
	return(dm.test)
}
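
# Illustrative usage (a sketch, not run on source): the loss differential below is simulated.
# The test regresses it on a constant with a Newey-West HAC covariance, so the 'sandwich'
# package (NeweyWest) and the internal .linear.hypothesis function must be available.
if(FALSE){
	set.seed(104)
	lossA = rnorm(100, 0, 0.01)^2      # loss series of the benchmark model (placeholder)
	lossB = rnorm(100, 0.001, 0.01)^2  # loss series of the competing model (placeholder)
	.DM.test(lossA - lossB, lagq = 2)
}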

# This is rewritten to do the following:
# 1. provide basic fpm across the rolls, if any
# 2. provide 1-ahead rolling fpm, if any
# (a minimum of 10 points is required)

# The method now calls functions which take realized and forecast vectors
# (so that realized volatility measures etc. can also be used) and offers
# two options: the standard n-ahead fpm and the 1-ahead rolling fpm.
.fpm = function(object, realized = NULL, realized.type = c("both", "sigma", "series"), type = 1)
{
	ans = switch(type,
			.forctests1(object, realized, realized.type),
			.forctests2(object, realized, realized.type))
	return(ans)
}

.forctests2 = function(object, realized = NULL, realized.type = c("both", "sigma", "series"))
{
	n.ahead = object@forecast$n.ahead
	n.start = object@forecast$n.start
	origdata = object@model$origdata
	N = length(origdata)
	origdates = object@forecast$fdates
	n.roll = object@forecast$n.roll
	if(n.roll<10)
		stop("a minimum of 10 1-ahead n.roll forecasts required for fpm (type 2)", call. = FALSE)
	fsigma  = as.data.frame(object, which = "sigma", rollframe = "all", type = 1, aligned = FALSE)[1,]
	fseries = as.data.frame(object, which = "series", rollframe = "all", type = 1, aligned = FALSE)[1,]
	data = origdata[(N-n.start + 1):(N - n.start + n.roll + 1)]
	# get the realized data
	if(!is.null(realized)){
		if(realized.type == "both" && !is.matrix(realized))
			stop("for realized.type using both sigma and series fpm expects a matrix", call. = FALSE)
		if(realized.type != "both" && !is.vector(realized))
			stop("for realized.type using either sigma or series fpm expects a vector", call. = FALSE)
		if(is.matrix(realized)){
			nr = dim(realized)[1]
			nc = dim(realized)[2]
			if(nr<n.roll)
				stop(paste("\ninsufficient data points provided (expected ", n.roll, ", got ", nr, "\n", sep=""), call. = FALSE)
			if(is.null(colnames(realized))) colnames(realized) = c("sigma", "series")
			rsigma  = realized[, "sigma"]
			rseries = realized[, "series"]
		}
		if(is.vector(realized)){
			nr = length(realized)
			if(nr<n.roll)
				stop(paste("\ninsufficient data points provided (expected ", n.roll, ", got ", nr, "\n", sep=""), call. = FALSE)
			if(realized.type == "sigma"){
				rsigma = realized
				rseries = data
			} else{
				rseries = realized
				rsigma = sqrt(data^2)
			}
		}
	} else{
		rseries = data
		rsigma = sqrt(data^2)
	}
	
	lossm.x = vector(mode = "numeric", length = 1)
	lossm.s = vector(mode = "numeric", length = 1)
	lossm.x = lossfn.ret(as.numeric(rseries), as.numeric(fseries))
	lossm.s = lossfn.var(as.numeric(rsigma)^2, as.numeric(fsigma)^2)
	
	meanloss.x = apply(lossm.x, 2, FUN = function(z) mean(z, na.rm = TRUE))
	medianloss.x = apply(lossm.x, 2, FUN = function(z) median(z, na.rm = TRUE))
	
	meanloss.s = apply(lossm.s, 2, FUN = function(z) mean(z, na.rm = TRUE))
	medianloss.s = apply(lossm.s, 2, FUN = function(z) median(z, na.rm = TRUE))
	# now we have forecast and realized
	
	seriesfpm = list()
	seriesfpm$lossfn = lossm.x
	seriesfpm$meanloss = meanloss.x
	seriesfpm$medianloss = medianloss.x
	
	sigmafpm = list()
	sigmafpm$lossfn = lossm.s
	sigmafpm$meanloss = meanloss.s
	sigmafpm$medianloss = medianloss.s
	
	fcst = list()
	fcst$series = fseries
	fcst$sigma = fsigma
	
	realized = list()
	realized$series = rseries
	realized$sigma = rsigma
	
	details = list()
	details$n.ahead = n.ahead
	details$n.start = n.start
	details$n.roll = n.roll
	details$type = 2
	details$model = object@model$model
	details$submodel = object@model$submodel
	
	details$fdates = sapply(origdates, FUN = function(x) as.character(x[1]))
	
	ans = new("uGARCHfpm",
			seriesfpm = seriesfpm,
			sigmafpm = sigmafpm,
			forecasts = fcst,
			realized = realized,
			details = details)
	
	
	return(ans)
}
.forctests1 = function(object, realized = NULL, realized.type = c("both", "sigma", "series"))
{
	n.ahead = object@forecast$n.ahead
	n.start = object@forecast$n.start
	if(n.ahead<10)
		stop("a minimum of 10 n.ahead forecasts required for fpm (type 1)", call. = FALSE)
	if(n.start<10)
		stop("a minimum of 10 out.sample forecasts required for fpm (type 1)", call. = FALSE)
	
	origdata = object@model$origdata
	N = length(origdata)
	origdates = object@forecast$fdates
	n.roll = object@forecast$n.roll
	tlength = n.ahead + n.roll
	insample = min(n.start, n.ahead + n.roll)
	# get data.frame of those which have insample data
	fsigma  = as.data.frame(object, which = "sigma", rollframe = "all", type = 1, aligned = FALSE)
	fseries = as.data.frame(object, which = "series", rollframe = "all", type = 1, aligned = FALSE)
	data = c(origdata[(N-n.start + 1):(N - n.start + insample)], rep(NA, length = n.ahead))
	indices = apply(.embed(1:tlength, n.ahead, by = 1), 1, FUN = function(x) rev(x))
	rdata = indices
	# get the realized data
	if(!is.null(realized)){
		if(realized.type == "both" && !is.matrix(realized))
			stop("for realized.type using both sigma and series fpm expects a matrix", call. = FALSE)
		if(realized.type != "both" && !is.vector(realized))
			stop("for realized.type using either sigma or series fpm expects a vector", call. = FALSE)
		if(is.matrix(realized)){
			nr = dim(realized)[1]
			nc = dim(realized)[2]
			if(nr<tlength)
				stop(paste("\ninsufficient data points provided (expected ", tlength, ", got ", nr, "\n", sep=""), call. = FALSE)
			if(is.null(colnames(realized))) colnames(realized) = c("sigma", "series")
			rsigma  = realized[, "sigma"]
			rseries = realized[, "series"]
		}
		if(is.vector(realized)){
			nr = length(realized)
			if(nr<tlength)
				stop(paste("\ninsufficient data points provided (expected ", tlength, ", got ", nr, "\n", sep=""), call. = FALSE)
			if(realized.type == "sigma"){
				rsigma = realized
				rseries = data
			} else{
				rseries = realized
				rsigma = sqrt(data^2)
			}
		}
	} else{
		rseries = data
		rsigma = sqrt(data^2)
	}
	# create matrices
	rseriesm = apply(indices, 2, FUN = function(x) rseries[x])
	rsigmam = apply(indices, 2, FUN = function(x) rsigma[x])
	lossm.x = vector(mode = "list", length = n.roll + 1)
	lossm.s = vector(mode = "list", length = n.roll + 1)
	for(i in 1:(n.roll+1)){
		lossm.x[[i]] = lossfn.ret(rseriesm[,i], fseries[,i])
		lossm.s[[i]] = lossfn.var(rsigmam[,i]^2, fsigma[,i]^2)
	}
	
	meanloss.x = sapply(lossm.x, FUN = function(x) apply(x, 2, FUN = function(z) mean(z, na.rm = TRUE)))
	medianloss.x = sapply(lossm.x, FUN = function(x) apply(x, 2, FUN = function(z) median(z, na.rm = TRUE)))

	meanloss.s = sapply(lossm.s, FUN = function(x) apply(x, 2, FUN = function(z) mean(z, na.rm = TRUE)))
	medianloss.s = sapply(lossm.s, FUN = function(x) apply(x, 2, FUN = function(z) median(z, na.rm = TRUE)))
	# now we have forecast and realized
	
	seriesfpm = list()
	seriesfpm$lossfn = lossm.x
	seriesfpm$meanloss = meanloss.x
	seriesfpm$medianloss = medianloss.x
	
	sigmafpm = list()
	sigmafpm$lossfn = lossm.s
	sigmafpm$meanloss = meanloss.s
	sigmafpm$medianloss = medianloss.s
	
	fcst = list()
	fcst$series = fseries
	fcst$sigma = fsigma
	
	realized = list()
	realized$series = rseriesm
	realized$sigma = rsigmam
	
	details = list()
	details$n.ahead = n.ahead
	details$n.start = n.start
	details$n.roll = n.roll
	details$model = object@model$model
	details$submodel = object@model$submodel
	details$type = 1
	
	details$fdates = origdates
	
	
	ans = new("uGARCHfpm",
			seriesfpm = seriesfpm,
			sigmafpm = sigmafpm,
			forecasts = fcst,
			realized = realized,
			details = details)
	
	return(ans)
}


# this is a custom one for the rolling function
.forctests3 = function(fsigma, fseries, n.start, n.roll, n.ahead, data, fdates, model, submodel, type = 1)
{
	N = length(data)
	if(n.roll<10)
		stop("a minimum of 10 1-ahead n.roll forecasts required for fpm", call. = FALSE)
	rseries = data
	rsigma = sqrt(data^2)
	lossm.x = vector(mode = "numeric", length = 1)
	lossm.s = vector(mode = "numeric", length = 1)
	lossm.x = lossfn.ret(as.numeric(rseries), as.numeric(fseries))
	lossm.s = lossfn.var(as.numeric(rsigma)^2, as.numeric(fsigma)^2)
	
	meanloss.x = apply(lossm.x, 2, FUN = function(z) mean(z, na.rm = TRUE))
	medianloss.x = apply(lossm.x, 2, FUN = function(z) median(z, na.rm = TRUE))
	
	meanloss.s = apply(lossm.s, 2, FUN = function(z) mean(z, na.rm = TRUE))
	medianloss.s = apply(lossm.s, 2, FUN = function(z) median(z, na.rm = TRUE))
	# now we have forecast and realized
	
	seriesfpm = list()
	seriesfpm$lossfn = lossm.x
	seriesfpm$meanloss = meanloss.x
	seriesfpm$medianloss = medianloss.x
	
	sigmafpm = list()
	sigmafpm$lossfn = lossm.s
	sigmafpm$meanloss = meanloss.s
	sigmafpm$medianloss = medianloss.s
	
	fcst = list()
	fcst$series = fseries
	fcst$sigma = fsigma
	
	realized = list()
	realized$series = rseries
	realized$sigma = rsigma
	
	details = list()
	details$n.ahead = n.ahead
	details$n.start = n.start
	details$n.roll = n.roll
	details$type = type
	details$model = model
	details$submodel = submodel
	
	details$fdates = fdates
	
	ans = new("uGARCHfpm",
			seriesfpm = seriesfpm,
			sigmafpm = sigmafpm,
			forecasts = fcst,
			realized = realized,
			details = details)
	
	
	return(ans)
}

#-----------------------------------------------------------------------------------------------
# ARFIMA section
# this is a custom one for the arfima rolling function
.forctestsarfima3 = function(fseries, n.start, n.roll, n.ahead, data, fdates, type = 1)
{
	N = length(data)
	if(n.roll<10)
		stop("a minimum of 10 1-ahead n.roll forecasts required for fpm", call. = FALSE)
	rseries = data
	lossm.x = vector(mode = "numeric", length = 1)
	lossm.x = lossfn.ret(as.numeric(rseries), as.numeric(fseries))
	
	meanloss.x = apply(lossm.x, 2, FUN = function(z) mean(z, na.rm = TRUE))
	medianloss.x = apply(lossm.x, 2, FUN = function(z) median(z, na.rm = TRUE))
	
	# now we have forecast and realized	
	seriesfpm = list()
	seriesfpm$lossfn = lossm.x
	seriesfpm$meanloss = meanloss.x
	seriesfpm$medianloss = medianloss.x
		
	fcst = list()
	fcst$series = fseries
	
	realized = list()
	realized$series = rseries
	
	details = list()
	details$n.ahead = n.ahead
	details$n.start = n.start
	details$n.roll = n.roll
	details$type = type
	details$fdates = fdates
	
	ans = new("ARFIMAfpm",
			seriesfpm = seriesfpm,
			forecasts = fcst,
			realized = realized,
			details = details)
	return(ans)
}


.fpmarfima = function(object, realized = NULL, type = 1)
{
	ans = switch(type,
			.forctestsarfima1(object, realized),
			.forctestsarfima2(object, realized))
	return(ans)
}

.forctestsarfima2 = function(object, realized = NULL)
{
	n.ahead = object@forecast$n.ahead
	n.start = object@forecast$n.start
	origdata = object@model$origdata
	N = length(origdata)
	origdates = object@forecast$fdates
	n.roll = object@forecast$n.roll
	if(n.roll<10)
		stop("a minimum of 10 1-ahead n.roll forecasts required for fpm (type 2)", call. = FALSE)
	fseries = as.data.frame(object, rollframe = "all", type = 1, aligned = FALSE)[1,]
	data = origdata[(N-n.start + 1):(N - n.start + n.roll + 1)]
	# get the realized data
	if(!is.null(realized)){
		if(is.matrix(realized)){
			nr = dim(realized)[1]
			nc = dim(realized)[2]
			if(nr<n.roll)
				stop(paste("\ninsufficient data points provided (expected ", n.roll, ", got ", nr, "\n", sep=""), call. = FALSE)
			if(is.null(colnames(realized))) colnames(realized) = "series"
			rseries = realized[, "series"]
		}
		if(is.vector(realized)){
			nr = length(realized)
			if(nr<n.roll)
				stop(paste("\ninsufficient data points provided (expected ", n.roll, ", got ", nr, ")\n", sep=""), call. = FALSE)
			rseries = realized
		}
	} else{
		rseries = data
	}
	
	lossm.x = vector(mode = "numeric", length = 1)
	lossm.x = lossfn.ret(as.numeric(rseries), as.numeric(fseries))
	
	meanloss.x = apply(lossm.x, 2, FUN = function(z) mean(z, na.rm = TRUE))
	medianloss.x = apply(lossm.x, 2, FUN = function(z) median(z, na.rm = TRUE))
	# now we have forecast and realized
	
	seriesfpm = list()
	seriesfpm$lossfn = lossm.x
	seriesfpm$meanloss = meanloss.x
	seriesfpm$medianloss = medianloss.x
		
	fcst = list()
	fcst$series = fseries
	
	realized = list()
	realized$series = rseries
	
	details = list()
	details$n.ahead = n.ahead
	details$n.start = n.start
	details$n.roll = n.roll
	details$type = 2
	
	details$fdates = sapply(origdates, FUN = function(x) as.character(x[1]))
	
	ans = new("ARFIMAfpm",
			seriesfpm = seriesfpm,
			forecasts = fcst,
			realized = realized,
			details = details)
		
	return(ans)
}
.forctestsarfima1 = function(object, realized = NULL)
{
	n.ahead = object@forecast$n.ahead
	n.start = object@forecast$n.start
	if(n.ahead<10)
		stop("a minimum of 10 n.ahead forecasts required for fpm (type 1)", call. = FALSE)
	if(n.start<10)
		stop("a minimum of 10 out.sample forecasts required for fpm (type 1)", call. = FALSE)
	
	origdata = object@model$origdata
	N = length(origdata)
	origdates = object@forecast$fdates
	n.roll = object@forecast$n.roll
	tlength = n.ahead + n.roll
	insample = min(n.start, n.ahead + n.roll)
	# get data.frame of those which have insample data
	fseries = as.data.frame(object, rollframe = "all", type = 1, aligned = FALSE)
	data = c(origdata[(N-n.start + 1):(N - n.start + insample)], rep(NA, length = n.ahead))
	indices = apply(.embed(1:tlength, n.ahead, by = 1), 1, FUN = function(x) rev(x))
	rdata = indices
	if(!is.null(realized)){
		if(is.matrix(realized)){
			nr = dim(realized)[1]
			nc = dim(realized)[2]
			if(nr<n.roll)
				stop(paste("\ninsufficient data points provided (expected ", n.roll, ", got ", nr, "\n", sep=""), call. = FALSE)
			if(is.null(colnames(realized))) colnames(realized) = "series"
			rseries = realized[, "series"]
		}
		if(is.vector(realized)){
			nr = length(realized)
			if(nr<n.roll)
				stop(paste("\ninsufficient data points provided (expected ", n.roll, ", got ", nr, "\n", sep=""), call. = FALSE)
			rseries = realized
		}
	} else{
		rseries = data
	}
	# create matrices
	rseriesm = apply(indices, 2, FUN = function(x) rseries[x])
	lossm.x = vector(mode = "list", length = n.roll + 1)
	for(i in 1:(n.roll+1)){ lossm.x[[i]] = lossfn.ret(rseriesm[,i], fseries[,i]) }
	
	meanloss.x = sapply(lossm.x, FUN = function(x) apply(x, 2, FUN = function(z) mean(z, na.rm = TRUE)))
	medianloss.x = sapply(lossm.x, FUN = function(x) apply(x, 2, FUN = function(z) median(z, na.rm = TRUE)))
	
	# now we have forecast and realized
	
	seriesfpm = list()
	seriesfpm$lossfn = lossm.x
	seriesfpm$meanloss = meanloss.x
	seriesfpm$medianloss = medianloss.x
		
	fcst = list()
	fcst$series = fseries
	
	realized = list()
	realized$series = rseriesm
	
	details = list()
	details$n.ahead = n.ahead
	details$n.start = n.start
	details$n.roll = n.roll
	details$type = 1
	
	details$fdates = origdates
	
	
	ans = new("ARFIMAfpm",
			seriesfpm = seriesfpm,
			forecasts = fcst,
			realized = realized,
			details = details)
	
	return(ans)
}


.signpluszero = function(x)
{
	z = as.numeric(x)
	ans = rep(0, length(z))
	ans[z>=0] = 1
	ans
}

# Berkowitz LR test, from Berkowitz's 2001 paper. The Jarque-Bera test is also added, as
# recommended by Dowd, since the LR test does not really account for the residuals being
# drawn from the Normal distribution.

BerkowitzLR = function(data, lags = 1, significance = 0.05)
{
	if( lags < 1 ) stop("\nlags must be 1 or greater!") else lags = as.integer(lags)
	x = as.numeric(data) - mean(data)
	n = length(data)
	xlag = NULL
	for(i in 1:lags) xlag = cbind(xlag, rgarch:::.lagx(x, n.lag = i, pad = 0))
	ans = lm(x~xlag-1)
	uLL = sum(dnorm(residuals(ans)[-c(1:lags)], sd = summary(ans)$sigma, log = TRUE))
	rLL = sum(dnorm(data[-c(1:lags)], log = TRUE))
	LR = 2*(uLL - rLL)
	chid = 1-pchisq(LR, 2 + lags)
	if(chid < significance) res = paste("reject NULL") else res = paste("fail to reject NULL")
	H0 = paste("Normal(0,1) with no autocorrelation")
	m1 = sum(x)/n
	xm = (x - m1)
	m2 = sum(xm^2)/n
	m3 =  sum(xm^3)/n
	m4 = sum(xm^4)/n
	k1 = (m3/m2^(3/2))^2
	k2 = (m4/m2^2)
	JB = n * k1/6 + n * (k2 - 3)^2/24
	JBp = 1 - pchisq(JB, df = 2)
	
	return(list(uLL = uLL, rLL = rLL, LR = LR, LRp = chid, H0 = H0, Test = res, 
					mu = mean(data), sigma = summary(ans)$sigma, rho =  coef(ans)[1:(lags)],
			JB = JB, JBp = JBp))
}
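
# Illustrative usage (a sketch, not run on source): under the null the input should already
# be forecast values transformed to N(0,1) (PIT followed by qnorm); simulated data is used
# here as a placeholder. The function relies on the internal rgarch:::.lagx, so the rgarch
# package must be installed.
if(FALSE){
	set.seed(105)
	zdata = rnorm(500)
	BerkowitzLR(zdata, lags = 1, significance = 0.05)
}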

# Tests of Directional Accuracy
DACTest = function(forecast, actual, test = c("PT", "AG"), conf.level = 0.95)
{
  n = length(actual)
  if( length(forecast) != n ) stop("Length of forecast and actual must be the same")
  test = match.arg(test)
  if( test == "PT"){
    x_t = z_t = y_t = rep(0, n)
    x_t[which(actual>0)] = 1
    y_t[which(forecast>0)] = 1
    p_y = mean(y_t)
    p_x = mean(x_t)
    z_t[which( (forecast*actual)>0 )] = 1
    p_hat = mean(z_t)
    p_star = p_y*p_x + (1 - p_y)*(1-p_x)
    p_hat_var = (p_star*(1-p_star))/n
    p_star_var = ((2*p_y-1)^2*(p_x*(1-p_x)))/n + ((2*p_x-1)^2*(p_y*(1-p_y)))/n + (4*p_x*p_y*(1-p_x)*(1-p_y))/n^2
    s_n = (p_hat - p_star)/sqrt(p_hat_var - p_star_var)
    ans = list(Test = "Pesaran and Timmermann", Stat = s_n, p.value = 1-pnorm(s_n), H0 = "Independently Distributed",
    Decision = if( s_n >  qnorm(conf.level) ) "Reject  H0" else "Fail to Reject H0",
    DirAcc = p_hat)
  } else{
    r_t=sign(forecast)*actual
    A_t = mean(r_t)
    B_t = mean(sign(forecast))*mean(actual)
    p_y = 0.5 * (1 + mean(sign(forecast)))
    
    V_EP = (4/(n^2))*p_y*(1-p_y)*sum((actual-mean(actual))^2)
    EP = (A_t-B_t)/sqrt(V_EP)
    ans = list(Test = "Anatolyev and Gerko", Stat = EP, p.value = 1-pnorm(EP), H0 = "No Predictability",
    Decision = if( EP >  qnorm(conf.level) ) "Reject  H0" else "Fail to Reject H0",
    DirAcc = sum(r_t>0)/n)
  }
  return( ans )
}
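
# Illustrative usage (a sketch, not run on source): directional accuracy of a noisy forecast
# of simulated returns, under both the Pesaran-Timmermann and Anatolyev-Gerko variants. The
# simulated series are placeholders.
if(FALSE){
	set.seed(106)
	actual = rnorm(250, 0, 0.01)
	forecast = actual + rnorm(250, 0, 0.02)
	DACTest(forecast, actual, test = "PT", conf.level = 0.95)
	DACTest(forecast, actual, test = "AG", conf.level = 0.95)
}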
