# Functions for computing likelihoods, likelihood ratios and likelihood ratio
# tests for partially cointegrated (PCI) series
# Copyright (C) 2016 Matthew Clegg
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# A copy of the GNU General Public License is available at
# http://www.r-project.org/Licenses/
# Declare package-level cache variables (populated via <<- by
# pci.load.likelihood_ratio.samples and pci.find.joint.critical.values)
# so that R CMD check does not flag them as undefined globals.
utils::globalVariables(c("PCI.SAMPLES", "PCI.SAMPLES.DT"))

# Default degrees of freedom for the t-distribution used when fitting
# PCI models robustly.
pci.nu.default <- function () 5

par.nu.default <- function () {
  # The default value to be used for the degrees of freedom parameter
  # when using robust estimation (PAR models).
  5
}
loglik.par.fkf <- function (Y, rho, sigma_M, sigma_R, M0=0, R0=Y[1]) {
  # Negative log likelihood of observing the sequence Y under a PAR
  # process with parameters (rho, sigma_M, sigma_R), computed with the
  # Kalman filter from the FKF package.
  #
  # The state is the pair (M, R): M follows an AR(1) with coefficient
  # rho and innovation s.d. sigma_M, while R is a random walk with
  # innovation s.d. sigma_R.  The observation is Y = M + R with no
  # measurement noise.
  if (length(Y) < 1) return(NA_real_)
  if (length(dim(Y)) > 0) Y <- Y[,1]
  Y <- coredata(Y)
  initial_state <- c(as.numeric(M0), as.numeric(R0))
  # Diagonal covariance of the two independent state innovations; also
  # used as the initial state covariance.
  state_cov <- matrix(c(sigma_M^2, 0, 0, sigma_R^2), 2, 2)
  # Transition: M' = rho * M, R' = R
  transition <- matrix(c(rho, 0, 0, 1), 2, 2)
  # Observation row vector: Y = 1*M + 1*R
  observation <- matrix(c(1, 1), 1, 2)
  filt <- fkf(a0 = initial_state, P0 = state_cov,
              dt = matrix(0, 2, 1), ct = matrix(0, 1, 1),
              Tt = transition, Zt = observation,
              HHt = state_cov, GGt = matrix(0, 1, 1),
              yt = rbind(Y))
  # cat("Kalman gain = ", filt$Kt, "\n")
  -filt$logLik
}
loglik.par.ss <- function (Y, rho, sigma_M, sigma_R, M0=0, R0=Y[1]) {
  # Negative log likelihood of Y under a PAR process parameterized by
  # (rho, sigma_M, sigma_R), computed with a steady state Kalman filter
  # whose gain comes from kalman.gain.par.
  #
  # Despite being hand-coded, the FKF-based implementation above is
  # about twice as fast in practice, even with the byte compiler on.
  if (length(dim(Y)) > 0) Y <- Y[,1]
  Y <- coredata(Y)
  nobs <- length(Y)
  if (nobs < 1) return(NA_real_)
  gain <- kalman.gain.par(rho, sigma_M, sigma_R)
  m_state <- as.numeric(M0)
  r_state <- as.numeric(R0)
  sse <- 0
  # Filter recursion: accumulate squared one-step prediction errors
  for (k in seq_len(nobs)) {
    predicted <- rho * m_state + r_state
    err <- Y[k] - predicted
    sse <- sse + err * err
    m_state <- rho * m_state + err * gain[1]
    r_state <- r_state + err * gain[2]
  }
  # Gaussian negative log likelihood with constant innovation variance
  innov_var <- sigma_M^2 + sigma_R^2
  (nobs/2) * log(innov_var * 2 * pi) + sse / (2 * innov_var)
}
llst <- function(X, mu, sigma, df) {
  # Negative log likelihood that (X - mu)/sigma follows a Student's
  # t-distribution with df degrees of freedom.
  #
  # X:     vector of observations whose likelihood is to be computed
  # mu:    location parameter of the distribution
  # sigma: scale parameter -- similar to a standard deviation
  # df:    degrees of freedom (must exceed 1)
  #
  # Equivalent to -sum(log(dt((X - mu)/sigma, df) / sigma)), but the
  # constant part of the log density (gamma terms and log(sigma)) is
  # evaluated once instead of per observation, which is roughly an
  # order of magnitude faster than the naive form.
  n <- length(X)
  if (sigma <= 0 || df <= 1 || n < 1) return(NA_real_)
  z <- (X - mu) / sigma
  # Constant of the log t-density, summed over all n observations
  const_term <- n * (lgamma((df + 1)/2) - 0.5*log(df * pi) -
                     lgamma(df/2) - log(sigma))
  # Data-dependent kernel of the log density
  kernel <- -((df + 1)/2) * sum(log(1 + z^2/df))
  -(kernel + const_term)
}
loglik.par.ss.t <- function (Y, rho, sigma_M, sigma_R, M0=0, R0=Y[1], nu=par.nu.default()) {
  # Given a sequence Y and a parametrization (rho, sigma_M, sigma_R) of an
  # associated PAR process, calculates the negative log likelihood that Y would
  # be observed under these process parameters, using a steady state
  # Kalman filter.  The one-step prediction errors are scored against a
  # t-distribution with nu degrees of freedom (via llst), making this the
  # robust counterpart of loglik.par.ss.
  if (length(dim(Y)) > 0) Y <- Y[,1]
  Y <- coredata(Y)
  M0 <- as.numeric(M0)
  R0 <- as.numeric(R0)
  K <- kalman.gain.par(rho, sigma_M, sigma_R)
  if (is.na(K[1])) return(NA_real_)  # no valid steady-state gain for these parameters
  n <- length(Y)
  if (n < 1) return(NA_real_)
  M <- M0
  R <- R0
  # Scale of the t-distribution: sqrt of the total innovation variance.
  tsd <- sqrt(sigma_M^2 + sigma_R^2)
  evec <- numeric(n)
  # Steady state Kalman recursion collecting the prediction errors.
  # (The unused running sum of squares, a leftover from the Gaussian
  # variant loglik.par.ss, has been removed.)
  for (i in 1:n) {
    xhat <- rho * M + R
    e <- Y[i] - xhat
    evec[i] <- e
    M <- rho * M + e * K[1]
    R <- R + e * K[2]
  }
  # Alternative scaling that would normalize the t variance to tvar:
  # nll <- llst(evec, 0, tsd * sqrt((nu - 2) / nu), nu)
  llst(evec, 0, tsd, nu)
}
loglik.pci.fkf <- function (Y, X, alpha, beta, rho, sigma_M, sigma_R, M0=0, R0=0) {
  # Negative log likelihood of Y under the PCI model with hedge basis X,
  # intercept alpha, coefficients beta, and PAR residual parameters
  # (rho, sigma_M, sigma_R), evaluated with the FKF-based filter.
  # When R0 is not supplied by the caller, the first residual is used
  # to seed the random walk component.
  single_factor <- is.null(dim(X)) && (length(beta) == 1)
  Z <- if (single_factor) {
    as.numeric(Y - X * beta - alpha)
  } else {
    as.numeric(Y - X %*% beta - alpha)
  }
  if (missing(R0)) R0 <- Z[1]
  loglik.par.fkf (Z, rho, sigma_M, sigma_R, M0, R0)
}
loglik.pci.ss <- function (Y, X, alpha, beta, rho, sigma_M, sigma_R, M0=0, R0=0) {
  # Negative log likelihood of Y under the PCI model with basis X,
  # evaluated with the steady state Kalman filter (loglik.par.ss).
  # When R0 is not supplied by the caller, the first residual is used
  # to seed the random walk component.
  resid_seq <- if (is.null(dim(X)) && (length(beta) == 1)) {
    as.numeric(Y - X * beta - alpha)
  } else {
    as.numeric(Y - X %*% beta - alpha)
  }
  if (missing(R0)) R0 <- resid_seq[1]
  loglik.par.ss (resid_seq, rho, sigma_M, sigma_R, M0, R0)
}
loglik.pci.css <- function (Y, X, alpha, beta, rho, sigma_M, sigma_R, M0=0, R0=0) {
  # Negative log likelihood of Y under the PCI model with basis X,
  # evaluated with the steady state Kalman filter implemented in C
  # (loglik_par_c).  When R0 is not supplied by the caller, the first
  # residual is used to seed the random walk component.
  eps <- if (is.null(dim(X)) && (length(beta) == 1)) {
    as.numeric(Y - X * beta - alpha)
  } else {
    as.numeric(Y - X %*% beta - alpha)
  }
  if (missing(R0)) R0 <- eps[1]
  loglik_par_c (eps, rho, sigma_M, sigma_R, M0, R0)
}
loglik.pci.sst <- function (Y, X, alpha, beta, rho, sigma_M, sigma_R, M0=0, R0=0, nu=pci.nu.default()) {
  # Negative log likelihood of Y under the PCI model with basis X,
  # evaluated with the steady state Kalman filter and t-distributed
  # (robust) errors with nu degrees of freedom (loglik.par.ss.t).
  # When R0 is not supplied by the caller, the first residual is used
  # to seed the random walk component.
  residual_series <- if (is.null(dim(X)) && (length(beta) == 1)) {
    as.numeric(Y - X * beta - alpha)
  } else {
    as.numeric(Y - X %*% beta - alpha)
  }
  if (missing(R0)) R0 <- residual_series[1]
  loglik.par.ss.t (residual_series, rho, sigma_M, sigma_R, M0, R0, nu=nu)
}
loglik.pci.csst <- function (Y, X, alpha, beta, rho, sigma_M, sigma_R, M0=0, R0=0, nu=pci.nu.default()) {
  # Negative log likelihood of Y under the PCI model with basis X,
  # evaluated with the robust (t-distributed errors) steady state
  # Kalman filter implemented in C (loglik_par_t_c).
  # When R0 is not supplied by the caller, the first residual is used
  # to seed the random walk component.
  innovations <- if (is.null(dim(X)) && (length(beta) == 1)) {
    as.numeric(Y - X * beta - alpha)
  } else {
    as.numeric(Y - X %*% beta - alpha)
  }
  if (missing(R0)) R0 <- innovations[1]
  loglik_par_t_c (innovations, rho, sigma_M, sigma_R, M0, R0, nu)
}
loglik.pci <- function (Y, X, alpha, beta, rho, sigma_M, sigma_R, M0=0, R0=0,
  calc_method=c("css", "fkf", "ss", "sst", "csst"), nu=pci.nu.default()) {
  # Given a sequence Y, basis X, and a parameterization (beta, rho, sigma_M, sigma_R) of an
  # associated PCI process, calculates the negative log likelihood that
  # Y would be observed under these process parameters. The method used
  # for calculating the log likelihood is determined by "calc_method":
  #   fkf:  Uses the Fast Kalman Filter (FKF) package
  #   ss:   Uses a steady state Kalman filter
  #   css:  Uses a steady state Kalman filter coded in C
  #   sst:  Steady state Kalman filter with t-distributed (robust) errors
  #   csst: C implementation of the robust steady state filter
  # The degrees of freedom nu is used only by the robust methods
  # "sst" and "csst".
  switch(match.arg(calc_method),
    fkf=loglik.pci.fkf(Y, X, alpha, beta, rho, sigma_M, sigma_R, M0, R0),
    ss=loglik.pci.ss(Y, X, alpha, beta, rho, sigma_M, sigma_R, M0, R0),
    css=loglik.pci.css(Y, X, alpha, beta, rho, sigma_M, sigma_R, M0, R0),
    sst=loglik.pci.sst(Y, X, alpha, beta, rho, sigma_M, sigma_R, M0, R0, nu=nu),
    # Bug fix: nu was previously dropped in this branch, so the robust
    # C filter always ran with its default degrees of freedom.
    csst=loglik.pci.csst(Y, X, alpha, beta, rho, sigma_M, sigma_R, M0, R0, nu=nu)
  )
}
likelihood_ratio.pci <- function (
  Y,                    # The series which is being fit
  X,                    # The basis used for hedging X
  robust=FALSE,         # If TRUE, robust estimations are performed
  null_model=c("rw", "ar1"),  # Specifies the null hypothesis
    # rw = null model estimates sigma_R, assuming rho = sigma_M = 0.
    #   This is the default.
    # ar1 = null model estimates rho and sigma_M, assuming sigma_R = 0.
  pci_opt_method=c("jp", "twostep"),
    # Method to be used for fitting Y to X.
    # jp: The coefficients of Y are jointly optimized
    #   with the parameters of the AAR fit of the residuals
    # twostep: A modified Engle-Granger procedure is used, where
    #   the coefficients of Y are first estimated, and then an AAR
    #   model is fit to the residuals.
  nu=5                  # If robust is TRUE, the degrees of freedom parameter
) {
  # Computes the log likelihood ratio statistic comparing the full PCI
  # fit of (Y, X) against the fit under the chosen null model.  The
  # returned value is negloglik(alternative) - negloglik(null), so
  # smaller (more negative) values favor the full PCI model.
  null_model <- match.arg(null_model)
  pci_opt_method <- match.arg(pci_opt_method)
  f.alt <- fit.pci(Y, X, robust=robust, pci_opt_method=pci_opt_method, nu=nu)
  f.null <- fit.pci(Y, X, robust=robust, par_model=null_model, pci_opt_method=pci_opt_method, nu=nu)
  f.alt$negloglik - f.null$negloglik
}
sample.likelihood_ratio.pci <- function (n=500,
  alpha=1.0, beta=1.0, sigma_C=1.0,
  rho=0.8, sigma_M=1.0, sigma_R=1.0, nrep=1000, use.multicore=TRUE,
  pci_opt_method=c("jp", "twostep"),
  robust=FALSE, nu=pci.nu.default(), seed.start=0) {
  # Generates random PCI sequences with the specified parameters for
  # alpha, beta, sigma_C, rho, sigma_M, sigma_R and nu.
  # For each such random sequence, performs RW, AR(1) and AAR fits to the residuals
  # of an PCI model using the optimization method pci_opt_method. Creates a
  # data.frame containing the statistics obtained for each of the fits:
  # one row per replication, including the fitted parameters, negative
  # log likelihoods, the two likelihood ratio statistics and pvmr.
  pci_opt_method <- match.arg(pci_opt_method)
  sample1 <- function (seed) {
    # Generates one random PCI sequence and fits all three candidate
    # models ("rw", "ar1", "par"), returning one row of statistics.
    set.seed(seed)  # each replication is fully determined by its seed
    XY <- rpci(n, alpha, beta, sigma_C, rho, sigma_M, sigma_R, robust=robust)
    Y <- XY[,1,drop=FALSE]
    X <- XY[,2:ncol(XY),drop=FALSE]
    frw <- fit.pci(Y, X, robust=robust, pci_opt_method=pci_opt_method, par_model="rw", nu=nu)
    fmr <- fit.pci(Y, X, robust=robust, pci_opt_method=pci_opt_method, par_model="ar1", nu=nu)
    fpar <- fit.pci(Y, X, robust=robust, pci_opt_method=pci_opt_method, par_model="par", nu=nu)
    c(n, alpha, beta, sigma_C,
      rho, sigma_M, sigma_R, pci_opt_method, robust, nu, seed,
      frw$rho, frw$sigma_M, frw$sigma_R, frw$negloglik,
      fmr$rho, fmr$sigma_M, fmr$sigma_R, fmr$negloglik,
      fpar$rho, fpar$sigma_M, fpar$sigma_R, fpar$negloglik,
      fpar$negloglik - frw$negloglik, fpar$negloglik - fmr$negloglik,
      fpar$pvmr)
  }
  # debug(sample1)
  pvec_sample <- function(N) {
    lapply(N, function(k) sample1(k))
  }
  # Seeds run from seed.start + 1 to seed.start + nrep, so successive
  # calls with advancing seed.start never reuse a seed.
  if (use.multicore) {
    samples <- parallel::pvec(seed.start + 1:nrep, pvec_sample)
  } else {
    samples <- lapply(seed.start + 1:nrep, sample1)
  }
  df <- as.data.frame(do.call("rbind", samples))
  # Column names expand with the number of basis series in beta/sigma_C
  beta_names <- paste("beta", 1:length(beta), sep="")
  sigma_C_names <- paste("sigma_C", 1:length(sigma_C), sep="")
  colnames(df) <- c("n", "alpha", beta_names, sigma_C_names,
    "rho", "sigma_M", "sigma_R", "pci_opt", "robust", "nu", "seed",
    "rw_rho", "rw_sigma_M", "rw_sigma_R", "rw_negloglik",
    "mr_rho", "mr_sigma_M", "mr_sigma_R", "mr_negloglik",
    "pci_rho", "pci_sigma_M", "pci_sigma_R", "pci_negloglik",
    "rw_lrt", "mr_lrt", "pvmr")
  df
}
pci.generate.likelihood_ratio.samples <- function (sample_dir = "samples",
  nrep = 1000,
  sample_size=c(50, 100, 150, 200, 250, 300, 400, 500, 600, 700, 750, 800, 900, 1000, 1250,
    1500, 1750, 2000, 2250, 2500),
#  rho=c(0.5, 0.6, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0),
  rho=c(0.9),
  sigma_M=c(0.0, 1.0),
  sigma_R=c(0.0, 1.0),
  pci_opt_method=c("jp", "twostep"),
  robust=c(FALSE, TRUE),
  skip.existing=FALSE,
  ...
) {
  # For each combination of sample size, rho and robust, generates
  # nrep likelihood_ratio samples with those parameters,
  # and writes the result to a file whose name is in the form
  #   sample_dir/PCI.SAMPLES.nn.rr[.ROB]
  # Additional arguments in ... are forwarded to
  # sample.likelihood_ratio.pci.
  params.df <- expand.grid(sample_size, robust, rho, sigma_M, sigma_R, pci_opt_method, stringsAsFactors=FALSE)
  colnames(params.df) <- c("sample_size", "robust", "rho", "sigma_M", "sigma_R", "pci_opt")
  # Ensure that exactly one of (sigma_M, sigma_R) is non-zero
  params.df <- params.df[params.df$sigma_M != 0.0 | params.df$sigma_R != 0.0,]
  params.df <- params.df[params.df$sigma_M == 0.0 | params.df$sigma_R == 0.0,]
  dir.create(sample_dir, showWarnings=FALSE, recursive=TRUE)
  # Seeds advance by nrep per parameter combination so that every
  # replication in the whole run uses a distinct seed.
  seed.start <- 1
  for (i in 1:nrow(params.df)) {
    robstring <- if (params.df$robust[i]) ".ROB" else ""
    fn <- sprintf("%s/PCI.SAMPLES.%d.%02d.%02d.%02d.%s%s", sample_dir,
      params.df$sample_size[i],
      floor(100.0 * params.df$rho[i]),
      floor(100.0 * params.df$sigma_M[i]),
      floor(100.0 * params.df$sigma_R[i]),
      params.df$pci_opt[i],
      robstring)
    if (!skip.existing || !file.exists(fn)) {
      printf("%s ", Sys.time())  # progress timestamp before the long run
      lrdf <- sample.likelihood_ratio.pci(n=params.df$sample_size[i],
        alpha = 1.0, beta=1.0, sigma_C=1.0,
        rho=params.df$rho[i],
        sigma_M=params.df$sigma_M[i], sigma_R=params.df$sigma_R[i],
        nrep=nrep, robust=params.df$robust[i],
        pci_opt_method=params.df$pci_opt[i],
        seed.start=seed.start, ...)
      write.table(lrdf, fn)
      printf("%s\n", fn)
    }
    seed.start <- seed.start + nrep
  }
}
pci.load.likelihood_ratio.samples <- function (sample_dir = "samples") {
  # Reads all of the samples in the specified samples dir (files whose
  # names contain "PCI.SAMPLES", as written by
  # pci.generate.likelihood_ratio.samples), binds them into a single
  # data.frame, and stores it in the global variable PCI.SAMPLES.
  #
  # The local NULL binding is only to appease R CMD check; the <<-
  # assignment below starts its search in the enclosing (global)
  # environment and so still writes the global.
  PCI.SAMPLES<-NULL
  files <- list.files(sample_dir, "PCI.SAMPLES", full.names=TRUE)
  PCI.SAMPLES <<- do.call("rbind", lapply(files,
    function(f) read.table(f, header=TRUE, stringsAsFactors=FALSE)))
  "Samples loaded into PCI.SAMPLES"
}
sample.pci.lrt.nullrw.likelihood_ratio <- function (n=500, nrep=1000, ncol=1, use.multicore=TRUE) {
  # Draws nrep random sequences satisfying the random walk null
  # hypothesis (rho = 0, sigma_M = 0, sigma_R = 1) and returns the
  # log likelihood ratio statistic computed for each.
  one_sample <- function (dummy=0) {
    # Two approaches to generating random PCI sequences were tried:
    # (1) all coefficients and s.d.'s fixed at 1, and (2) coefficients
    # and s.d.'s chosen randomly.  The second gave slightly more
    # tightly distributed likelihood scores, so the first was kept as
    # the more conservative choice.  The number of basis columns was
    # found to make no detectable difference.
    # beta <- rnorm(ncol)*10
    # sigma_C <- rnorm(ncol)^2
    # sim <- rpci(n, 1, beta, sigma_C, 0, 0, 1)
    sim <- rpci(n, 1, rep(1, ncol), rep(1, ncol), 0, 0, 1)
    likelihood_ratio.pci(sim[,1], sim[,2:ncol(sim),drop=FALSE], null_model="rw")
  }
  if (use.multicore) {
    parallel::pvec(1:nrep, function(idx) sapply(idx, one_sample))
  } else {
    replicate(nrep, one_sample())
  }
}
sample.pci.lrt.nullmr.likelihood_ratio <- function (n=500, rho=1.0, nrep=1000, ncol=1, use.multicore=TRUE) {
  # Draws nrep random sequences satisfying the pure AR(1) null
  # hypothesis (sigma_M = 1, sigma_R = 0 at the given rho) and returns
  # the log likelihood ratio statistic computed for each.
  one_sample <- function (dummy=0) {
    # x <- rpar(n, rho, 1, 0)
    # likelihood_ratio.par.rw0(x)
    sim <- rpci(n, 1, rep(1, ncol), rep(1, ncol), rho, 1, 0)
    likelihood_ratio.pci(sim[,1], sim[,2:ncol(sim),drop=FALSE], null_model="ar1")
  }
  if (use.multicore) {
    parallel::pvec(1:nrep, function(idx) sapply(idx, one_sample))
  } else {
    replicate(nrep, one_sample())
  }
}
pci.find.joint.critical.values <- function (alpha, n=500, robust=FALSE, nest=TRUE,
  pci_opt_method=c("jp","twostep")) {
  # Given an acceptance value alpha (or a vector of such acceptance values),
  # calculates likelihood scores Lambda_R and Lambda_M such that the probability
  # is no more than alpha that a random sample will have likelihood ratio for the
  # random walk no more than Lambda_R and likelihood ratio for the AR(1) hypothesis
  # no more than Lambda_M. For the inverse, see par.joint.dist.
  # When nest is TRUE, the critical values are forced to be monotone in
  # alpha, so the acceptance regions are nested.
  pci_opt_method <- match.arg(pci_opt_method)
  pn <- n
  probust <- robust
  # NULL bindings to appease R CMD check about data.table NSE columns
  rw <- NULL
  mr <- NULL
  sigma_M <- NULL
  sigma_R <- NULL
  pci_opt <- NULL
  # Bug fix: a local "PCI.SAMPLES.DT <- NULL" previously shadowed the
  # global cache here, so exists() always succeeded and the keyed
  # lookups below operated on NULL.  The cache is now resolved in the
  # global environment (already declared via utils::globalVariables).
  if (!exists("PCI.SAMPLES.DT")) {
    if (!exists("PCI.SAMPLES")) pci.load.likelihood_ratio.samples()
    PCI.SAMPLES.DT <<- data.table(PCI.SAMPLES)
  }
  setkey(PCI.SAMPLES.DT, n, sigma_M, sigma_R, robust, pci_opt)
  # AM: samples generated under sigma_M=1, sigma_R=0 (pure AR(1));
  # AR: samples generated under sigma_M=0, sigma_R=1 (pure random walk)
  AM <- PCI.SAMPLES.DT[list(pn, 1, 0, probust, pci_opt_method),]
  AR <- PCI.SAMPLES.DT[list(pn, 0, 1, probust, pci_opt_method),]
  if (nrow(AM) == 0 || nrow(AR) == 0) stop ("No matching samples found")
  AM <- AM[,c("rw_lrt", "mr_lrt"),with=FALSE]
  AR <- AR[,c("rw_lrt", "mr_lrt"),with=FALSE]
  setnames(AM, c("rw", "mr"))
  setnames(AR, c("rw", "mr"))
  LR_min <- -Inf
  LM_min <- -Inf
  find1 <- function(a) {
    # Searches for the pair (C_R, C_M) whose empirical acceptance
    # probabilities under both null sample sets are closest to a.
    cv <- function(p) {
      LR <- p[1]
      LM <- p[2]
      # Penalize non-nested candidates when nesting is requested
      if (nest && ((LR < LR_min) || (LM < LM_min))) return(3)
      am <- AM[,sum(rw < LR & mr < LM)] / nrow(AM)
      ar <- AR[,sum(rw < LR & mr < LM)] / nrow(AR)
      (am - a)^2 + (ar - a)^2
    }
    # Marginal quantiles serve as the starting point for the search
    LR0 <- max(quantile(AR$rw, a), LR_min)
    LM0 <- max(quantile(AM$mr, a), LM_min)
    p <- optim(c(LR0, LM0), cv)
    if (nest) {
      LR_min <<- p$par[1]
      LM_min <<- p$par[2]
    }
    # Realized acceptance probabilities at the fitted critical values
    QM <- AM[,sum(rw < p$par[1] & mr < p$par[2])] / nrow(AM)
    QR <- AR[,sum(rw < p$par[1] & mr < p$par[2])] / nrow(AR)
    v <- data.frame(n, robust, pci_opt_method, a, LR0, LM0, p$par[1], p$par[2], QR, QM)
    colnames(v) <- c("n", "robust", "pci_opt", "alpha", "C_R0", "C_M0", "C_R", "C_M", "QR", "QM")
    v
  }
  do.call("rbind", lapply(alpha, find1))
}
pci.findall.joint.critical.values <- function (alpha=seq(0.01,0.99,by=0.01),
  nest=TRUE,
  use.multicore=TRUE, ...) {
  # Computes joint critical values (see pci.find.joint.critical.values)
  # for every combination of sample size, robustness flag and
  # optimization method present in the loaded PCI.SAMPLES table, and
  # binds all results into a single data.frame.  Extra arguments in ...
  # are forwarded to pci.find.joint.critical.values.
  if (!exists("PCI.SAMPLES")) pci.load.likelihood_ratio.samples()
  idf <- expand.grid(n=sort(unique(PCI.SAMPLES$n)), robust=sort(unique(PCI.SAMPLES$robust)),
    pci_opt=sort(unique(PCI.SAMPLES$pci_opt)), stringsAsFactors=FALSE)
  # Locally shadow lapply with the parallel version when requested
  if (use.multicore) lapply <- mclapply
  do.call("rbind", lapply(1:nrow(idf), function(k)
    pci.find.joint.critical.values(alpha, n=idf$n[k], robust=idf$robust[k], nest=nest,
      pci_opt_method=idf$pci_opt[k], ...)))
}
pci.rw.pvalue <- function (
  stat.rw,  # Statistic used for testing RW hypothesis (a likelihood ratio)
  n,        # Sample size
  pci_opt_method=c("jp", "twostep"),  # Optimization method
  robust=FALSE  # TRUE if robust models should be used
) {
  # Given a statistic on the RW hypothesis (e.g., the value of the
  # likelihood ratio function), returns the corresponding p-value by
  # interpolating in the pre-computed quantile table that matches the
  # optimization method and robustness setting.
  pci_opt_method <- match.arg(pci_opt_method)
  qtab <- if (pci_opt_method == "jp") {
    if (robust) PCI.RWNULL.ROB.JP.LRQT else PCI.RWNULL.JP.LRQT
  } else {
    if (robust) PCI.RWNULL.ROB.TWOSTEP.LRQT else PCI.RWNULL.TWOSTEP.LRQT
  }
  quantile_table_interpolate(qtab, n, stat.rw)
}
pci.mr.pvalue <- function (
  stat.mr,  # Statistic used for testing AR(1) hypothesis
  n,        # Sample size
  pci_opt_method=c("jp", "twostep"),  # Optimization method
  robust=FALSE  # TRUE if robust models should be used
) {
  # Given a statistic on the AR(1) hypothesis, returns the corresponding
  # p-value by interpolating in the pre-computed quantile table that
  # matches the optimization method and robustness setting.
  pci_opt_method <- match.arg(pci_opt_method)
  qtab <- if (pci_opt_method == "jp") {
    if (robust) PCI.MRNULL.ROB.JP.LRQT else PCI.MRNULL.JP.LRQT
  } else {
    if (robust) PCI.MRNULL.ROB.TWOSTEP.LRQT else PCI.MRNULL.TWOSTEP.LRQT
  }
  quantile_table_interpolate(qtab, n, stat.mr)
}
pci.joint.pvalue <- function (
  stat.rw,  # Statistic used for testing RW hypothesis (a likelihood ratio)
  stat.mr,  # Statistic used for testing AR(1) hypothesis (also a likelihood ratio)
  n,        # Sample size
  robust=FALSE,  # TRUE if robust models should be used
  pci_opt_method=c("jp", "twostep")  # Optimization method
) {
  # Given statistics on the RW hypothesis and AR(1) hypothesis, calculates
  # a p-value associated with those statistics. The p-value is an estimate
  # of the probability that a random sequence satisfying the null hypothesis
  # of either RW or AR(1) would have statistics more extreme than the
  # statistics (stat.rw, stat.mr).  The p-value is linearly interpolated
  # between the two tabulated sample sizes that bracket n.
  pci_opt_method <- match.arg(pci_opt_method)
  if (!exists("PCI.JOINT.CRITICAL.VALUES")) stop ("Could not find table of critical values")
  AJCV <- PCI.JOINT.CRITICAL.VALUES

  # Smallest tabulated alpha whose critical region at tabulated sample
  # size nv still contains (stat.rw, stat.mr); 1 when nothing matches.
  pvalue.at <- function(nv) {
    matches <- (AJCV[,"n"] == nv) &
               (AJCV[,"robust"] == robust) &
               (AJCV[,"pci_opt"] == pci_opt_method) &
               (stat.rw <= AJCV[,"C_R"]) &
               (stat.mr <= AJCV[,"C_M"])
    if (!any(matches)) 1 else min(AJCV[matches,"alpha"])
  }

  nvals <- unique(AJCV[,"n"])
  if (!any(nvals <= n)) {
    warning("Sample size too small (", n, ") to provide accurate p-value")
    nlower <- 1
    plower <- 1
  } else {
    nlower <- max(nvals[nvals <= n])
    plower <- pvalue.at(nlower)
  }
  # Bug fixes relative to the original implementation:
  #  - the upper bracket tested any(nvals >= n), so when n equaled the
  #    largest tabulated size min() ran on an empty vector (Inf) and the
  #    interpolation below produced NaN;
  #  - the upper matches were filtered at nlower instead of nupper.
  if (any(nvals > n)) {
    nupper <- min(nvals[nvals > n])
    pupper <- pvalue.at(nupper)
  } else {
    nupper <- n
    pupper <- plower
  }
  # Guard against 0/0 when n coincides exactly with a tabulated size
  if (nupper == nlower) return(plower)
  (nupper - n)/(nupper - nlower) * plower +
    (n - nlower)/(nupper - nlower) * pupper
}
test.pci.nullrw <- function (Y, X, pci_opt_method=c("jp", "twostep"), robust=FALSE) {
  # Likelihood ratio test of the null hypothesis that Y with basis X is
  # a random walk against the alternative that (Y, X) is a PCI series.
  # Returns an object of class "htest".
  DNAME <- paste(deparse(substitute(Y)), "/", deparse(substitute(X)))
  pci_opt_method <- match.arg(pci_opt_method)
  STAT <- likelihood_ratio.pci (Y, X, null_model="rw", pci_opt_method=pci_opt_method, robust=robust)
  PVAL <- pci.rw.pvalue(STAT, length(Y), pci_opt_method, robust)
  if (robust) {
    METHOD <- "Likelihood ratio test of Robust Random Walk vs Robust PCI(1)"
    alternative <- "RPCI(1)"
  } else {
    METHOD <- "Likelihood ratio test of Random Walk vs PCI(1)"
    alternative <- "PCI(1)"
  }
  suffix <- if (pci_opt_method == "jp") "(joint penalty method)" else "(two step method)"
  METHOD <- paste(METHOD, suffix)
  names(STAT) <- "LL"
  structure(list(statistic = STAT, alternative = alternative,
                 p.value = PVAL, method = METHOD, data.name = DNAME),
            class = "htest")
}
test.pci.nullmr <- function (Y, X, pci_opt_method=c("jp", "twostep"), robust=FALSE) {
  # Likelihood ratio test of whether the sequence Y with basis X
  # conforms to the PCI model, with the null hypothesis that (Y, X)
  # is AR(1).  Returns an object of class "htest".
  DNAME <- paste(deparse(substitute(Y)), "/", deparse(substitute(X)))
  pci_opt_method <- match.arg(pci_opt_method)
  STAT <- likelihood_ratio.pci (Y, X, null_model="ar1", pci_opt_method=pci_opt_method, robust=robust)
  PVAL <- pci.mr.pvalue(STAT, length(Y), pci_opt_method, robust)
  if (robust) {
    METHOD <- "Likelihood ratio test of Robust Cointegrated vs Robust PCI(1)"
    alternative <- "RPCI(1)"
  } else {
    METHOD <- "Likelihood ratio test of cointegrated vs partially cointegrated"
    alternative <- "PCI(1)"
  }
  suffix <- if (pci_opt_method == "jp") "(joint penalty method)" else "(two step method)"
  METHOD <- paste(METHOD, suffix)
  names(STAT) <- "LL"
  structure(list(statistic = STAT, alternative = alternative,
                 p.value = PVAL, method = METHOD, data.name = DNAME),
            class = "htest")
}
test.pci <- function (Y,
  X,
  alpha=0.05,  # Critical value to be used in deciding whether to reject the null.
  null_hyp=c("rw", "ar1"),  # Specifies the null hypothesis. Can be either or both.
    # rw = null model estimates sigma_R, assuming rho = sigma_M = 0.
    # ar1 = null model estimates rho and sigma_M, assuming sigma_R = 0.
    # Default is both
  robust=FALSE,  # TRUE if robust models should be used
  pci_opt_method=c("jp", "twostep")  # Method used to search for partially cointegrating relations
) {
  # Tests the null hypothesis that Y is either a random walk or an AR(1)
  # series (or both) against the alternative hypothesis that Y is AAR.
  # Y may also be a pci.hedge or pci.fit object, in which case the data
  # (and, for fits, the basis and fitting options) are taken from it.
  # Returns an object of class c("pcitest", "htest").
  DNAME <- deparse(substitute(Y))
  pci_opt_method <- match.arg(pci_opt_method)
  if (is(Y, "pci.hedge")) {
    Yhedge <- Y
    Y <- Yhedge$pci
  }
  if (is(Y, "pci.fit")) {
    Yfit <- Y
    Y <- Yfit$data
    X <- Yfit$basis
    # Options recorded in the fit are used unless explicitly overridden
    if (missing(robust)) robust <- Yfit$robust
    if (missing(pci_opt_method)) {
      pci_opt_method <- Yfit$pci.fit
    }
  }
  # A single null hypothesis delegates to the dedicated test function
  if (length(null_hyp) == 1) {
    if (null_hyp == "rw") return(test.pci.nullrw(Y, X, pci_opt_method=pci_opt_method, robust=robust))
    if ((null_hyp == "ar1") || (null_hyp == "mr")) return(test.pci.nullmr(Y, X, pci_opt_method=pci_opt_method, robust=robust))
    stop("Error in parameter null_hyp: ", null_hyp)
  }
  # (An unused copy of Y, formerly "Yorig", was removed here.)
  # A multi-column Y supplies its own basis in columns 2..k
  if (!is.null(dim(Y))) {
    if (dim(Y)[2] > 1) {
      if (missing(X)) {
        X <- Y[,2:ncol(Y),drop=FALSE]
      } else {
        stop("Y must be a single column")
      }
    }
    Y <- Y[,1]
  }
  Y <- coredata(Y)
  X <- as.matrix(coredata(X))
  # Fit all three candidate models and form the two likelihood ratios
  frw <- fit.pci(Y, X, par_model="rw", pci_opt_method=pci_opt_method, robust=robust)
  fmr <- fit.pci(Y, X, par_model="ar1", pci_opt_method=pci_opt_method, robust=robust)
  fpar <- fit.pci(Y, X, par_model="par", pci_opt_method=pci_opt_method, robust=robust)
  stat.mr <- fpar$negloglik - fmr$negloglik
  stat.rw <- fpar$negloglik - frw$negloglik
  STAT <- c(stat.rw, stat.mr)
  pval.mr <- pci.mr.pvalue(stat.mr, length(Y), pci_opt_method=pci_opt_method, robust=robust)
  pval.rw <- pci.rw.pvalue(stat.rw, length(Y), pci_opt_method=pci_opt_method, robust=robust)
  pval.joint <- pci.joint.pvalue(stat.rw, stat.mr, length(Y), pci_opt_method = pci_opt_method,
    robust=robust)
  if (!robust) {
    METHOD <- "Likelihood ratio test of [Random Walk or CI(1)] vs Almost PCI(1)"
    alternative <- "Almost PCI(1)"
  } else {
    METHOD <- "Likelihood ratio test of [Robust Random Walk or Robust CI(1)] vs Robust Almost PCI(1)"
    alternative <- "Robust Almost PCI(1)"
  }
  if (pci_opt_method == "jp") {
    METHOD <- sprintf("%s (joint penalty method)", METHOD)
  } else {
    METHOD <- sprintf("%s (two step method)", METHOD)
  }
  PVAL <- c(RW=pval.rw, MR=pval.mr, JOINT=pval.joint)
  names(STAT) <- "LL"
  structure(list(statistic = STAT, alternative = alternative,
    p.value = PVAL, method = METHOD, data.name = DNAME, alpha=alpha, robust=robust),
    class = c("pcitest", "htest"))
}
print.pcitest <- function (x, ...) {
  # S3 print method for "pcitest" objects; delegates to the internal
  # formatter, which returns its argument invisibly.
  print.pcitest.internal (x, ...)
}
print.pcitest.internal <- function (AT, alpha=0.05, ...) {
  # Pretty-prints a "pcitest" result in the style of stats:::print.htest
  # and returns AT invisibly.  (The alpha argument is accepted for
  # backward compatibility but is not used in the display.)
  cat("\n")
  cat(strwrap(AT$method, prefix = "\t"), sep = "\n")
  cat("\n")
  cat("data: ", AT$data.name, "\n", sep = "")
  cat("\n")
  # Hypothesis labels; the third entry is kept for parity with the
  # original code but does not appear in the table below.
  labels <- if (AT$robust) {
    c("Robust RW", "Robust AR(1)", "Robust PAR")
  } else {
    c("Random Walk", "AR(1)", "PAR")
  }
  cat(sprintf("%-12s %20s %10s\n", "Hypothesis", "Statistic", "p-value"))
  cat(sprintf("%-12s %20.2f %10.3f\n", labels[1], AT$statistic[1], AT$p.value[1]))
  cat(sprintf("%-12s %20.2f %10.3f\n", labels[2], AT$statistic[2], AT$p.value[2]))
  cat(sprintf("%-12s %20s %10.3f\n", "Combined", "", AT$p.value[3]))
  cat("\n")
  invisible(AT)
}
which.hypothesis.pcitest <- function(AT) {
  # Classifies the outcome of test.pci at significance level AT$alpha:
  #   "PCI" when the combined p-value falls below alpha,
  #   "RW"  when alpha is below the random-walk p-value,
  #   "AR1" otherwise.
  # The result is prefixed with "R" for robust fits, and NA is returned
  # when no alpha level was recorded.
  if (is.na(AT$alpha)) {
    return(NA)
  }
  verdict <-
    if (AT$p.value[3] < AT$alpha) "PCI"
    else if (AT$alpha < AT$p.value[1]) "RW"
    else "AR1"
  if (AT$robust) verdict <- paste("R", verdict, sep="")
  verdict
}
# NOTE: The two lines below are web-page embed boilerplate left over from
# the site this file was extracted from; they are not R code and have been
# commented out so the file parses.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.