Nothing
# fit.R -- functions for fitting partially AR(1) models
# Copyright (C) 2015 Matthew Clegg
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# A copy of the GNU General Public License is available at
# http://www.r-project.org/Licenses/
par.rho.cutoff <- function (n, p=0.05) {
# Calculates a cutoff value c for rho, such that the probability
# is no more than p that fitting a random walk of length n to a PAR
# model will produce a model with rho less than c.
#
# The model is calibrated according to the formula
# (1 - rho) * n = c_p
if (!is.numeric(n)) stop("argument n must be numeric")
pvalues <- c(0.001, 0.01, 0.025, 0.05, 0.1, 0.2, 0.5, 0.8, 0.9)
cp_list <- c(28.1, 20.3, 16.5, 13.8, 11.1, 8, 3.4, 0.33, 0)
cp <- approx(pvalues, cp_list, p, rule=2)
rho <- 1 - cp$y/n
rho[n < 50] <- NA_real_
rho
}
par.nu.default <- function () {
# The default value to be used for the degrees of freedom parameter
# when using robust estimation.
5
}
estimate.rho.par <- function (X) {
# Computes an estimate of mean reversion for the mean-reverting
# portion of a PAR process. If v[k] = Var[X[t+k]-X[t]], then
# rho is given by the variance formula:
# rho = - (v[1] - 2 * v[2] + v[3]) / (2 * v[1] - v[2])
# This method seems to have a larger standard error than the
# median of ratios method used in the previous function.
Xc <- coredata(X)
if (length(Xc) < 5) return(NA_real_)
I <- 1:(length(Xc)-3)
X1X0 <- Xc[I+1] - Xc[I]
X2X0 <- Xc[I+2] - Xc[I]
X3X0 <- Xc[I+3] - Xc[I]
xv1 <- var(X1X0)
xv2 <- var(X2X0)
xv3 <- var(X3X0)
rho <- -(xv1 - 2 * xv2 + xv3)/(2 * xv1 - xv2)
if (is.na(rho)) return(NA_real_)
if (rho < -0.99) rho <- -1
if (rho > 0.99) rho <- 1
rho
}
estimate.sigma.par <- function (X, rho=estimate.rho.par(X), rho.max = 1) {
# Estimates sigma^2_m and sigma^2_r using the variance formulas:
# sigma^2_m = (1/2) ((rho + 1)/(rho - 1)) * (v[2] - 2 * v[1])
# sigma^2_r = (1/2) (v[2] - 2 * sigma^2_m)
# where
# v[k] = var(X[t+k] - X[t])
Xc <- coredata(X)
I <- 1:(length(Xc) - 2)
if (rho > rho.max) rho <- rho.max
if (rho > 0.99) return (c(0, sd(diff(X))))
vard1 <- var(Xc[I+1] - Xc[I])
vard2 <- var(Xc[I+2] - Xc[I])
sigma2_M <- (1/2) * ((rho + 1)/(rho - 1)) * (vard2 - 2 * vard1)
if (sigma2_M > 0.5 * vard2) sigma2_M <- 0.5 * vard2
if (sigma2_M < 0) sigma2_M <- 0
sigma2_R <- (1/2) * (vard2 - 2 * sigma2_M)
# if (sigma2_R < 0) sigma2_R <- 0
sigma_M <- sqrt(sigma2_M)
sigma_R <- sqrt(sigma2_R)
c(sigma_M=sigma_M, sigma_R=sigma_R)
}
estimate.par <- function (X, useR = FALSE, rho.max = 1) {
# Estimates the parameters of a PAR process.
# Returns a three component vector (rho, sigma_M, sigma_R).
#
# If useR is TRUE, then the R implementation is used.
# Otherwise, the C++ implementation is used (about 8X faster).
if (useR) {
rho_hat <- estimate.rho.par(X)
sigma_hat <- estimate.sigma.par(X, rho_hat, rho.max)
res <- c(rho=rho_hat, sigma_M=sigma_hat[1], sigma_R=sigma_hat[2])
} else {
res <- estimate_par_c(X, rho.max)
}
names(res) <- c("rho", "sigma_M", "sigma_R")
res
}
pvmr.par <- function(rho, sigma_M, sigma_R) {
# Returns the proportion of variance attributable to mean reversion
# for a PAR process with parameters rho, sigma_M and sigma_R
if (abs(rho) > 1 || sigma_M < 0 || sigma_R < 0) return(c(pvmr=NA_real_))
if (missing(sigma_M) && missing(sigma_R) && (length(rho) == 3)) {
sigma_M <- rho[2]
sigma_R <- rho[3]
rho <- rho[1]
}
r2 <- (2 * sigma_M^2) / (2 * sigma_M^2 + (1 + rho) * sigma_R^2)
names(r2) <- "pvmr"
r2
}
kalman.gain.par <- function(rho, sigma_M, sigma_R) {
# Returns the Kalman gain vector associated to a steady state PAR
# process with parameters rho, sigma_M and sigma_R
if (missing(sigma_M) && missing(sigma_R) && (length(rho) == 3)) {
sigma_M <- rho[2]
sigma_R <- rho[3]
rho <- rho[1]
}
if (sigma_M == 0 && sigma_R == 0) return (c(NA_real_, NA_real_))
if (sigma_M == 0) return (c(0,1))
if (sigma_R == 0) return (c(1,0))
rad <- sqrt((rho + 1)^2 * sigma_R^2 + 4 * sigma_M^2)
num1 <- 2 * sigma_M^2
den1 <- sigma_R*(rad + (1+ rho) * sigma_R) + 2 * sigma_M^2
num2 <- 2 * sigma_R
den2 <- rad + (1 - rho) * sigma_R
c(num1/den1, num2/den2)
}
kalman.gain.from.pvmr <- function (rho, pvmr) {
# Computes the Kalman gain as a function of rho and R^2[MR]
if (is.na(rho) || is.na(pvmr)) return(c(NA_real_, NA_real_))
if (rho < -1 || rho > 1) stop("invalid value for rho: ", rho)
if (pvmr < 0 || pvmr > 1) stop("invalid value for pvmr: ", pvmr)
if (pvmr == 0) return(c(0,1))
if (pvmr == 1) return(c(1,0))
sigma_M <- sqrt((1 + rho) * pvmr / (2 - 2 * pvmr))
sigma_R <- 1
kalman.gain.par(rho, sigma_M, sigma_R)
}
fit.par.both <- function (Y, # The sequence to which a PAR model is to be fit
robust=FALSE, # If TRUE, robust estimations are performed
lambda=0, # A penalty to be applied to the random walk variance,
# intended to drive the solution towards one with the
# minimum random walk variance.
opt_method=c("css", "kfas", "ss"), # Optimization method to be used.
# kfas = KFAS Kalman filter implementation
# ss = Steady State Kalman filter
# css = C-coded steady state Kalman filter
rho.max = 1, # An upper bound to be applied to the estimate
# of rho, the coefficient of mean-reversion.
# See par.rho.cutoff() for a reasonable upper bound.
nu=par.nu.default() # If robust=TRUE, the degrees of freedom parameter
) {
# Given a PAR sequence X, creates a Kalman filter representation
#
# X[t] = [M[t] R[t]],
#
# where
#
# M[t] = rho*M[t-1] + sigma_M * eps_M,t
# R[t] = R[t-1] + sigma_R * eps_R,t
# eps_M,t ~ N(0,1)
# eps_R,t ~ N(0,1)
#
# Estimates the values of rho, sigma_M and sigma_R using
# maximum likelihood fitting of the Kalman filter.
#
# If lambda is positive, then it represents a penalty factor that is
# used to drive the optimal solution towards the mean reverting
# solution. A negative value of lambda will drive the optimal solution
# towards a random walk.
#
# Returns an S3 object of class par.fit, representing the fit
# that was obtained.
opt_method <- match.arg(opt_method)
Y <- as.numeric(Y)
if (any(is.na(Y))) stop("The PAR model cannot be fit to sequences containing NA's")
if (length(Y) < 50) stop("The PAR model cannot be fit to sequences of length < 50")
if (robust) {
ll_calc_method <- switch(opt_method,
ss = "sst",
css = "csst",
kfas = stop("robust estimation not implemented for opt_method = kfas")
)
} else {
ll_calc_method <- opt_method
}
high_value <- NA
objective <- function (p) {
# p = (rho, sigma_M, sigma_R, R0)
if ((p[1] < -rho.max) || (p[1] > rho.max)) return(high_value)
if ((p[2] < 0) || (p[3] < 0)) return(high_value)
if ((p[2] == 0) && (p[3] == 0)) return(high_value)
loglik.par (Y, p[1], p[2], p[3], 0, p[4], calc_method=ll_calc_method, nu=nu) + lambda * p[3]
}
# obj2 <- function(p) { cat(p, " -> ", objective(p), "\n"); objective(p) }
# optim(start, obj2, hessian=TRUE, method="L-BFGS-B", lower=c(0,0,0,-Inf,-Inf), upper=c(rho.max,2*sddy,2*sddy,Inf,Inf), control=list(trace=4))
# Set up some initial values to be used as starting points
# for the optimization
start_list <- list()
sddy <- sd(diff(Y), na.rm=TRUE)
if (is.na(sddy) || (sddy == 0)) sddy <- median(abs(diff(Y)),na.rm=TRUE)
if (sddy == 0) stop("The PAR model cannot be fit to constant sequences")
fit.mr <- fit.par.mr (Y, opt_method=opt_method, rho.max=rho.max, robust=robust, nu=nu)
fit.rw <- fit.par.rw (Y, opt_method=opt_method, robust=robust, nu=nu)
start_list <- c(start_list, list(fit.mr$par[c(1,2,3,5)]))
start_list <- c(start_list, list(fit.rw$par[c(1,2,3,5)]))
start_list <- c(start_list, list(c(estimate.par(Y, rho.max=rho.max), Y[1])))
# start_list <- c(start_list, list(c(0,0,sddy,0,Y[1])))
# start_list <- c(start_list, list(c(0,sddy,0,Y[1],0)))
# start_list <- c(start_list, list(c(0.5, estimate.sigma.par(Y, 0.5)[1:2], 0, Y[1])))
# start_list <- c(start_list, list(c(estimate.par(Y), 0, Y[1])))
# Search for the best global optimum, starting from each of the
# starting points given in start_list
best_value <- objective(start_list[[1]])+1
for (start in start_list) {
if (any(is.na(start))) next
high_value <- objective(start) + 1
rfit.nm <- optim(start, objective)
rfit.lbfgsb <- optim(rfit.nm$par, objective, method="L-BFGS-B",
lower=c(-rho.max,0,0,-Inf), upper=c(rho.max, 2*sddy, 2*sddy, Inf))
# lower=c(0,0,0,-Inf), upper=c(rho.max, 2*sddy, 2*sddy, Inf))
rfit <- if (rfit.nm$value < rfit.lbfgsb$value) rfit.nm else rfit.lbfgsb
if (rfit$value < best_value) {
bestfit <- rfit
best_value <- rfit$value
# cat(sprintf("r %6.2f rho %8.4f sigma_M %8.4f sigma_R %8.4f -> %8.4f\n",
# rrho, bestfit$par[1], bestfit$par[2], bestfit$par[3], bestfit$value))
}
}
# L-BGFS-B seems to terminate prematurely in many cases, so run a few extra
# iterations starting from our best estimate of a local minimum,
# just to be sure we have found a local minimum
# pvalue <- best_value + 1
# maxiter <- 5
# while (abs(pvalue - best_value) > 1e-6 && maxiter > 0) {
# pvalue <- best_value
# bestfit <- optim(bestfit$par, objective, hessian=TRUE, method="L-BFGS-B",
# lower=c(-rho.max,0,0,-Inf), upper=c(rho.max, 2*sddy, 2*sddy, Inf))
# best_value <- bestfit$value
# maxiter <- maxiter - 1
# }
bestfit <- optim(bestfit$par, objective, hessian=TRUE)
# Convert the hessian of the fit to a standard error
ps <- rep(NA_real_, nrow(bestfit$hessian))
suppressWarnings(try(ps <- sqrt(diag(solve(bestfit$hessian))), silent=TRUE))
if (any(is.na(ps))) {
bestfit <- nlm(objective, bestfit$par, hessian=TRUE)
suppressWarnings(try(ps <- sqrt(diag(solve(bestfit$hessian))), silent=TRUE))
bestfit$par <- bestfit$estimate
bestfit$value <- bestfit$minimum
}
ps <- c(ps[1:3], NA, ps[4])
names(ps) <- c("rho.se", "sigma_M.se", "sigma_R.se", "M0.se", "R0.se")
bestpar <- c(bestfit$par[1:3], 0.0, bestfit$par[4])
names(bestpar) <- c("rho", "sigma_M", "sigma_R", "M0", "R0")
fit <- structure(list(data=Y,
rho = bestpar[1],
sigma_M = bestpar[2],
sigma_R = bestpar[3],
M0 = bestpar[4],
R0 = bestpar[5],
par = bestpar,
stderr = ps,
negloglik = bestfit$value,
lambda = lambda,
pvmr = pvmr.par(bestpar[1], bestpar[2], bestpar[3]),
rho.max = rho.max,
robust = robust,
nu = nu,
opt_method = opt_method,
optim.fit = bestfit,
par_opt=c("rho","sigma_M","sigma_R","R0")),
class="par.fit")
fit
}
fit.par.mr <- function (Y, # The sequence to which a PAR model is to be fit
robust=FALSE, # If TRUE, robust estimations are performed
opt_method=c("css","kfas", "ss"), # Optimization method to be used.
# kfas = KFAS Kalman filter implementation
# ss = Steady State Kalman filter
# css = C-coded steady state Kalman filter
rho.max = 1, # An upper bound to be applied to the estimate
# of rho, the coefficient of mean-reversion.
# See par.rho.cutoff() for a reasonable value.
nu=par.nu.default() # If robust=TRUE, the degrees of freedom parameter
) {
# Given a PAR sequence X, creates a Kalman filter representation
#
# X[t] = [M[t] R[t]],
#
# where
#
# M[t] = rho*M[t-1] + sigma_M * eps_M,t
# R[t] = R[t-1] + sigma_R * eps_R,t
# eps_M,t ~ N(0,1)
# eps_R,t ~ N(0,1)
#
# Estimates the values of rho and sigma_M using
# maximum likelihood fitting of the Kalman filter,
# assuming that sigma_R=0.
#
# Returns an S3 object of class par.fit, representing the fit
# that was obtained.
opt_method <- match.arg(opt_method)
Y <- as.numeric(Y)
if (any(is.na(Y))) stop("The PAR model cannot be fit to sequences containing NA's")
if (length(Y) < 50) stop("The PAR model cannot be fit to sequences of length < 50")
if (robust) {
ll_calc_method <- switch(opt_method,
ss = "sst",
css = "csst",
kfas = stop("robust estimation not implemented for opt_method = kfas")
)
} else {
ll_calc_method <- opt_method
}
high_value <- NA
objective <- function (p) {
# (rho, sigma_M, R[0])
if (p[2] <= 0) return(high_value)
if (abs(p[1]) > 1) return(high_value)
# cat(p, " -> ", loglik.par (Y, p[1], p[2], 0, 0, p[3], calc_method=ll_calc_method, nu=nu), "\n")
loglik.par (Y, p[1], p[2], 0, 0, p[3], calc_method=ll_calc_method, nu=nu)
}
# Set up some initial values to be used as starting points
# for the optimization
start_list <- list()
sddy <- sd(diff(Y), na.rm=TRUE)
# if (is.na(sddy) || sddy == 0) sddy <- 1
if (is.na(sddy) || (sddy == 0)) sddy <- median(abs(diff(Y)), na.rm=TRUE)
if (sddy == 0) stop("The PAR model cannot be fit to constant sequences")
start_list <- c(start_list, list(c(0,sddy,Y[1])))
start_list <- c(start_list, list(c(0,sddy,mean(Y))))
start_list <- c(start_list, list(c(estimate.par(Y, rho.max=rho.max)[1:2], Y[1])))
start_list <- c(start_list, list(c(estimate.par(Y, rho.max=rho.max)[1:2], mean(Y))))
# Search for the best global optimum, starting from each of the
# starting points given in start_list
best_value <- objective(start_list[[1]])+1
for (start in start_list) {
if (any(is.na(start))) next;
high_value <- objective(start) + 1
rfit <- optim(start, objective, hessian=TRUE, method="L-BFGS-B",
lower=c(-rho.max,0,-Inf), upper=c(rho.max, 2*sddy, Inf))
# lower=c(0,0,-Inf), upper=c(rho.max, 2*sddy, Inf))
if (rfit$value < best_value) {
bestfit <- rfit
best_value <- rfit$value
# cat(sprintf("r %6.2f rho %8.4f sigma_M %8.4f sigma_R %8.4f -> %8.4f\n",
# rrho, bestfit$par[1], bestfit$par[2], bestfit$par[3], bestfit$value))
}
}
# L-BGFS-B seems to terminate prematurely in many cases, so run a few extra
# iterations starting from our best estimate of a local minimum,
# just to be sure we have found a local minimum
pvalue <- best_value + 1
maxiter <- 5
while (abs(pvalue - best_value) > 1e-6 && maxiter > 0) {
pvalue <- best_value
bestfit <- optim(bestfit$par, objective, hessian=TRUE, method="L-BFGS-B",
lower=c(-rho.max,0,-Inf), upper=c(rho.max, 2*sddy, Inf))
best_value <- bestfit$value
maxiter <- maxiter - 1
}
# Convert the hessian of the fit to a standard error
ps <- rep(NA_real_, nrow(bestfit$hessian))
suppressWarnings(try(ps <- sqrt(diag(solve(bestfit$hessian))), silent=TRUE))
ps <- c(ps[1:2], NA, NA, ps[3])
names(ps) <- c("rho.se", "sigma_M.se", "sigma_R.se", "M0.se", "R0.se")
bestpar <- c(bestfit$par[1:2], 0, 0, bestfit$par[3])
names(bestpar) <- c("rho", "sigma_M", "sigma_R", "M0", "R0")
# recover()
fit <- structure(list(data=Y,
rho = bestpar[1],
sigma_M = bestpar[2],
sigma_R = bestpar[3],
M0 = bestpar[4],
R0 = bestpar[5],
par = bestpar,
stderr = ps,
negloglik = bestfit$value,
lambda = 0,
pvmr = c(pvmr=1),
rho.max = rho.max,
robust = robust,
nu = nu,
opt_method = opt_method,
optim.fit = bestfit,
par_opt=c("rho", "sigma_M", "R0")),
class="par.fit")
fit
}
fit.par.rw <- function (Y, # The sequence to which a PAR model is to be fit
robust=FALSE, # If TRUE, robust estimations are performed
opt_method=c("css", "kfas", "ss"), # Optimization method to be used.
# kfas = KFAS Kalman filter
# ss = Steady State Kalman filter
# css = C-coded steady state Kalman filter
nu=par.nu.default() # If robust=TRUE, the degrees of freedom parameter
) {
# Given a PAR sequence X, creates a Kalman filter representation
#
# X[t] = [M[t] R[t]],
#
# where
#
# M[t] = rho*M[t-1] + sigma_M * eps_M,t
# R[t] = R[t-1] + sigma_R * eps_R,t
# eps_M,t ~ N(0,1)
# eps_R,t ~ N(0,1)
#
# Estimates the values of sigma_R using
# maximum likelihood fitting of the Kalman filter,
# assuming that rho=sigma_M=0.
#
# Returns an S3 object of class par.fit, representing the fit
# that was obtained.
opt_method <- match.arg(opt_method)
Y <- as.numeric(Y)
if (any(is.na(Y))) stop("The PAR model cannot be fit to sequences containing NA's")
if (length(Y) < 50) stop("The PAR model cannot be fit to sequences of length < 50")
if (robust) {
ll_calc_method <- switch(opt_method,
ss = "sst",
css = "csst",
kfas = stop("robust estimation not implemented for opt_method = kfas")
)
} else {
ll_calc_method <- opt_method
}
obj <- function(sr) loglik.par(Y, 0, 0, sr, 0, Y[1], ll_calc_method, nu)
sddy <- sd(diff(Y), na.rm=TRUE)
res <- optimize(obj, lower=sddy*0.00001, upper=2*sddy+1)
sigma_R <- res$minimum
negloglik <- res$objective
se <- sigma_R / sqrt(length(Y))
par <- c(rho=0,sigma_M=0,sigma_R=sigma_R,M0=0,R0=Y[1])
stderr <- c(rho.se=NA_real_, sigma_M.se=NA_real_, sigma_R.se=se, M0.se=NA_real_, R0.se=0)
fit <- structure(list(data=Y,
rho = 0,
sigma_M = 0,
sigma_R = sigma_R,
M0 = par[4],
R0 = par[5],
par = par,
stderr = stderr,
negloglik = negloglik,
lambda = 0,
pvmr = c(pvmr=0),
rho.max = 1.0,
robust = robust,
nu = nu,
opt_method = opt_method,
par_opt=c("sigma_R")),
class="par.fit")
fit
}
fit.par <- function (Y, # The sequence to which a PAR model is to be fit
robust=FALSE, # If TRUE, robust estimations are performed
model=c("par", "ar1", "rw"), # Specifies which parameters are to be estimated
# par = estimate rho, sigma_M and sigma_R
# ar1 = estimate rho and sigma_M, assuming sigma_R = 0
# rw = estimate sigma_R, assuming rho = sigma_M = 0
lambda=0, # A penalty to be applied to the random walk variance,
# intended to drive the solution towards one with the
# minimum random walk variance.
opt_method=c("css", "kfas", "ss"), # Optimization method to be used.
# kfas = KFAS Kalman filter method
# ss = Steady State Kalman filter
# css = C-coded steady state Kalman filter
rho.max = 1, # An upper bound to be applied to the estimate
# of rho, the coefficient of mean-reversion.
# See par.rho.cutoff() for a reasonable approach
nu=par.nu.default() # If robust is TRUE, the degrees of freedom parameter
) {
# Given a PAR sequence X, creates a Kalman filter representation
#
# X[t] = [M[t] R[t]],
#
# where
#
# M[t] = rho*M[t-1] + sigma_M * eps_M,t
# R[t] = R[t-1] + sigma_R * eps_R,t
# eps_M,t ~ N(0,1)
# eps_R,t ~ N(0,1)
#
# Estimates the values of rho, sigma_M and sigma_R using
# maximum likelihood fitting of the Kalman filter.
#
# Returns an S3 object of class par.fit, representing the fit
# that was obtained.
#
# If lambda is non-zero, then it represents a penalty factor that is
# used to drive the optimal solution towards the mean reverting
# solution.
#
# The parameter "par_opt" specifies which of the model parameters to
# optimize.
opt_method <- match.arg(opt_method)
model <- match.arg(model)
Yorig <- Y
if (!is.null(dim(Y))) {
if (dim(Y)[2] > 1) stop("Y must be a single column")
Y <- Y[,1]
}
if (length(Y) < 50) stop("Y does not contain enough data to generate a reliable estimate")
if (any(is.na(Y))) stop("fit.par does not accept data containing NA's")
if (sd(Y) == 0) stop("The input sequence Y is a constant sequence")
Y <- coredata(Y)
A <- switch (model,
par = fit.par.both(Y, lambda=lambda, opt_method=opt_method, rho.max=rho.max, robust=robust, nu=nu),
ar1 = fit.par.mr(Y, opt_method=opt_method, rho.max=rho.max, robust=robust, nu=nu),
rw = fit.par.rw(Y, opt_method=opt_method, robust=robust, nu=nu))
if (is.zoo(Yorig)) {
A$index <- index(Yorig)
} else {
A$index <- 1:length(Y)
}
A$model <- model
A
}
statehistory.par <- function(A, data=A$data) {
# On input, A is an par.fit object as produced by fit.par.
# Creates a data.frame containing the inferred values of
# the states of the mean-revering and random walk components
# of the process, based upon the model parameters that were fit.
#
# Returns a data.frame containing the following columns:
# X: The value of the process at this time
# M: The inferred state of the mean reverting component
# R: The inferred state of the random walk component
# eps_M: The inferred shock to the mean reverting component
# eps_R: The inferred shock to the random walk component
n <- length(data)
M <- numeric(n)
R <- numeric(n)
eps_M <- numeric(n)
eps_R <- numeric(n)
Mprev <- A$par[4]
Rprev <- A$par[5]
K <- kalman.gain.par(A$rho, A$sigma_M, A$sigma_R)
for (i in 1:n) {
xhat <- A$rho * Mprev + Rprev
e <- as.numeric(data[i]) - xhat
eps_M[i] <- e * K[1]
eps_R[i] <- e * K[2]
M[i] <- A$rho * Mprev + eps_M[i]
R[i] <- Rprev + eps_R[i]
Mprev <- M[i]
Rprev <- R[i]
}
df <- data.frame(X=data, M=M, R=R, eps_M=eps_M, eps_R=eps_R)
colnames(df) <- c("X", "M", "R", "eps_M", "eps_R")
# if (is(data, "zoo")) df <- zoo(df, index(data))
df
}
print.par.fit <- function (x, ...) {
# Given a PAR structure A, prints it in summary form
print.internal.par.fit(x)
}
print.internal.par.fit <- function (A) {
# Given a PAR structure A, prints it in summary form
printf("Fitted model:\n")
printf(" X[t] = M[t] + R[t]\n")
printf(" M[t] = %.4f M[t-1] + eps_M,t, ", A$rho)
if (!A$robust) {
printf("eps_M,t ~ N(0, %.4f^2)\n", A$sigma_M)
} else {
printf("eps_M,t ~ t(0, %.4f, %.2f)\n", A$sigma_M, A$nu)
}
if (!is.null(A$stderr)) printf("%8s(%.4f)%33s(%.4f)\n", "", A$stderr[1], "", A$stderr[2])
if (!A$robust) {
printf(" R[t] = R[t-1] + eps_R,t, eps_R,t ~ N(0, %.4f^2)\n", A$sigma_R)
} else {
printf(" R[t] = R[t-1] + eps_R,t, eps_R,t ~ t(0, %.4f, %.2f)\n", A$sigma_R, A$nu)
}
if (!is.null(A$stderr)) printf("%49s(%.4f)\n", "", A$stderr[3])
printf(" M_0 = %.4f, R_0 = %.4f\n", A$M0, A$R0)
if (!is.null(A$stderr)) {
if (is.na(A$stderr[4])) {
printf(" (NA) (%.4f)\n", A$stderr[5])
} else {
printf(" (%.4f) (%.4f)\n", A$stderr[4], A$stderr[5])
}
}
if (!("sigma_M" %in% A$par_opt)) cat("(Only sigma_R was fit; this is a random walk model.)\n")
if (!("sigma_R" %in% A$par_opt)) cat("(Only rho and sigma_M were fit; this is a pure AR(1) model.)\n")
printf("Proportion of variance attributable to mean reversion (pvmr) = %.4f\n", A$pvmr)
printf("Negative log likelihood = %.2f\n", A$negloglik)
}
plot.par.fit <- function (x, ...) {
# Given a PAR structure A, plots it.
plot.internal.par.fit(x)
}
plot.internal.par.fit <- function (A) {
# Given a PAR structure A, plots it.
# Make R CMD check happy
Date <- Value <- Label <- Facet <- ll <- par.rw0.rho_quantile_table <- NULL
sh <- statehistory.par(A)
n <- nrow(sh)
df1.1 <- data.frame(Date=A$index, Label="Actual", Value=sh$X)
df1.2 <- data.frame(Date=A$index, Label="Model", Value=sh$R)
df1 <- rbind(df1.1, df1.2)
p1 <- ggplot (df1, aes(x=Date, y=Value, colour=Label)) + geom_line () +
ylab("Price") + xlab("") + theme(legend.position="top") +
scale_colour_manual(name="",
labels=c("Actual", "Model"),
# values=c("Black", "#0054A6", "#00AEEF")) + # Black, Blue, Cyan
values=c("Black", "#00A651")) + # Black, Green, Cyan
# scale_colour_discrete(name="") +
ggtitle("Price Series")
df2 <- data.frame(Date=A$index, Label="M[t]", Value=sh$M)
sdR <- sd(sh$M)
hlines <- data.frame(Value=c(2 * sdR, sdR, -sdR, -2 * sdR),
Facet=c("two","one","one","two"))
p2 <- ggplot(df2, aes(x=Date, y=Value)) + geom_line() +
ggtitle("Mean Reverting Component") + ylab("Price") + xlab("") +
geom_hline(data=hlines, aes(yintercept=Value, colour=Facet), linetype="dashed")
grid.newpage()
pushViewport(viewport(layout=grid.layout(9, 1)))
print(p1, vp=viewport(layout.pos.row=1:5, layout.pos.col=1))
print(p2, vp=viewport(layout.pos.row=6:9, layout.pos.col=1))
}
plot.par.likelihood.neighborhood <- function (A, rho_lim=c(), sigma_M_lim=c(), sigma_R_lim=c(), R0_lim=c(),
maxview=FALSE) {
if (!is(A, "par.fit")) stop("A must be an par.fit")
ll <- NULL
rho <- A$rho
sigma_M <- A$sigma_M
sigma_R <- A$sigma_R
R0 <- A$R0
dsd <- sd(diff(A$data))
scaled_seq <- function (x, xse, xmin, xmax) {
if (!is.na(xse) & !is.na(x)) {
xlow <- x - xse
xhigh <- x + xse
} else if (!is.na(x)) {
xlow <- max(xmin, x - abs(x) * 0.2)
xhigh <- min(xmax, x + abs(x) * 0.2)
} else {
xlow <- xmin
xhigh <- xmax
}
xlow <- max(xlow, xmin)
xhigh <- min(xhigh, xmax)
seq(xlow, xhigh, length.out=200)
}
if (!missing(rho_lim)) {
rho_inc <- seq(rho_lim[1], rho_lim[2], length.out=200)
} else if (maxview) {
rho_inc <- seq(-1,1,length.out=200)
} else {
rho_inc <- scaled_seq(A$rho, A$stderr["rho.se"], -1, 1)
}
ll_by_rho <- sapply(rho_inc, function(r) loglik.par(A$data, r, A$sigma_M, A$sigma_R, 0, A$R0))
rho.df <- data.frame(rho = rho_inc, ll = ll_by_rho)
if (!missing(sigma_M_lim)) {
sigma_M_inc <- seq(sigma_M_lim[1], sigma_M_lim[2], length.out=200)
} else if (maxview) {
sigma_M_inc <- seq(0, 2 * dsd, length.out=200)
} else {
sigma_M_inc <- scaled_seq(A$sigma_M, A$stderr["sigma_M.se"], 0, 2 * dsd)
}
ll_by_sigma_M <- sapply(sigma_M_inc, function(s) loglik.par(A$data, A$rho, s, A$sigma_R, 0, A$R0))
sigma_M.df <- data.frame(sigma_M = sigma_M_inc, ll=ll_by_sigma_M)
if (!missing(sigma_R_lim)) {
sigma_R_inc <- seq(sigma_R_lim[1], sigma_R_lim[2], length.out=200)
} else if (maxview) {
sigma_R_inc <- seq(0, 2 * dsd, length.out=200)
} else {
sigma_R_inc <- scaled_seq(A$sigma_R, A$stderr["sigma_R.se"], 0, 2 * dsd)
}
ll_by_sigma_R <- sapply(sigma_R_inc, function(s) loglik.par(A$data, A$rho, A$sigma_M, s, 0, A$R0))
sigma_R.df <- data.frame(sigma_R = sigma_R_inc, ll=ll_by_sigma_R)
if (!missing(R0_lim)) {
R0_inc <- seq(R0_lim[1], R0_lim[2], length.out=200)
} else if (maxview) {
R0_inc <- scaled_seq(A$R0, A$stderr["R0.se"], A$data[1] - 2 * dsd, A$data[1] + 2 * dsd)
} else {
R0_inc <- scaled_seq(A$R0, A$stderr["R0.se"], A$data[1] - 2 * dsd, A$data[1] + 2 * dsd)
}
ll_by_R0 <- sapply(R0_inc, function(r) loglik.par(A$data, A$rho, A$sigma_M, A$sigma_R, 0, r))
R0.df <- data.frame(R0 = R0_inc, ll=ll_by_R0)
p.rho <- ggplot(rho.df, aes(x=rho, y=ll)) + geom_line() +
geom_vline(xintercept=rho, colour="red") +
ylab("-Log Likelihood") + xlab(expression(rho))
p.sigma_M <- ggplot(sigma_M.df, aes(x=sigma_M, y=ll)) + geom_line() +
geom_vline(xintercept=sigma_M, colour="red") +
ylab("-Log Likelihood") + xlab(expression(sigma[M]))
p.sigma_R <- ggplot(sigma_R.df, aes(x=sigma_R, y=ll)) + geom_line() +
geom_vline(xintercept=sigma_R, colour="red") +
ylab("-Log Likelihood") + xlab(expression(sigma[R]))
p.R0 <- ggplot(R0.df, aes(x=R0, y=ll)) + geom_line() +
geom_vline(xintercept=R0, colour="red") +
ylab("-Log Likelihood") + xlab(expression(R[0]))
grid.newpage()
pushViewport(viewport(layout=grid.layout(2,2)))
print(p.rho, vp=viewport(layout.pos.row=1, layout.pos.col=1))
print(p.sigma_M, vp=viewport(layout.pos.row=1, layout.pos.col=2))
print(p.sigma_R, vp=viewport(layout.pos.row=2, layout.pos.col=1))
print(p.R0, vp=viewport(layout.pos.row=2, layout.pos.col=2))
}
plot.par.likelihood.neighborhood.3d <- function (A, rho_lim=c(), sigma_M_lim=c(), sigma_R_lim=c(), R0_lim=c()) {
if (!is(A, "par.fit")) stop("A must be an par.fit")
# opar <- par(mfrow=c(3,2))
# on.exit(par(opar))
npoints <- 50
rho <- A$rho
sigma_M <- A$sigma_M
sigma_R <- A$sigma_R
R0 <- A$R0
dsd <- sd(diff(A$data))
scaled_seq <- function (x, xse, xmin, xmax) {
if (!is.na(xse) & !is.na(x)) {
xlow <- x - xse
xhigh <- x + xse
} else if (!is.na(x)) {
xlow <- max(xmin, x - abs(x) * 0.2)
xhigh <- min(xmax, x + abs(x) * 0.2)
} else {
xlow <- xmin
xhigh <- xmax
}
xlow <- max(xlow, xmin)
xhigh <- min(xhigh, xmax)
seq(xlow, xhigh, length.out=npoints)
}
if (missing(rho_lim)) {
rho_inc <- scaled_seq(A$rho, A$stderr["rho.se"], -1, 1)
} else {
rho_inc <- seq(rho_lim[1], rho_lim[2], length.out=npoints)
}
if (missing(sigma_M_lim)) {
sigma_M_inc <- scaled_seq(A$sigma_M, A$stderr["sigma_M.se"], 0, 2 * dsd)
} else {
sigma_M_inc <- seq(sigma_M_lim[1], sigma_M_lim[2], length.out=npoints)
}
if (missing(sigma_R_lim)) {
sigma_R_inc <- scaled_seq(A$sigma_R, A$stderr["sigma_R.se"], 0, 2 * dsd)
} else {
sigma_R_inc <- seq(sigma_R_lim[1], sigma_R_lim[2], length.out=npoints)
}
if (missing(R0_lim)) {
R0_inc <- scaled_seq(A$R0, A$stderr["R0.se"], A$data[1] - 2 * dsd, A$data[1] + 2 * dsd)
} else {
R0_inc <- seq(R0_lim[1], R0_lim[2], length.out=npoints)
}
frho_sigma_M <- function(r,sm) -loglik.par(A$data, r, sm, sigma_R, 0, R0)
frho_sigma_R <- function(r,sr) -loglik.par(A$data, r, sigma_M, sr, 0, R0)
frho_R0 <- function(r,r0) -loglik.par(A$data, r, sigma_M, sigma_R, 0, r0)
fsigma_M_sigma_R <- function(sm,sr) -loglik.par(A$data, rho, sm, sr, 0, R0)
fsigma_M_R0 <- function(sm,r0) -loglik.par(A$data, rho, sm, sigma_R, 0, r0)
fsigma_R_R0 <- function(sr,r0) -loglik.par(A$data, rho, sigma_M, sr, 0, r0)
vfrho_sigma_M <- Vectorize(frho_sigma_M)
vfrho_sigma_R <- Vectorize(frho_sigma_R)
vfrho_R0 <- Vectorize(frho_R0)
vfsigma_M_sigma_R <- Vectorize(fsigma_M_sigma_R)
vfsigma_M_R0 <- Vectorize(fsigma_M_R0)
vfsigma_R_R0 <- Vectorize(fsigma_R_R0)
rho_sigma_M <- outer(rho_inc, sigma_M_inc, vfrho_sigma_M)
rho_sigma_R <- outer(rho_inc, sigma_R_inc, vfrho_sigma_R)
rho_R0 <- outer(rho_inc, R0_inc, vfrho_R0)
sigma_M_sigma_R <- outer(sigma_M_inc, sigma_R_inc, vfsigma_M_sigma_R)
sigma_M_R0 <- outer(sigma_M_inc, R0_inc, vfsigma_M_R0)
sigma_R_R0 <- outer(sigma_R_inc, R0_inc, vfsigma_R_R0)
# title(main="", xlab=expression(rho), ylab=expression(sigma[M]))
# persp(rho_inc, sigma_M_inc, rho_sigma_M, theta=120, phi=30, expand=0.5, col="lightblue",
# ltheta=120, shade=0.75, zlab="", ticktype="detailed")
# require(plot3D)
persp3D(rho_inc, sigma_M_inc, rho_sigma_M, xlab="rho", ylab="sigma_M", clab=c("Log", "Likelihood"))
readline("Press enter for next graph: ")
persp3D(rho_inc, sigma_R_inc, rho_sigma_R, xlab="rho", ylab="sigma_R", clab=c("Log", "Likelihood"))
readline("Press enter for next graph: ")
persp3D(rho_inc, R0_inc, rho_R0, xlab="rho", ylab="R0", clab=c("Log", "Likelihood"))
readline("Press enter for next graph: ")
persp3D(sigma_M_inc, sigma_R_inc, sigma_M_sigma_R, xlab="sigma_M", ylab="sigma_R", clab=c("Log", "Likelihood"))
readline("Press enter for next graph: ")
persp3D(sigma_M_inc, R0_inc, sigma_M_R0, xlab="sigma_M", ylab="R0", clab=c("Log", "Likelihood"))
readline("Press enter for next graph: ")
persp3D(sigma_R_inc, R0_inc, sigma_R_R0, xlab="sigma_R", ylab="R0", clab=c("Log", "Likelihood"))
# return(1)
# persp(rho_inc, sigma_R_inc, rho_sigma_R, theta=30, phi=30, expand=0.5, col="lightblue",
# ltheta=120, shade=0.75, ticktype="detailed", xlab=expression(rho), ylab=expression(sigma[R]))
# persp(rho_inc, R0_inc, rho_R0, theta=30, phi=30, expand=0.5, col="lightblue",
# ltheta=120, shade=0.75, ticktype="detailed", xlab=expression(rho), ylab=expression(R[0]))
# persp(sigma_M_inc, sigma_R_inc, sigma_M_sigma_R, theta=30, phi=30, expand=0.5, col="lightblue",
# ltheta=120, shade=0.75, ticktype="detailed", xlab=expression(sigma[M]), ylab=expression(sigma[R]))
# persp(sigma_M_inc, R0_inc, sigma_M_R0, theta=30, phi=30, expand=0.5, col="lightblue",
# ltheta=120, shade=0.75, ticktype="detailed", xlab=expression(sigma[M]), ylab=expression(R[0]))
# persp(sigma_R_inc, R0_inc, sigma_R_R0, theta=30, phi=30, expand=0.5, col="lightblue",
# ltheta=120, shade=0.75, ticktype="detailed", xlab=expression(sigma[R]), ylab=expression(R[0]))
}
as.data.frame.par.fit <- function (x, row.names, optional, ...) {
# On input, A is the result of fitting a partially autoregressive series.
# Creates a single row data.frame from A containing the key values of the fit.
# The columns in the data.frame are:
# rho
# sigma_M
# sigma_R
# rho.se
# sigma_M.se
# sigma_R.se
# pvmr
as.data.frame.internal.par.fit (x)
}
as.data.frame.internal.par.fit <- function (A) {
# On input, A is the result of fitting a partially autoregressive series.
# Creates a single row data.frame from A containing the key values of the fit.
# The columns in the data.frame are:
# rho
# sigma_M
# sigma_R
# rho.se
# sigma_M.se
# sigma_R.se
# pvmr
df <- data.frame(robust=A$robust,
nu=A$nu,
opt_method=A$opt_method,
n=length(A$data),
rho=A$rho,
sigma_M=A$sigma_M,
sigma_R=A$sigma_R,
M0=A$par[4],
R0=A$par[5],
rho.se=A$stderr[1],
sigma_M.se=A$stderr[2],
sigma_R.se=A$stderr[3],
M0.se=A$stderr[4],
R0.se=A$stderr[5],
lambda=A$lambda,
pvmr=A$pvmr,
negloglik=A$negloglik)
rownames(df) <- NULL
df
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.