# R/fit.R in matthewclegg/partialAR: Partial Autoregression

# fit.R -- functions for fitting partially AR(1) models
# Copyright (C) 2015 Matthew Clegg

#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  A copy of the GNU General Public License is available at
#  http://www.r-project.org/Licenses/
par.rho.cutoff <- function (n, p=0.05) {
    # Returns a cutoff value c for rho such that fitting a PAR model to a
    # pure random walk of length n yields an estimate of rho below c with
    # probability at most p.
    #
    # The cutoff is calibrated by the relation
    #    (1 - rho) * n = c_p
    # where c_p is interpolated from a table of simulated quantiles.
    #
    # Returns NA for n < 50, where the calibration is unreliable.

    if (!is.numeric(n)) stop("argument n must be numeric")

    tab_probs <- c(0.001, 0.01, 0.025, 0.05, 0.1, 0.2, 0.5, 0.8, 0.9)
    tab_cps   <- c(28.1, 20.3, 16.5, 13.8, 11.1, 8, 3.4, 0.33, 0)

    # rule=2 clamps p outside the tabulated range to the nearest endpoint
    # instead of producing NA
    cp_at_p <- approx(tab_probs, tab_cps, p, rule=2)$y
    cutoff <- 1 - cp_at_p / n
    cutoff[n < 50] <- NA_real_
    cutoff
}

par.nu.default <- function () {
    # Default degrees-of-freedom parameter for the Student's t
    # distribution used when robust estimation is requested.
    5
}

estimate.rho.par <- function (X) {
    # Estimates rho, the mean-reversion coefficient of the mean-reverting
    # component of a PAR process, from lagged-difference variances.
    # With v[k] = Var[X[t+k] - X[t]], the estimator is
    #   rho = -(v[1] - 2 * v[2] + v[3]) / (2 * v[1] - v[2])
    # NOTE(review): this estimator appears to have a larger standard error
    # than a median-of-ratios estimator.
    #
    # Returns NA for series shorter than 5 observations; estimates within
    # 0.01 of +/-1 are snapped to exactly +/-1.
    series <- coredata(X)
    if (length(series) < 5) return(NA_real_)
    base <- seq_len(length(series) - 3)

    diff1 <- series[base + 1] - series[base]
    diff2 <- series[base + 2] - series[base]
    diff3 <- series[base + 3] - series[base]

    v1 <- var(diff1)
    v2 <- var(diff2)
    v3 <- var(diff3)

    rho <- -(v1 - 2 * v2 + v3) / (2 * v1 - v2)
    if (is.na(rho)) return(NA_real_)
    if (rho < -0.99) return(-1)
    if (rho > 0.99) return(1)
    rho
}

estimate.sigma.par <- function (X, rho=estimate.rho.par(X), rho.max = 1) {
    # Estimates sigma_M and sigma_R of a PAR process using the variance
    # formulas, where v[k] = var(X[t+k] - X[t]):
    #   sigma^2_M = (1/2) ((rho + 1)/(rho - 1)) * (v[2] - 2 * v[1])
    #   sigma^2_R = (1/2) (v[2] - 2 * sigma^2_M)
    #
    # Returns a named vector c(sigma_M=, sigma_R=).  Previously the
    # rho > 0.99 early return was unnamed, inconsistent with the normal
    # return path, and an NA rho (as estimate.rho.par can produce)
    # caused an error in the comparison below.

    Xc <- coredata(X)
    if (is.na(rho)) return(c(sigma_M=NA_real_, sigma_R=NA_real_))
    I <- 1:(length(Xc) - 2)

    if (rho > rho.max) rho <- rho.max
    # The sigma_M formula is singular at rho = 1; treat a near-unit rho
    # as a pure random walk.
    if (rho > 0.99) return (c(sigma_M=0, sigma_R=sd(diff(X))))
    vard1 <- var(Xc[I+1] - Xc[I])
    vard2 <- var(Xc[I+2] - Xc[I])
    sigma2_M <- (1/2) * ((rho + 1)/(rho - 1)) * (vard2 - 2 * vard1)
    # Clamp sigma^2_M to its feasible range [0, vard2/2]
    if (sigma2_M > 0.5 * vard2) sigma2_M <- 0.5 * vard2
    if (sigma2_M < 0) sigma2_M <- 0

    sigma2_R <- (1/2) * (vard2 - 2 * sigma2_M)
    # NOTE(review): sigma2_R can be slightly negative by sampling error,
    # making sqrt() below return NaN; a clamp to zero was deliberately
    # commented out by the original author, so it is preserved as-is.
    #	if (sigma2_R < 0) sigma2_R <- 0

    c(sigma_M=sqrt(sigma2_M), sigma_R=sqrt(sigma2_R))
}

estimate.par <- function (X, useR = FALSE, rho.max = 1) {
    # Estimates the parameters of a PAR process from lagged-difference
    # variances, returning a named vector c(rho, sigma_M, sigma_R).
    #
    # useR = TRUE selects the pure-R implementation; otherwise the C++
    # implementation estimate_par_c is used (about 8X faster).

    if (useR) {
        rho_est <- estimate.rho.par(X)
        sigma_est <- estimate.sigma.par(X, rho_est, rho.max)
        estimates <- c(rho=rho_est, sigma_M=sigma_est[1], sigma_R=sigma_est[2])
    } else {
        estimates <- estimate_par_c(X, rho.max)
    }
    names(estimates) <- c("rho", "sigma_M", "sigma_R")
    estimates
}

pvmr.par <- function(rho, sigma_M, sigma_R) {
    # Returns the proportion of variance attributable to mean reversion
    # (pvmr) for a PAR process with parameters rho, sigma_M and sigma_R.
    #
    # May also be called with a single length-3 vector
    # c(rho, sigma_M, sigma_R) as the first argument.
    #
    # Returns a named scalar c(pvmr=...); NA for invalid parameters.

    # Unpack the vector calling form FIRST.  Previously the validation
    # below ran before this block, so the vector form always errored:
    # sigma_M/sigma_R were missing, and abs(rho) > 1 on a length-3 rho
    # is an error for || in modern R.
    if (missing(sigma_M) && missing(sigma_R) && (length(rho) == 3)) {
        sigma_M <- rho[2]
        sigma_R <- rho[3]
        rho <- rho[1]
    }

    if (abs(rho) > 1 || sigma_M < 0 || sigma_R < 0) return(c(pvmr=NA_real_))

    r2 <- (2 * sigma_M^2) / (2 * sigma_M^2 + (1 + rho) * sigma_R^2)
    names(r2) <- "pvmr"
    r2
}

kalman.gain.par <- function(rho, sigma_M, sigma_R) {
    # Returns the steady-state Kalman gain vector (gain on M, gain on R)
    # for a PAR process with parameters rho, sigma_M and sigma_R.
    #
    # May also be called with a single length-3 vector
    # c(rho, sigma_M, sigma_R) as the first argument.

    if (missing(sigma_M) && missing(sigma_R) && (length(rho) == 3)) {
        sigma_M <- rho[2]
        sigma_R <- rho[3]
        rho <- rho[1]
    }

    # Degenerate cases: no noise, pure random walk, pure AR(1)
    if (sigma_M == 0 && sigma_R == 0) return (c(NA_real_, NA_real_))
    if (sigma_M == 0) return (c(0,1))
    if (sigma_R == 0) return (c(1,0))

    disc <- sqrt((rho + 1)^2 * sigma_R^2 + 4 * sigma_M^2)

    gain_M <- (2 * sigma_M^2) /
        (sigma_R*(disc + (1+ rho) * sigma_R) + 2 * sigma_M^2)
    gain_R <- (2 * sigma_R) /
        (disc + (1 - rho) * sigma_R)

    c(gain_M, gain_R)
}

kalman.gain.from.pvmr <- function (rho, pvmr) {
    # Computes the steady-state Kalman gain as a function of rho and the
    # proportion of variance attributable to mean reversion (pvmr).

    if (is.na(rho) || is.na(pvmr)) return(c(NA_real_, NA_real_))
    if (rho < -1 || rho > 1) stop("invalid value for rho: ", rho)
    if (pvmr < 0 || pvmr > 1) stop("invalid value for pvmr: ", pvmr)
    # Boundary cases: pure random walk / pure AR(1)
    if (pvmr == 0) return(c(0,1))
    if (pvmr == 1) return(c(1,0))

    # Invert the pvmr formula with sigma_R fixed at 1:
    #   pvmr = 2 sM^2 / (2 sM^2 + (1 + rho))
    #   =>  sM^2 = (1 + rho) * pvmr / (2 * (1 - pvmr))
    sigma_M <- sqrt((1 + rho) * pvmr / (2 - 2 * pvmr))
    sigma_R <- 1
    kalman.gain.par(rho, sigma_M, sigma_R)
}

fit.par.both <- function (Y,    # The sequence to which a PAR model is to be fit
    robust=FALSE,            # If TRUE, robust estimations are performed
    lambda=0,                # A penalty to be applied to the random walk variance,
                             # intended to drive the solution towards one with the
                             # minimum random walk variance.
    opt_method=c("css", "kfas", "ss"), # Optimization method to be used.
                             # kfas = KFAS Kalman filter implementation
                             # ss = Steady State Kalman filter
                             # css = C-coded steady state Kalman filter
    rho.max = 1,             # An upper bound to be applied to the estimate
                             # of rho, the coefficient of mean-reversion.
                             # See par.rho.cutoff() for a reasonable upper bound.
    nu=par.nu.default()      # If robust=TRUE, the degrees of freedom parameter
) {
    # Given a PAR sequence X, creates a Kalman filter representation
    #
    #  X[t] = [M[t] R[t]],
    #
    # where
    #
    #   M[t] = rho*M[t-1] + sigma_M * eps_M,t
    #   R[t] = R[t-1] + sigma_R * eps_R,t
    #   eps_M,t ~ N(0,1)
    #   eps_R,t ~ N(0,1)
    #
    # Estimates the values of rho, sigma_M and sigma_R using
    # maximum likelihood fitting of the Kalman filter.
    #
    # If lambda is positive, then it represents a penalty factor that is
    # used to drive the optimal solution towards the mean reverting
    # solution.  A negative value of lambda will drive the optimal solution
    # towards a random walk.
    #
    # Returns an S3 object of class par.fit, representing the fit
    # that was obtained.

    opt_method <- match.arg(opt_method)
    Y <- as.numeric(Y)
    if (any(is.na(Y))) stop("The PAR model cannot be fit to sequences containing NA's")
    if (length(Y) < 50) stop("The PAR model cannot be fit to sequences of length < 50")

    # Robust fitting uses the Student's t likelihood variants
    # ("sst"/"csst"); only the steady-state methods support this.
    if (robust) {
        ll_calc_method <- switch(opt_method,
            ss = "sst",
            css = "csst",
            kfas = stop("robust estimation not implemented for opt_method = kfas")
        )
    } else {
        ll_calc_method <- opt_method
    }

    # high_value is the value returned by the objective for infeasible
    # parameter vectors.  It is re-assigned for each starting point (one
    # more than the objective at that start), so infeasible points always
    # rank worse than the starting point itself.
    # NOTE(review): high_value is still NA when best_value is computed
    # below; if objective(start_list[[1]]) hits an infeasible branch,
    # best_value becomes NA and the comparison in the loop fails.
    high_value <- NA
    objective <- function (p) {
        # p = (rho, sigma_M, sigma_R, R0); M0 is fixed at 0
        if ((p[1] < -rho.max) || (p[1] > rho.max)) return(high_value)
        if ((p[2] < 0) || (p[3] < 0)) return(high_value)
        if ((p[2] == 0) && (p[3] == 0)) return(high_value)
        # Penalized negative log likelihood; lambda > 0 penalizes sigma_R
        loglik.par (Y, p[1], p[2], p[3], 0, p[4], calc_method=ll_calc_method, nu=nu) + lambda * p[3]
    }

#    obj2 <- function(p) { cat(p, " -> ", objective(p), "\n"); objective(p) }
# optim(start, obj2, hessian=TRUE, method="L-BFGS-B", lower=c(0,0,0,-Inf,-Inf), upper=c(rho.max,2*sddy,2*sddy,Inf,Inf), control=list(trace=4))

    # Set up some initial values to be used as starting points
    # for the optimization.  sddy is a scale estimate for the daily
    # changes, used both for starting values and optimization bounds.
    start_list <- list()
    sddy <- sd(diff(Y), na.rm=TRUE)
    if (is.na(sddy) || (sddy == 0)) sddy <- median(abs(diff(Y)),na.rm=TRUE)
    if (sddy == 0) stop("The PAR model cannot be fit to constant sequences")

    # Starting points: the pure AR(1) fit, the pure random walk fit,
    # and the moment-based estimate.
    fit.mr <- fit.par.mr (Y, opt_method=opt_method, rho.max=rho.max, robust=robust, nu=nu)
    fit.rw <- fit.par.rw (Y, opt_method=opt_method, robust=robust, nu=nu)
    start_list <- c(start_list, list(fit.mr$par[c(1,2,3,5)]))
    start_list <- c(start_list, list(fit.rw$par[c(1,2,3,5)]))
    start_list <- c(start_list, list(c(estimate.par(Y, rho.max=rho.max), Y[1])))

#    start_list <- c(start_list, list(c(0,0,sddy,0,Y[1])))
#    start_list <- c(start_list, list(c(0,sddy,0,Y[1],0)))
#    start_list <- c(start_list, list(c(0.5, estimate.sigma.par(Y, 0.5)[1:2], 0, Y[1])))
#    start_list <- c(start_list, list(c(estimate.par(Y), 0, Y[1])))

    # Search for the best global optimum, starting from each of the
    # starting points given in start_list
    best_value <- objective(start_list[[1]])+1
    for (start in start_list) {
        if (any(is.na(start))) next
        high_value <- objective(start) + 1
        # Nelder-Mead first (robust on rough surfaces), then refine with
        # bounded L-BFGS-B from the Nelder-Mead solution; keep whichever
        # achieves the lower objective value.
        rfit.nm <- optim(start, objective)
        rfit.lbfgsb <- optim(rfit.nm$par, objective, method="L-BFGS-B",
            lower=c(-rho.max,0,0,-Inf), upper=c(rho.max, 2*sddy, 2*sddy, Inf))
#            lower=c(0,0,0,-Inf), upper=c(rho.max, 2*sddy, 2*sddy, Inf))
        rfit <- if (rfit.nm$value < rfit.lbfgsb$value) rfit.nm else rfit.lbfgsb
        if (rfit$value < best_value) {
            bestfit <- rfit
            best_value <- rfit$value
#            cat(sprintf("r %6.2f rho %8.4f sigma_M %8.4f sigma_R %8.4f -> %8.4f\n",
#                rrho, bestfit$par[1], bestfit$par[2], bestfit$par[3], bestfit$value))
        }
    }

# L-BGFS-B seems to terminate prematurely in many cases, so run a few extra
# iterations starting from our best estimate of a local minimum,
# just to be sure we have found a local minimum
#    pvalue <- best_value + 1
#    maxiter <- 5
#    while (abs(pvalue - best_value) > 1e-6 && maxiter > 0) {
#        pvalue <- best_value
#        bestfit <- optim(bestfit$par, objective, hessian=TRUE, method="L-BFGS-B",
#            lower=c(-rho.max,0,0,-Inf), upper=c(rho.max, 2*sddy, 2*sddy, Inf))
#        best_value <- bestfit$value
#        maxiter <- maxiter - 1
#    }
    # Final polish from the best solution found; hessian=TRUE so that
    # standard errors can be derived from the curvature below.
    bestfit <- optim(bestfit$par, objective, hessian=TRUE)

    # Convert the hessian of the fit to a standard error.  If the hessian
    # is singular (solve() fails, leaving ps as NA), retry the
    # optimization with nlm() and use its hessian instead.
    ps <- rep(NA_real_, nrow(bestfit$hessian))
    suppressWarnings(try(ps <- sqrt(diag(solve(bestfit$hessian))), silent=TRUE))
    if (any(is.na(ps))) {
        bestfit <- nlm(objective, bestfit$par, hessian=TRUE)
        suppressWarnings(try(ps <- sqrt(diag(solve(bestfit$hessian))), silent=TRUE))
        # Map nlm()'s result fields onto optim()'s naming convention
        bestfit$par <- bestfit$estimate
        bestfit$value <- bestfit$minimum
    }
    # M0 is not optimized (fixed at 0), so it has no standard error
    ps <- c(ps[1:3], NA, ps[4])
    names(ps) <- c("rho.se", "sigma_M.se", "sigma_R.se", "M0.se", "R0.se")

    bestpar <- c(bestfit$par[1:3], 0.0, bestfit$par[4])
    names(bestpar) <- c("rho", "sigma_M", "sigma_R", "M0", "R0")

    fit <- structure(list(data=Y,
        rho = bestpar[1],
        sigma_M = bestpar[2],
        sigma_R = bestpar[3],
        M0 = bestpar[4],
        R0 = bestpar[5],
        par = bestpar,
        stderr = ps,
        negloglik = bestfit$value,
        lambda = lambda,
        pvmr = pvmr.par(bestpar[1], bestpar[2], bestpar[3]),
        rho.max = rho.max,
        robust = robust,
        nu = nu,
        opt_method = opt_method,
        optim.fit = bestfit,
        par_opt=c("rho","sigma_M","sigma_R","R0")),
        class="par.fit")

    fit
}

fit.par.mr <- function (Y, # The sequence to which a PAR model is to be fit
    robust=FALSE,            # If TRUE, robust estimations are performed
    opt_method=c("css","kfas", "ss"), # Optimization method to be used.
                             # kfas = KFAS Kalman filter implementation
                             # ss = Steady State Kalman filter
                             # css = C-coded steady state Kalman filter
    rho.max = 1,             # An upper bound to be applied to the estimate
                             # of rho, the coefficient of mean-reversion.
                             # See par.rho.cutoff() for a reasonable value.
    nu=par.nu.default()      # If robust=TRUE, the degrees of freedom parameter
) {
    # Given a PAR sequence X, creates a Kalman filter representation
    #
    #  X[t] = [M[t] R[t]],
    #
    # where
    #
    #   M[t] = rho*M[t-1] + sigma_M * eps_M,t
    #   R[t] = R[t-1] + sigma_R * eps_R,t
    #   eps_M,t ~ N(0,1)
    #   eps_R,t ~ N(0,1)
    #
    # Estimates the values of rho and sigma_M using
    # maximum likelihood fitting of the Kalman filter,
    # assuming that sigma_R=0 (i.e., the pure AR(1) special case).
    #
    # Returns an S3 object of class par.fit, representing the fit
    # that was obtained.

    opt_method <- match.arg(opt_method)
    Y <- as.numeric(Y)
    if (any(is.na(Y))) stop("The PAR model cannot be fit to sequences containing NA's")
    if (length(Y) < 50) stop("The PAR model cannot be fit to sequences of length < 50")

    # Robust fitting uses the Student's t likelihood variants
    # ("sst"/"csst"); only the steady-state methods support this.
    if (robust) {
        ll_calc_method <- switch(opt_method,
            ss = "sst",
            css = "csst",
            kfas = stop("robust estimation not implemented for opt_method = kfas")
        )
    } else {
        ll_calc_method <- opt_method
    }

    # Value returned by the objective for infeasible parameters;
    # re-assigned per starting point so infeasible points always rank
    # worse than the starting point itself.
    # NOTE(review): high_value is still NA when best_value is computed
    # below; if the first starting point is infeasible, best_value
    # becomes NA and the comparison in the loop fails.
    high_value <- NA

    objective <- function (p) {
        # p = (rho, sigma_M, R[0]); sigma_R and M0 are fixed at 0
        if (p[2] <= 0) return(high_value)
        if (abs(p[1]) > 1) return(high_value)
#        cat(p, " -> ", loglik.par (Y, p[1], p[2], 0, 0, p[3], calc_method=ll_calc_method, nu=nu), "\n")
        loglik.par (Y, p[1], p[2], 0, 0, p[3], calc_method=ll_calc_method, nu=nu)
    }

    # Set up some initial values to be used as starting points
    # for the optimization.  sddy is a scale estimate for the changes,
    # used both for starting values and optimization bounds.
    start_list <- list()
    sddy <- sd(diff(Y), na.rm=TRUE)
#    if (is.na(sddy) || sddy == 0) sddy <- 1
    if (is.na(sddy) || (sddy == 0)) sddy <- median(abs(diff(Y)), na.rm=TRUE)
    if (sddy == 0) stop("The PAR model cannot be fit to constant sequences")

    # Naive starting points (rho = 0), anchored at the first observation
    # and at the mean ...
    start_list <- c(start_list, list(c(0,sddy,Y[1])))
    start_list <- c(start_list, list(c(0,sddy,mean(Y))))

    # ... and moment-based starting points
    start_list <- c(start_list, list(c(estimate.par(Y, rho.max=rho.max)[1:2], Y[1])))
    start_list <- c(start_list, list(c(estimate.par(Y, rho.max=rho.max)[1:2], mean(Y))))

    # Search for the best global optimum, starting from each of the
    # starting points given in start_list
    best_value <- objective(start_list[[1]])+1
    for (start in start_list) {
        if (any(is.na(start))) next;
        high_value <- objective(start) + 1
        rfit <- optim(start, objective, hessian=TRUE, method="L-BFGS-B",
            lower=c(-rho.max,0,-Inf), upper=c(rho.max, 2*sddy, Inf))
#            lower=c(0,0,-Inf), upper=c(rho.max, 2*sddy, Inf))
        if (rfit$value < best_value) {
            bestfit <- rfit
            best_value <- rfit$value
#            cat(sprintf("r %6.2f rho %8.4f sigma_M %8.4f sigma_R %8.4f -> %8.4f\n",
#                rrho, bestfit$par[1], bestfit$par[2], bestfit$par[3], bestfit$value))
        }
    }

    # L-BGFS-B seems to terminate prematurely in many cases, so run a few extra
    # iterations starting from our best estimate of a local minimum,
    # just to be sure we have found a local minimum
    pvalue <- best_value + 1
    maxiter <- 5
    while (abs(pvalue - best_value) > 1e-6 && maxiter > 0) {
        pvalue <- best_value
        bestfit <- optim(bestfit$par, objective, hessian=TRUE, method="L-BFGS-B",
            lower=c(-rho.max,0,-Inf), upper=c(rho.max, 2*sddy, Inf))
        best_value <- bestfit$value
        maxiter <- maxiter - 1
    }

    # Convert the hessian of the fit to a standard error.  solve() may
    # fail on a singular hessian, in which case the SEs remain NA.
    ps <- rep(NA_real_, nrow(bestfit$hessian))
    suppressWarnings(try(ps <- sqrt(diag(solve(bestfit$hessian))), silent=TRUE))
    # sigma_R and M0 are not estimated, so they have no standard errors
    ps <- c(ps[1:2], NA, NA, ps[3])
    names(ps) <- c("rho.se", "sigma_M.se", "sigma_R.se", "M0.se", "R0.se")

    bestpar <- c(bestfit$par[1:2], 0, 0, bestfit$par[3])
    names(bestpar) <- c("rho", "sigma_M", "sigma_R", "M0", "R0")
#    recover()
    # pvmr = 1 by construction: with sigma_R = 0, all variance is
    # attributable to mean reversion.
    fit <- structure(list(data=Y,
        rho = bestpar[1],
        sigma_M = bestpar[2],
        sigma_R = bestpar[3],
        M0 = bestpar[4],
        R0 = bestpar[5],
        par = bestpar,
        stderr = ps,
        negloglik = bestfit$value,
        lambda = 0,
        pvmr = c(pvmr=1),
        rho.max = rho.max,
        robust = robust,
        nu = nu,
        opt_method = opt_method,
        optim.fit = bestfit,
        par_opt=c("rho", "sigma_M", "R0")),
        class="par.fit")

    fit
}

fit.par.rw <- function (Y, # The sequence to which a PAR model is to be fit
    robust=FALSE,            # If TRUE, robust estimations are performed
    opt_method=c("css", "kfas", "ss"), # Optimization method to be used.
                             # kfas = KFAS Kalman filter
                             # ss = Steady State Kalman filter
                             # css = C-coded steady state Kalman filter
    nu=par.nu.default()      # If robust=TRUE, the degrees of freedom parameter
) {
    # Fits the pure random walk special case of the PAR model
    # (rho = sigma_M = 0), estimating only sigma_R by one-dimensional
    # maximum likelihood over the Kalman filter representation
    #   R[t] = R[t-1] + sigma_R * eps_R,t,   eps_R,t ~ N(0,1)
    #
    # Returns an S3 object of class par.fit, representing the fit
    # that was obtained.

    opt_method <- match.arg(opt_method)
    Y <- as.numeric(Y)
    if (any(is.na(Y))) stop("The PAR model cannot be fit to sequences containing NA's")
    if (length(Y) < 50) stop("The PAR model cannot be fit to sequences of length < 50")

    # Robust estimation swaps in the Student's t likelihood variants;
    # only the steady-state methods support it.
    ll_calc_method <- if (robust) {
        switch(opt_method,
            ss = "sst",
            css = "csst",
            kfas = stop("robust estimation not implemented for opt_method = kfas"))
    } else {
        opt_method
    }

    # Negative log likelihood as a function of sigma_R alone;
    # rho = sigma_M = M0 = 0 and R0 is pinned to the first observation.
    neg_loglik <- function(sr) loglik.par(Y, 0, 0, sr, 0, Y[1], ll_calc_method, nu)

    sddy <- sd(diff(Y), na.rm=TRUE)
    opt <- optimize(neg_loglik, lower=sddy*0.00001, upper=2*sddy+1)
    sigma_R <- opt$minimum
    negloglik <- opt$objective
    # Large-sample standard error of a Gaussian scale estimate
    se <- sigma_R / sqrt(length(Y))

    par <- c(rho=0, sigma_M=0, sigma_R=sigma_R, M0=0, R0=Y[1])
    stderr <- c(rho.se=NA_real_, sigma_M.se=NA_real_, sigma_R.se=se, M0.se=NA_real_, R0.se=0)

    # pvmr = 0 by construction: a pure random walk has no mean reversion
    structure(list(data=Y,
        rho = 0,
        sigma_M = 0,
        sigma_R = sigma_R,
        M0 = par[4],
        R0 = par[5],
        par = par,
        stderr = stderr,
        negloglik = negloglik,
        lambda = 0,
        pvmr = c(pvmr=0),
        rho.max = 1.0,
        robust = robust,
        nu = nu,
        opt_method = opt_method,
        par_opt=c("sigma_R")),
        class="par.fit")
}

fit.par <- function (Y,    # The sequence to which a PAR model is to be fit
    robust=FALSE,            # If TRUE, robust estimations are performed
    model=c("par", "ar1", "rw"),  # Specifies which parameters are to be estimated
                             # par = estimate rho, sigma_M and sigma_R
                             # ar1 = estimate rho and sigma_M, assuming sigma_R = 0
                             # rw = estimate sigma_R, assuming rho = sigma_M = 0
    lambda=0,                # A penalty to be applied to the random walk variance,
                             # intended to drive the solution towards one with the
                             # minimum random walk variance.
    opt_method=c("css", "kfas", "ss"), # Optimization method to be used.
                             # kfas = KFAS Kalman filter method
                             # ss = Steady State Kalman filter
                             # css = C-coded steady state Kalman filter
    rho.max = 1,             # An upper bound to be applied to the estimate
                             # of rho, the coefficient of mean-reversion.
                             # See par.rho.cutoff() for a reasonable approach
    nu=par.nu.default()      # If robust is TRUE, the degrees of freedom parameter
) {
    # Top-level entry point for fitting a partially autoregressive (PAR)
    # model
    #
    #  X[t] = M[t] + R[t]
    #   M[t] = rho*M[t-1] + sigma_M * eps_M,t
    #   R[t] = R[t-1] + sigma_R * eps_R,t
    #
    # by maximum likelihood.  Validates the input, dispatches to the
    # fitter matching the requested model specialization, and attaches
    # the time index and model name to the result.
    #
    # Returns an S3 object of class par.fit.

    opt_method <- match.arg(opt_method)
    model <- match.arg(model)

    Yorig <- Y
    # Collapse a one-column matrix-like input to a plain vector
    if (!is.null(dim(Y))) {
        if (dim(Y)[2] > 1) stop("Y must be a single column")
        Y <- Y[,1]
    }
    if (length(Y) < 50) stop("Y does not contain enough data to generate a reliable estimate")
    if (any(is.na(Y))) stop("fit.par does not accept data containing NA's")
    if (sd(Y) == 0) stop("The input sequence Y is a constant sequence")

    # Strip any zoo/xts wrapper before fitting
    Y <- coredata(Y)

    fitted <- switch (model,
        par = fit.par.both(Y, lambda=lambda, opt_method=opt_method, rho.max=rho.max, robust=robust, nu=nu),
        ar1 = fit.par.mr(Y, opt_method=opt_method, rho.max=rho.max, robust=robust, nu=nu),
        rw =  fit.par.rw(Y, opt_method=opt_method, robust=robust, nu=nu))

    # Preserve the original time index when one is available
    fitted$index <- if (is.zoo(Yorig)) index(Yorig) else seq_along(Y)
    fitted$model <- model

    fitted
}

statehistory.par <- function(A, data=A$data) {
    # On input, A is a par.fit object as produced by fit.par.  Runs the
    # steady-state Kalman filter over `data` using A's fitted parameters
    # to infer the states of the mean-reverting and random walk
    # components at each time step.
    #
    # Returns a data.frame with columns:
    #   X:     The value of the process at this time
    #   M:     The inferred state of the mean reverting component
    #   R:     The inferred state of the random walk component
    #   eps_M: The inferred shock to the mean reverting component
    #   eps_R: The inferred shock to the random walk component

    n <- length(data)
    M <- numeric(n)
    R <- numeric(n)
    eps_M <- numeric(n)
    eps_R <- numeric(n)

    # Initial states come from the fitted M0 and R0
    m_prev <- A$par[4]
    r_prev <- A$par[5]

    # Constant gain of the steady-state Kalman filter
    gain <- kalman.gain.par(A$rho, A$sigma_M, A$sigma_R)

    for (t in seq_len(n)) {
        predicted <- A$rho * m_prev + r_prev
        innovation <- as.numeric(data[t]) - predicted
        # Split the innovation between the two components via the gain
        eps_M[t] <- innovation * gain[1]
        eps_R[t] <- innovation * gain[2]
        M[t] <- A$rho * m_prev + eps_M[t]
        R[t] <- r_prev + eps_R[t]
        m_prev <- M[t]
        r_prev <- R[t]
    }

    states <- data.frame(X=data, M=M, R=R, eps_M=eps_M, eps_R=eps_R)
    colnames(states) <- c("X", "M", "R", "eps_M", "eps_R")
#    if (is(data, "zoo")) states <- zoo(states, index(data))
    states
}

print.par.fit <- function (x, ...) {
    # S3 print method for par.fit objects; delegates to the internal
    # summary printer.
    print.internal.par.fit(x)
}

print.internal.par.fit <- function (A) {
    # Prints a human-readable summary of a fitted PAR model A: the model
    # equations with fitted coefficients, standard errors (when present),
    # initial states, pvmr and the negative log likelihood.
    printf("Fitted model:\n")
    printf("  X[t] = M[t] + R[t]\n")
    printf("  M[t] = %.4f M[t-1] + eps_M,t,  ", A$rho)
    # Robust fits use a Student's t innovation distribution
    if (A$robust) {
        printf("eps_M,t ~ t(0, %.4f, %.2f)\n", A$sigma_M, A$nu)
    } else {
        printf("eps_M,t ~ N(0, %.4f^2)\n", A$sigma_M)
    }
    have_se <- !is.null(A$stderr)
    if (have_se) printf("%8s(%.4f)%33s(%.4f)\n", "", A$stderr[1], "", A$stderr[2])
    if (A$robust) {
        printf("  R[t] = R[t-1] + eps_R,t,         eps_R,t ~ t(0, %.4f, %.2f)\n", A$sigma_R, A$nu)
    } else {
        printf("  R[t] = R[t-1] + eps_R,t,         eps_R,t ~ N(0, %.4f^2)\n", A$sigma_R)
    }
    if (have_se) printf("%49s(%.4f)\n", "", A$stderr[3])

    printf("  M_0 = %.4f, R_0 = %.4f\n", A$M0, A$R0)
    if (have_se) {
        # M0 has no standard error when it was not optimized
        if (is.na(A$stderr[4])) {
            printf("           (NA)      (%.4f)\n", A$stderr[5])
        } else {
            printf("        (%.4f)      (%.4f)\n", A$stderr[4], A$stderr[5])
        }
    }

    # Note any restricted special cases (random walk / pure AR(1))
    if (!("sigma_M" %in% A$par_opt)) cat("(Only sigma_R was fit; this is a random walk model.)\n")
    if (!("sigma_R" %in% A$par_opt)) cat("(Only rho and sigma_M were fit; this is a pure AR(1) model.)\n")

    printf("Proportion of variance attributable to mean reversion (pvmr) = %.4f\n", A$pvmr)
    printf("Negative log likelihood = %.2f\n", A$negloglik)
}

plot.par.fit <- function (x, ...) {
    # S3 plot method for par.fit objects; delegates to the internal
    # plotting routine.
    plot.internal.par.fit(x)
}

plot.internal.par.fit <- function (A) {
    # Plots a fitted PAR model: the price series together with its
    # inferred random walk component (top panel), above the inferred
    # mean-reverting component with dashed +/-1 and +/-2 sd bands
    # (bottom panel).

    # Dummy assignments so R CMD check does not complain about ggplot2's
    # non-standard evaluation of these names
    Date <- Value <- Label <- Facet <- ll <- par.rw0.rho_quantile_table <- NULL

    states <- statehistory.par(A)
    actual.df <- data.frame(Date=A$index, Label="Actual", Value=states$X)
    model.df <- data.frame(Date=A$index, Label="Model", Value=states$R)
    price.df <- rbind(actual.df, model.df)
    price.plot <- ggplot (price.df, aes(x=Date, y=Value, colour=Label)) + geom_line () +
        ylab("Price") + xlab("") + theme(legend.position="top") +
        scale_colour_manual(name="",
            labels=c("Actual", "Model"),
            values=c("Black", "#00A651")) +  # Black, Green
        ggtitle("Price Series")

    mr.df <- data.frame(Date=A$index, Label="M[t]", Value=states$M)
    # Empirical sd of the mean-reverting component, for the bands
    m_sd <- sd(states$M)
    bands.df <- data.frame(Value=c(2 * m_sd, m_sd, -m_sd, -2 * m_sd),
        Facet=c("two","one","one","two"))
    mr.plot <- ggplot(mr.df, aes(x=Date, y=Value)) + geom_line() +
        ggtitle("Mean Reverting Component") + ylab("Price") + xlab("") +
        geom_hline(data=bands.df, aes(yintercept=Value, colour=Facet), linetype="dashed")

    # Stack the two panels on a 9-row grid layout
    grid.newpage()
    pushViewport(viewport(layout=grid.layout(9, 1)))
    print(price.plot, vp=viewport(layout.pos.row=1:5, layout.pos.col=1))
    print(mr.plot, vp=viewport(layout.pos.row=6:9, layout.pos.col=1))
}

plot.par.likelihood.neighborhood <- function (A, rho_lim=c(), sigma_M_lim=c(), sigma_R_lim=c(), R0_lim=c(),
    maxview=FALSE) {
    # Plots one-dimensional slices of the negative log likelihood surface
    # around the fitted parameters of A (a par.fit object), one panel per
    # parameter (rho, sigma_M, sigma_R, R0), with the fitted value marked
    # by a vertical red line.
    #
    #   *_lim:   optional c(low, high) range for each parameter's slice
    #   maxview: if TRUE, plot over the full feasible range instead of a
    #            neighborhood of the fitted value

    if (!is(A, "par.fit")) stop("A must be an par.fit")

    # Dummy assignment to placate R CMD check (ggplot2 NSE)
    ll <- NULL

    rho <- A$rho
    sigma_M <- A$sigma_M
    sigma_R <- A$sigma_R
    R0 <- A$R0
    # Scale of the differenced data, used to bound the sigma slices
    dsd <- sd(diff(A$data))

    # Builds a 200-point grid around x: +/- one standard error when
    # available, otherwise +/- 20% of |x|, clipped to [xmin, xmax]
    scaled_seq <- function (x, xse, xmin, xmax) {
        if (!is.na(xse) & !is.na(x)) {
            xlow <- x - xse
            xhigh <- x + xse
        } else if (!is.na(x)) {
            xlow <- max(xmin, x - abs(x) * 0.2)
            xhigh <- min(xmax, x + abs(x) * 0.2)
        } else {
            xlow <- xmin
            xhigh <- xmax
        }
        xlow <- max(xlow, xmin)
        xhigh <- min(xhigh, xmax)
        seq(xlow, xhigh, length.out=200)
    }

    if (!missing(rho_lim)) {
        rho_inc <- seq(rho_lim[1], rho_lim[2], length.out=200)
    } else if (maxview) {
        rho_inc <- seq(-1,1,length.out=200)
    } else  {
        rho_inc <- scaled_seq(A$rho, A$stderr["rho.se"], -1, 1)
    }
    ll_by_rho <- sapply(rho_inc, function(r) loglik.par(A$data, r, A$sigma_M, A$sigma_R, 0, A$R0))
    rho.df <- data.frame(rho = rho_inc, ll = ll_by_rho)

    if (!missing(sigma_M_lim)) {
        sigma_M_inc <- seq(sigma_M_lim[1], sigma_M_lim[2], length.out=200)
    } else if (maxview) {
        sigma_M_inc <- seq(0, 2 * dsd, length.out=200)
    } else {
        sigma_M_inc <- scaled_seq(A$sigma_M, A$stderr["sigma_M.se"], 0, 2 * dsd)
    }
    ll_by_sigma_M <- sapply(sigma_M_inc, function(s) loglik.par(A$data, A$rho, s, A$sigma_R, 0, A$R0))
    sigma_M.df <- data.frame(sigma_M = sigma_M_inc, ll=ll_by_sigma_M)

    if (!missing(sigma_R_lim)) {
        sigma_R_inc <- seq(sigma_R_lim[1], sigma_R_lim[2], length.out=200)
    } else if (maxview) {
        sigma_R_inc <- seq(0, 2 * dsd, length.out=200)
    } else {
        sigma_R_inc <- scaled_seq(A$sigma_R, A$stderr["sigma_R.se"], 0, 2 * dsd)
    }
    ll_by_sigma_R <- sapply(sigma_R_inc, function(s) loglik.par(A$data, A$rho, A$sigma_M, s, 0, A$R0))
    sigma_R.df <- data.frame(sigma_R = sigma_R_inc, ll=ll_by_sigma_R)

    # NOTE(review): the maxview and default branches below are identical;
    # maxview has no effect on the R0 slice.
    if (!missing(R0_lim)) {
        R0_inc <- seq(R0_lim[1], R0_lim[2], length.out=200)
    } else if (maxview) {
        R0_inc <- scaled_seq(A$R0, A$stderr["R0.se"], A$data[1] - 2 * dsd, A$data[1] + 2 * dsd)
    } else {
        R0_inc <- scaled_seq(A$R0, A$stderr["R0.se"], A$data[1] - 2 * dsd, A$data[1] + 2 * dsd)
    }
    ll_by_R0 <- sapply(R0_inc, function(r) loglik.par(A$data, A$rho, A$sigma_M, A$sigma_R, 0, r))
    R0.df <- data.frame(R0 = R0_inc, ll=ll_by_R0)

    # One panel per parameter, fitted value marked in red
    p.rho <- ggplot(rho.df, aes(x=rho, y=ll)) + geom_line() +
        geom_vline(xintercept=rho, colour="red") +
        ylab("-Log Likelihood") + xlab(expression(rho))
    p.sigma_M <- ggplot(sigma_M.df, aes(x=sigma_M, y=ll)) + geom_line() +
        geom_vline(xintercept=sigma_M, colour="red") +
        ylab("-Log Likelihood") + xlab(expression(sigma[M]))
    p.sigma_R <- ggplot(sigma_R.df, aes(x=sigma_R, y=ll)) + geom_line() +
        geom_vline(xintercept=sigma_R, colour="red") +
        ylab("-Log Likelihood") + xlab(expression(sigma[R]))
    p.R0 <- ggplot(R0.df, aes(x=R0, y=ll)) + geom_line() +
        geom_vline(xintercept=R0, colour="red") +
        ylab("-Log Likelihood") + xlab(expression(R[0]))

    # 2x2 grid of the four slices
    grid.newpage()
    pushViewport(viewport(layout=grid.layout(2,2)))
    print(p.rho, vp=viewport(layout.pos.row=1, layout.pos.col=1))
    print(p.sigma_M, vp=viewport(layout.pos.row=1, layout.pos.col=2))
    print(p.sigma_R, vp=viewport(layout.pos.row=2, layout.pos.col=1))
    print(p.R0, vp=viewport(layout.pos.row=2, layout.pos.col=2))

}

plot.par.likelihood.neighborhood.3d <- function (A, rho_lim=c(), sigma_M_lim=c(), sigma_R_lim=c(), R0_lim=c()) {
    # Interactively plots two-dimensional slices of the negative log
    # likelihood surface of A (a par.fit object) as perspective surfaces,
    # one for each pair of parameters among (rho, sigma_M, sigma_R, R0),
    # pausing for user input between plots.
    #
    #   *_lim: optional c(low, high) range for each parameter's grid

    if (!is(A, "par.fit")) stop("A must be an par.fit")

#    opar <- par(mfrow=c(3,2))
#    on.exit(par(opar))

    # Grid resolution along each axis
    npoints <- 50
    rho <- A$rho
    sigma_M <- A$sigma_M
    sigma_R <- A$sigma_R
    R0 <- A$R0
    # Scale of the differenced data, used to bound the sigma grids
    dsd <- sd(diff(A$data))

    # Builds an npoints grid around x: +/- one standard error when
    # available, otherwise +/- 20% of |x|, clipped to [xmin, xmax]
    scaled_seq <- function (x, xse, xmin, xmax) {
        if (!is.na(xse) & !is.na(x)) {
            xlow <- x - xse
            xhigh <- x + xse
        } else if (!is.na(x)) {
            xlow <- max(xmin, x - abs(x) * 0.2)
            xhigh <- min(xmax, x + abs(x) * 0.2)
        } else {
            xlow <- xmin
            xhigh <- xmax
        }
        xlow <- max(xlow, xmin)
        xhigh <- min(xhigh, xmax)
        seq(xlow, xhigh, length.out=npoints)
    }

    if (missing(rho_lim)) {
        rho_inc <- scaled_seq(A$rho, A$stderr["rho.se"], -1, 1)
    } else {
        rho_inc <- seq(rho_lim[1], rho_lim[2], length.out=npoints)
    }

    if (missing(sigma_M_lim)) {
        sigma_M_inc <- scaled_seq(A$sigma_M, A$stderr["sigma_M.se"], 0, 2 * dsd)
    } else {
        sigma_M_inc <- seq(sigma_M_lim[1], sigma_M_lim[2], length.out=npoints)
    }

    if (missing(sigma_R_lim)) {
        sigma_R_inc <- scaled_seq(A$sigma_R, A$stderr["sigma_R.se"], 0, 2 * dsd)
    } else {
        sigma_R_inc <- seq(sigma_R_lim[1], sigma_R_lim[2], length.out=npoints)
    }

    if (missing(R0_lim)) {
        R0_inc <- scaled_seq(A$R0, A$stderr["R0.se"], A$data[1] - 2 * dsd, A$data[1] + 2 * dsd)
    } else {
        R0_inc <- seq(R0_lim[1], R0_lim[2], length.out=npoints)
    }

    # Two-parameter slices of the (negated) likelihood surface; the
    # remaining parameters are held at their fitted values
    frho_sigma_M <- function(r,sm) -loglik.par(A$data, r, sm, sigma_R, 0, R0)
    frho_sigma_R <- function(r,sr) -loglik.par(A$data, r, sigma_M, sr, 0, R0)
    frho_R0 <- function(r,r0) -loglik.par(A$data, r, sigma_M, sigma_R, 0, r0)
    fsigma_M_sigma_R <- function(sm,sr) -loglik.par(A$data, rho, sm, sr, 0, R0)
    fsigma_M_R0 <- function(sm,r0) -loglik.par(A$data, rho, sm, sigma_R, 0, r0)
    fsigma_R_R0 <- function(sr,r0) -loglik.par(A$data, rho, sigma_M, sr, 0, r0)

    # Vectorize so the slice functions can be used with outer()
    vfrho_sigma_M <- Vectorize(frho_sigma_M)
    vfrho_sigma_R <- Vectorize(frho_sigma_R)
    vfrho_R0 <- Vectorize(frho_R0)
    vfsigma_M_sigma_R <- Vectorize(fsigma_M_sigma_R)
    vfsigma_M_R0 <- Vectorize(fsigma_M_R0)
    vfsigma_R_R0 <- Vectorize(fsigma_R_R0)

    rho_sigma_M <- outer(rho_inc, sigma_M_inc, vfrho_sigma_M)
    rho_sigma_R <- outer(rho_inc, sigma_R_inc, vfrho_sigma_R)
    rho_R0      <- outer(rho_inc, R0_inc, vfrho_R0)
    sigma_M_sigma_R <- outer(sigma_M_inc, sigma_R_inc, vfsigma_M_sigma_R)
    sigma_M_R0  <- outer(sigma_M_inc, R0_inc, vfsigma_M_R0)
    sigma_R_R0  <- outer(sigma_R_inc, R0_inc, vfsigma_R_R0)

    # persp3D is provided by the plot3D package
#    title(main="", xlab=expression(rho), ylab=expression(sigma[M]))
#    persp(rho_inc, sigma_M_inc, rho_sigma_M, theta=120, phi=30, expand=0.5, col="lightblue",
#    require(plot3D)
    persp3D(rho_inc, sigma_M_inc, rho_sigma_M, xlab="rho", ylab="sigma_M", clab=c("Log", "Likelihood"))
    readline("Press enter for next graph: ")
    persp3D(rho_inc, sigma_R_inc, rho_sigma_R, xlab="rho", ylab="sigma_R", clab=c("Log", "Likelihood"))
    readline("Press enter for next graph: ")
    persp3D(rho_inc, R0_inc, rho_R0, xlab="rho", ylab="R0", clab=c("Log", "Likelihood"))
    readline("Press enter for next graph: ")
    persp3D(sigma_M_inc, sigma_R_inc, sigma_M_sigma_R, xlab="sigma_M", ylab="sigma_R", clab=c("Log", "Likelihood"))
    readline("Press enter for next graph: ")
    persp3D(sigma_M_inc, R0_inc, sigma_M_R0, xlab="sigma_M", ylab="R0", clab=c("Log", "Likelihood"))
    readline("Press enter for next graph: ")
    persp3D(sigma_R_inc, R0_inc, sigma_R_R0, xlab="sigma_R", ylab="R0", clab=c("Log", "Likelihood"))

#    return(1)

    # Earlier base-graphics versions of the surface plots, retained for
    # reference:
#    persp(rho_inc, sigma_R_inc, rho_sigma_R, theta=30, phi=30, expand=0.5, col="lightblue",
#        ltheta=120, shade=0.75, ticktype="detailed", xlab=expression(rho), ylab=expression(sigma[R]))

#    persp(rho_inc, R0_inc, rho_R0, theta=30, phi=30, expand=0.5, col="lightblue",
#        ltheta=120, shade=0.75, ticktype="detailed", xlab=expression(rho), ylab=expression(R[0]))

#    persp(sigma_M_inc, sigma_R_inc, sigma_M_sigma_R, theta=30, phi=30, expand=0.5, col="lightblue",
#        ltheta=120, shade=0.75, ticktype="detailed", xlab=expression(sigma[M]), ylab=expression(sigma[R]))

#    persp(sigma_M_inc, R0_inc, sigma_M_R0, theta=30, phi=30, expand=0.5, col="lightblue",
#        ltheta=120, shade=0.75, ticktype="detailed", xlab=expression(sigma[M]), ylab=expression(R[0]))

#    persp(sigma_R_inc, R0_inc, sigma_R_R0, theta=30, phi=30, expand=0.5, col="lightblue",
#        ltheta=120, shade=0.75, ticktype="detailed", xlab=expression(sigma[R]), ylab=expression(R[0]))

}

as.data.frame.par.fit <- function (x, row.names, optional, ...) {
    # S3 as.data.frame method for par.fit objects.  Produces a one-row
    # data.frame summarizing the fit: parameters (rho, sigma_M, sigma_R,
    # M0, R0), their standard errors, pvmr and the negative log
    # likelihood.  row.names/optional are accepted for S3 signature
    # compatibility but ignored.
    as.data.frame.internal.par.fit (x)
}

as.data.frame.internal.par.fit <- function (A) {
    # Flattens a fitted PAR model A (the result of fit.par) into a
    # single-row data.frame containing the key values of the fit:
    # the fitting options (robust, nu, opt_method), the data length,
    # the parameter estimates (rho, sigma_M, sigma_R, M0, R0), their
    # standard errors, lambda, pvmr and the negative log likelihood.

    row <- data.frame(
        robust     = A$robust,
        nu         = A$nu,
        opt_method = A$opt_method,
        n          = length(A$data),
        rho        = A$rho,
        sigma_M    = A$sigma_M,
        sigma_R    = A$sigma_R,
        M0         = A$par[4],
        R0         = A$par[5],
        rho.se     = A$stderr[1],
        sigma_M.se = A$stderr[2],
        sigma_R.se = A$stderr[3],
        M0.se      = A$stderr[4],
        R0.se      = A$stderr[5],
        lambda     = A$lambda,
        pvmr       = A$pvmr,
        negloglik  = A$negloglik)
    # Drop the row name inherited from the named parameter vectors
    rownames(row) <- NULL
    row
}
# matthewclegg/partialAR documentation built on May 21, 2019, 1 p.m.