#---------retrieve the model scheme
ModelScheme = function(DependentVar = NULL, Regressor=NULL, Intercept = NULL, EstMethod="PFR", ARMAModel=c(0,0), CountDist="NULL",
ParticleNumber = 5, epsilon = 0.5, initialParam=NULL, TrueParam=NULL, Task="Evaluation", SampleSize=NULL,
OptMethod="L-BFGS-B", OutputType="list", ParamScheme=1, maxdiff=10^(-8),ntrials= NULL,...){
# Distribution list
if( !(CountDist %in% c("Poisson", "Negative Binomial", "Generalized Poisson", "Generalized Poisson 2", "Mixed Poisson",
"ZIP", "Binomial")))
stop("The specified distribution in not supported.")
# Specify Task
#if( !(Task %in% c("Evaluation", "Optimization", "Synthesis"))){
if( !(Task %in% c("Evaluation", "Optimization", "Simulation", "Synthesis"))){
Task = "Evaluation"
message("The selected Task is not supported. The Task has been set to 'Evaluation'")
}
# Estimation Method
if( !(EstMethod %in% c("PFR"))){
EstMethod = "PFR"
message("The selected EstMethod is not supported. EstMethod has been set to'PFR'")
}
# check that the ARMA order has dimension 2, the ARMA orders are integers
if(length(ARMAModel)!=2 | !prod(ARMAModel %% 1 == c(0,0)) )
stop("The specified ARMA model is not supported.")
# make the dependent variable numeric
if(is.data.frame(DependentVar)){
DependentVar = DependentVar[,1]
}
# retrieve sample size
if(Task=="Synthesis"){
n = SampleSize
}else{
n = length(DependentVar)
}
# fix me: do I need a warning here if there is no SampleSize in Synthesis?
if(!is.null(Regressor)){
if (Intercept==TRUE && sum(as.matrix(Regressor)[,1])!=n ){
Regressor = as.data.frame(cbind(rep(1,n),Regressor))
names(Regressor)[1] = "Intercept"
}
else{
Regressor = as.data.frame(Regressor)
}
}
# number of regressors
nreg = ifelse(is.null(Regressor), 0, dim(Regressor)[2]-as.numeric(Intercept))
if(is.null(Intercept) || is.null(Regressor)){
nint = 1
}else{
nint = as.numeric(Intercept)
}
MargParmIndices = switch(CountDist,
"Poisson" = 1:(1+nreg+nint-1),
"Negative Binomial" = 1:(2+nreg+nint-1),
"Generalized Poisson" = 1:(2+nreg+nint-1),
"Generalized Poisson 2" = 1:(2+nreg+nint-1),
"Binomial" = 1:(1+nreg+nint-1),
"Mixed Poisson" = 1:(3+(nreg+nint-1)*2),
"ZIP" = 1:(2+nreg+nint-1)
)
# retrieve marginal distribution parameters
nMargParms = length(MargParmIndices)
nparms = nMargParms + sum(ARMAModel)
nAR = ARMAModel[1]
nMA = ARMAModel[2]
if(Task=="Synthesis"){
if (nparms!=length(TrueParam)){
stop("The length of the specified true parameter does not comply with the
model or the number of regressors.")
}
}
# cannot allow for sample size to be smaller than model parameters
if (n<=(nparms+2))
stop(sprintf("The provided sample size is equal to %.0f while the
the specified model has %.0f parameters",n,nparms))
# check if initial param length is wrong
if(!is.null(initialParam) & length(initialParam)!=nparms)
stop("The specified initial parameter has wrong length.")
# parse all information in the case without Regressors or in the case with Regressors
if(nreg<1){
# retrieve marginal cdf
mycdf = switch(CountDist,
"Poisson" = ppois,
"Negative Binomial" = function(x, theta){ pnbinom(x, theta[1], 1-theta[2])},
"Generalized Poisson" = function(x, theta){ pGpois(x, theta[1], theta[2])},
"Generalized Poisson 2" = function(x, theta){ pgenpois2(x, theta[2], theta[1])},,
"Binomial" = function(x, theta){ pbinom(x, ntrials, theta[1])},
"Mixed Poisson" = function(x, theta){ pmixpois1(x, theta[1], theta[2], theta[3])},
"ZIP" = function(x, theta){ pzipois(x, theta[1], theta[2])}
)
# retrieve marginal pdf
mypdf = switch(CountDist,
"Poisson" = dpois,
"Negative Binomial" = function(x, theta){ dnbinom(x, theta[1], 1-theta[2])},
"Generalized Poisson" = function(x, theta){ dGpois(x, theta[1], theta[2])},
"Generalized Poisson 2" = function(x, theta){ dgenpois2(x, theta[2], theta[1])},
"Binomial" = function(x, theta){ dbinom(x, ntrials, theta[1])},
"Mixed Poisson" = function(x, theta){ dmixpois1(x, theta[1], theta[2], theta[3])},
"ZIP" = function(x, theta){ dzipois(x, theta[1], theta[2])}
)
# retrieve marginal inverse cdf
myinvcdf = switch(CountDist,
"Poisson" = qpois,
"Negative Binomial" = function(x, theta){ qnbinom(x, theta[1], 1-theta[2])},
"Generalized Poisson" = function(x, theta){ qGpois(x, theta[1], theta[2])},
"Generalized Poisson 2" = function(x, theta){ qgenpois2(x, theta[2], theta[1])},
"Binomial" = function(x, theta){ qbinom(x, ntrials, theta[1])},
"Mixed Poisson" = function(x, theta){ qmixpois(x, theta[1], theta[2], theta[3])},
"ZIP" = function(x, theta){ qzipois(x, theta[1], theta[2])}
)
# lower bound constraints
LB = switch(CountDist,
"Poisson" = c(0.01, rep(-Inf, sum(ARMAModel))),
"Negative Binomial" = c(0.01, 0.01, rep(-Inf, sum(ARMAModel))),
"Generalized Poisson" = c(0.01, 0.01, rep(-Inf, sum(ARMAModel))),
"Generalized Poisson 2" = c(0.01, 0.01, rep(-Inf, sum(ARMAModel))),
"Binomial" = c(0.01, rep(-Inf, sum(ARMAModel))),
"Mixed Poisson" = c(0.01, 0.01, 0.01, rep(-Inf, sum(ARMAModel))),
"ZIP" = c(0.01, 0.01, rep(-Inf, sum(ARMAModel)))
)
# upper bound constraints
UB = switch(CountDist,
"Poisson" = c(Inf, rep( Inf, sum(ARMAModel))),
"Negative Binomial" = c(Inf, 0.99, rep( Inf, sum(ARMAModel))),
"Generalized Poisson" = c(Inf, Inf, rep( Inf, sum(ARMAModel))),
"Generalized Poisson 2" = c(Inf, Inf, rep( Inf, sum(ARMAModel))),
"Binomial" = c(Inf, rep( Inf, sum(ARMAModel))),
"Mixed Poisson" = c(Inf, Inf, 0.99, rep( Inf, sum(ARMAModel))),
"ZIP" = c(Inf, 0.99, rep( Inf, sum(ARMAModel)))
)
# names of marginal parameters
MargParmsNames = switch(CountDist,
"Poisson" = c("lambda"),
"Negative Binomial" = c("r","p"),
"Mixed Poisson" = c("lambda_1", "lambda_2", "p"),
"Generalized Poisson" = c("a", "mu"),
"Generalized Poisson 2" = c("a", "mu"),
"Binomial" = c("p"),
"ZIP" = c("lambda", "p")
)
}else{
# retrieve marginal cdf
mycdf = switch(CountDist,
"Poisson" = function(x, ConstMargParm, DynamMargParm){ ppois(x, DynamMargParm)},
"Negative Binomial" = function(x, ConstMargParm, DynamMargParm){ pnbinom(x, ConstMargParm, 1-DynamMargParm)},
"Generalized Poisson" = function(x, ConstMargParm, DynamMargParm){ pGpois(x, ConstMargParm, DynamMargParm)},
"Generalized Poisson 2" = function(x, ConstMargParm, DynamMargParm){ pgenpois2(x, DynamMargParm, ConstMargParm)},
"Binomial" = function(x, ConstMargParm, DynamMargParm){ pbinom(x, ntrials, DynamMargParm)},
"Mixed Poisson" = function(x, ConstMargParm, DynamMargParm){ pmixpois1(x, DynamMargParm[1], DynamMargParm[2], ConstMargParm)},
"ZIP" = function(x, ConstMargParm, DynamMargParm){ pzipois(x, DynamMargParm, ConstMargParm)}
)
# retrieve marginal pdf
mypdf = switch(CountDist,
"Poisson" = function(x, ConstMargParm, DynamMargParm){ dpois(x, DynamMargParm)},
"Negative Binomial" = function(x, ConstMargParm, DynamMargParm){ dnbinom(x, ConstMargParm, 1-DynamMargParm)},
"Generalized Poisson" = function(x, ConstMargParm, DynamMargParm){ dGpois(x, ConstMargParm, DynamMargParm)},
"Generalized Poisson 2" = function(x, ConstMargParm, DynamMargParm){ dgenpois2(x, DynamMargParm, ConstMargParm)},
"Binomial" = function(x, ConstMargParm, DynamMargParm){ dbinom(x, ntrials, DynamMargParm)},
"Mixed Poisson" = function(x, ConstMargParm, DynamMargParm){ dmixpois1(x, DynamMargParm[1], DynamMargParm[2], ConstMargParm)},
"ZIP" = function(x, ConstMargParm, DynamMargParm){ dzipois(x, DynamMargParm, ConstMargParm)}
)
# retrieve marginal inverse cdf
myinvcdf = switch(CountDist,
"Poisson" = function(x, ConstMargParm, DynamMargParm){ qpois(x, DynamMargParm)},
"Negative Binomial" = function(x, ConstMargParm, DynamMargParm){ qnbinom(x, ConstMargParm, 1-DynamMargParm)},
"Generalized Poisson" = function(x, ConstMargParm, DynamMargParm){ qGpois(x, ConstMargParm, DynamMargParm)},
"Generalized Poisson 2" = function(x, ConstMargParm, DynamMargParm){ qgenpois2(x, DynamMargParm, ConstMargParm)},
"Binomial" = function(x, ConstMargParm, DynamMargParm){ qbinom(x, ntrials, DynamMargParm)},
"Mixed Poisson" = function(x, ConstMargParm, DynamMargParm){ qmixpois(x, DynamMargParm[,1], DynamMargParm[,2], ConstMargParm)},
"ZIP" = function(x, ConstMargParm, DynamMargParm){ qzipois(x, DynamMargParm, ConstMargParm)}
)
# lower bound constraints
LB = switch(CountDist,
"Poisson" = rep(-Inf, sum(ARMAModel)+nreg+nint),
"Negative Binomial" = c(rep(-Inf, nreg+nint), 0.001, rep(-Inf, sum(ARMAModel))),
"Generalized Poisson" = c(rep(-Inf, nreg+nint), 0.001, rep(-Inf, sum(ARMAModel))),
"Generalized Poisson 2" = c(rep(-Inf, nreg+nint), 0.001, rep(-Inf, sum(ARMAModel))),
"Binomial" = rep(-Inf, sum(ARMAModel)+nreg+nint),
"Mixed Poisson" = c(rep(-Inf, 2*(nreg+nint)), 0.001, rep(-Inf, sum(ARMAModel))),
"ZIP" = c(rep(-Inf, nreg+nint), 0.001, rep(-Inf, sum(ARMAModel)))
)
# upper bound constraints
UB = switch(CountDist,
"Poisson" = rep(Inf, sum(ARMAModel)+nreg+nint),
"Negative Binomial" = c(rep(Inf, nreg+nint), Inf, rep(Inf, sum(ARMAModel))),
"Generalized Poisson" = c(rep(Inf, nreg+nint), Inf, rep(Inf, sum(ARMAModel))),
"Generalized Poisson 2" = c(rep(Inf, nreg+nint), Inf, rep(Inf, sum(ARMAModel))),
"Binomial" = rep(Inf, sum(ARMAModel)+nreg+nint),
"Mixed Poisson" = c(rep(Inf, 2*(nreg+nint)), 0.49, rep(Inf, sum(ARMAModel))),
"ZIP" = c(rep(Inf, nreg+nint), Inf, rep(Inf, sum(ARMAModel))),
)
# retrieve names of marginal parameters
MargParmsNames = switch(CountDist,
"Poisson" = paste(rep("b_",nreg),(1-nint):nreg,sep=""),
"Negative Binomial" = c(paste(rep("b_",nreg),(1-nint):nreg,sep=""), "k"),
"Mixed Poisson" = c(paste(rep("b_1",nreg),(1-nint):nreg,sep=""),paste(rep("b_2",nreg),(1-nint):nreg,sep=""), "p"),
"Generalized Poisson" = c(paste(rep("b_",nreg),(1-nint):nreg,sep=""), "a"),
"Generalized Poisson 2" = c(paste(rep("b_",nreg),(1-nint):nreg,sep=""), "a"),
"Binomial" = paste(rep("b_",nreg),(1-nint):nreg,sep=""),
"ZIP" = c(paste(rep("b_",nreg),(1-nint):nreg,sep=""), "p"),
)
}
# check whether the provided initial estimates make sense - this is for Evaluation, Simulation, Optimization Task
if (!is.null(initialParam)) {
if (max(initialParam<=LB) | max(initialParam>=UB)){
stop("The specified initial parameter is outside the feasible region.")
}
}
# check whether the provided initial estimates make sense - This is for Synthesis Tasks
if (Task=="Synthesis") {
if (max(TrueParam<=LB) | max(TrueParam>=UB)){
stop("The specified parameter is outside the feasible region.")
}
}
# create names of the ARMA parameters
if(nAR>0) ARNames = paste("AR_",1:ARMAModel[1], sep="")
if(nMA>0) MANames = paste("MA_",1:ARMAModel[2], sep="")
# put all the names together
if(nAR>0 && nMA<1) parmnames = c(MargParmsNames, ARNames)
if(nAR<1 && nMA>0) parmnames = c(MargParmsNames, MANames)
if(nAR>0 && nMA>0) parmnames = c(MargParmsNames, ARNames, MANames)
if(nAR==0 && nMA==0) parmnames = c(MargParmsNames)
# add the parmnames on theta fix me: does this affect performance?
if(!is.null(initialParam)) names(initialParam) = parmnames
# value assigned to the loglik when the ARMA model is not causal/invertible
loglik_BadValue1 = 10^8
# value assigned to the loglik when the particle weights degenerate
loglik_BadValue2 = 10^9
out = list(
mycdf = mycdf,
mypdf = mypdf,
myinvcdf = myinvcdf,
MargParmIndices = MargParmIndices,
initialParam = initialParam,
TrueParam = TrueParam,
parmnames = parmnames,
nMargParms = nMargParms,
nAR = nAR,
nMA = nMA,
n = n,
nreg = nreg,
nint = nint,
CountDist = CountDist,
ARMAModel = ARMAModel,
ParticleNumber = ParticleNumber,
epsilon = epsilon,
nparms = nparms,
UB = UB,
LB = LB,
EstMethod = EstMethod,
DependentVar = DependentVar,
Regressor = Regressor,
Task = Task,
OptMethod = OptMethod,
OutputType = OutputType,
ParamScheme = ParamScheme,
maxdiff = maxdiff,
loglik_BadValue1 = loglik_BadValue1,
loglik_BadValue2 = loglik_BadValue2,
ntrials = ntrials
)
return(out)
}
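# Usage sketch (a minimal example, assuming the functions in this file are
# sourced): parse a Poisson marginal with an underlying AR(1). The data y and
# all values below are hypothetical.
if (FALSE) {
y   <- rpois(200, lambda = 3)                 # toy count series
mod <- ModelScheme(DependentVar = y, CountDist = "Poisson",
ARMAModel = c(1, 0), Task = "Optimization")
mod$nparms      # 2: the Poisson mean plus one AR coefficient
mod$parmnames   # "lambda" "AR_1"
}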
# Particle filter likelihood function
ParticleFilter_Res_ARMA = function(theta, mod){
#------------------------------------------------------------------------------------#
# PURPOSE: Use particle filtering with resampling to approximate the likelihood
# of a specified count time series model with an underlying ARMA
# dependence structure.
#
# NOTES: 1. See "Latent Gaussian Count Time Series Modeling" for more
# details. A first version of the paper can be found at:
# https://arxiv.org/abs/1811.00203
# 2. This function is very similar to LikSISGenDist_ARp but here
# I have a resampling step.
# 3. This is the first stable ParticleFilter_Res_ARMA version. I am
# renaming it to ParticleFilter_Res_ARMA_stable and continue to use the
# ParticleFilter_Res_ARMA name for newer versions of the likelihood.
# INPUTS:
# theta: parameter vector
# data: data
# ParticleNumber: number of particles to be used.
# Regressor: independent variable
# CountDist: count marginal distribution
# epsilon resampling when ESS<epsilon*N
#
# OUTPUT:
# loglik: approximate log-likelihood
#
#
# AUTHORS: James Livsey, Vladas Pipiras, Stefanos Kechagias,
# DATE: July 2020
#------------------------------------------------------------------------------------#
old_state <- get_rand_state()
on.exit(set_rand_state(old_state))
# Retrieve parameters and save them in a list called Parms
Parms = RetrieveParameters(theta,mod)
# check for causality
if( CheckStability(Parms$AR,Parms$MA) ) return(mod$loglik_BadValue1)
# Initialize the negative log likelihood computation
nloglik = ifelse(mod$nreg==0, - log(mod$mypdf(mod$DependentVar[1],Parms$MargParms)),
- log(mod$mypdf(mod$DependentVar[1], Parms$ConstMargParm, Parms$DynamMargParm[1,])))
# retrieve AR, MA orders and their max
m = max(mod$ARMAModel)
p = mod$ARMAModel[1]
q = mod$ARMAModel[2]
# Compute ARMA covariance up to lag n-1
a = list()
if(!is.null(Parms$AR)){
a$phi = Parms$AR
}else{
a$phi = 0
}
if(!is.null(Parms$MA)){
a$theta = Parms$MA
}else{
a$theta = 0
}
a$sigma2 = 1
gamma = itsmr::aacvf(a,mod$n)
# Compute coefficients of the Innovations Algorithm, see 5.2.16 and 5.3.9 in the Brockwell and Davis book
IA = InnovAlg(Parms, gamma, mod)
Theta = IA$thetas
Rt = sqrt(IA$v)
# Get the n such that |v_n-v_{n-1}|< mod$maxdiff. check me: does this guarantee convergence of Thetas?
nTheta = IA$n
Theta_n = Theta[[nTheta]]
# allocate matrices for weights, particles and predictions of the latent series
w = matrix(0, mod$n, mod$ParticleNumber)
Z = matrix(0, mod$n, mod$ParticleNumber)
Zhat = matrix(0, mod$n, mod$ParticleNumber)
# initialize particle filter weights
w[1,] = rep(1,mod$ParticleNumber)
# Compute the first integral limits Limit$a and Limit$b
Limit = ComputeLimits(mod, Parms, 1, rep(0,mod$ParticleNumber), rep(1,mod$ParticleNumber))
# Initialize the particles using N(0,1) variables truncated to the limits computed above
#Z[1,] = SampleTruncNormParticles(mod, Limit$a, Limit$b, 1, rep(0,1,mod$ParticleNumber), rep(1,1,mod$ParticleNumber))
Z[1,] = SampleTruncNormParticles(mod, Limit, 1, rep(0,mod$ParticleNumber), rep(1,mod$ParticleNumber))
for (t in 2:mod$n){
# compute Zhat_t
#Zhat[t,] = ComputeZhat_t(m,Theta,Z,Zhat,t, Parms,p,q, nTheta, Theta_n)
Zhat[t,] = ComputeZhat_t(mod, IA, Z, Zhat,t, Parms)
# Compute integral limits
Limit = ComputeLimits(mod, Parms, t, Zhat[t,], Rt)
# Sample truncated normal particles
#Znew = SampleTruncNormParticles(mod, Limit$a, Limit$b, t, Zhat[t,], Rt)
Znew = SampleTruncNormParticles(mod, Limit, t, Zhat[t,], Rt)
# update weights
#w[t,] = ComputeWeights(mod, Limit$a, Limit$b, t, w[(t-1),])
w[t,] = ComputeWeights(mod, Limit, t, w[(t-1),])
# check me: break if I got NA weight
if (any(is.na(w[t,]))| sum(w[t,])==0 ){
#print(t)
#print(w[t,])
message(sprintf('WARNING: At t=%.0f some of the weights are either too small or sum to 0',t))
return(mod$loglik_BadValue2)
}
# Resample the particles using common random numbers
old_state1 = get_rand_state()
Znew = ResampleParticles(mod, w, t, Znew)
set_rand_state(old_state1)
# save the current particle
Z[t,] = Znew
# update likelihood
nloglik = nloglik - log(mean(w[t,]))
}
# for the log-likelihood we use a bias correction -- see par. 2.3 in Durbin and Koopman (1997)
# nloglik = nloglik- (1/(2*N))*(var(na.omit(wgh[T1,]))/mean(na.omit(wgh[T1,])))/mean(na.omit(wgh[T1,]))
# if (nloglik==Inf | is.na(nloglik)){
# nloglik = 10^8
# }
return(nloglik)
}
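# A minimal sketch of evaluating the PF likelihood at a trial parameter,
# assuming sim_lgc and ModelScheme (defined elsewhere in this file) and the
# itsmr package are available; the data and theta below are hypothetical.
if (FALSE) {
y     <- sim_lgc(300, "Poisson", MargParm = 3, ARParm = 0.5, MAParm = NULL)
mod   <- ModelScheme(DependentVar = y, CountDist = "Poisson",
ARMAModel = c(1, 0), ParticleNumber = 100)
theta <- c(3, 0.5)                      # (lambda, AR_1)
ParticleFilter_Res_ARMA(theta, mod)     # approximate negative log-likelihood
}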
# Optimization wrapper to fit PF likelihood with resampling
FitMultiplePF_Res = function(theta, mod){
#====================================================================================#
# PURPOSE Fit the Particle Filter log-likelihood with resampling.
# This function maximizes the PF likelihood, nfit many times for nparts
# many choices of particle numbers, thus yielding a total of nfit*nparts
# many estimates.
#
# INPUT
# theta initial parameters
# data count series
# CountDist prescribed count distribution
# Particles vector with different choices for number of particles
# ARMAModel order of the underlying ARMA model
# epsilon resampling when ESS<epsilon*N
# LB parameter lower bounds
# UB parameter upper bounds
# OptMethod
#
#
# OUTPUT
# All parameter estimates, standard errors, likelihood value, AIC, etc
#
# Authors Stefanos Kechagias, James Livsey, Vladas Pipiras
# Date April 2020
# Version 3.6.3
#====================================================================================#
# retrieve parameter, sample size etc
nparts = length(mod$ParticleNumber)
nparms = length(theta)
nfit = 1
n = length(mod$DependentVar)
# allocate memory to save parameter estimates, hessian values, and loglik values
ParmEst = matrix(0,nrow=nfit*nparts,ncol=nparms)
se = matrix(NA,nrow=nfit*nparts,ncol=nparms)
loglik = rep(0,nfit*nparts)
convcode = rep(0,nfit*nparts)
kkt1 = rep(0,nfit*nparts)
kkt2 = rep(0,nfit*nparts)
# Each realization will be fitted nfit*nparts many times
for (j in 1:nfit){
set.seed(j)
# for each fit repeat for different number of particles
for (k in 1:nparts){
# FIX ME: I need to somehow update this in mod (number of particles to be used). It now works only for 1 choice of ParticleNumber
ParticleNumber = mod$ParticleNumber[k]
if(mod$Task == 'Optimization'){
# run the optimization for our model
optim.output <- optimx(
par = theta,
fn = ParticleFilter_Res_ARMA,
lower = mod$LB,
upper = mod$UB,
hessian = TRUE,
method = mod$OptMethod,
mod = mod)
loglikelihood = optim.output["value"]
}else{
optim.output = as.data.frame(matrix(rep(NA,8+length(theta)), ncol=8+length(theta)))
names(optim.output) = c(mapply(sprintf, rep("p%.f",length(theta)), (1:length(theta)), USE.NAMES = FALSE),
"value", "fevals", "gevals", "niter", "convcode", "kkt1", "kkt2", "xtime")
optim.output[,1:length(theta)] = theta
startTime = Sys.time()
loglikelihood = ParticleFilter_Res_ARMA(theta,mod)
endTime = Sys.time()
runTime = difftime(endTime, startTime, units = 'secs')
optim.output[,(length(theta)+1)] = loglikelihood
optim.output[,(length(theta)+2)] = 1
optim.output[,(length(theta)+3)] = 1
optim.output[,(length(theta)+4)] = 0
optim.output[,(length(theta)+8)] = as.numeric(runTime)
}
# save estimates, loglik value and diagonal hessian
ParmEst[nfit*(k-1)+j,] = as.numeric(optim.output[1:nparms])
loglik[nfit*(k-1) +j] = optim.output$value
convcode[nfit*(k-1) +j] = optim.output$convcode
kkt1[nfit*(k-1) +j] = optim.output$kkt1
kkt2[nfit*(k-1) +j] = optim.output$kkt2
# compute Hessian
# t5 = tic()
if(mod$Task == "Optimization"){
H = gHgen(par = ParmEst[nfit*(k-1)+j,],
fn = ParticleFilter_Res_ARMA,
mod = mod)
# t5 = tic()-t5
# print(t5)
# if I get all na for one row and one col of H remove it
# H$Hn[rowSums(is.na(H$Hn)) != ncol(H$Hn), colSums(is.na(H$Hn)) != nrow(H$Hn)]
# t6 = tic()
if (!any(is.na(rowSums(H$Hn)))){
# save standard errors from Hessian
if(H$hessOK && det(H$Hn)>10^(-8)){
se[nfit*(k-1)+j,] = sqrt(abs(diag(solve(H$Hn))))
}else{
se[nfit*(k-1)+j,] = rep(NA, nparms)
}
}else{
# remove the NA rows and columns from H
Hnew = H$Hn[rowSums(is.na(H$Hn)) != ncol(H$Hn), colSums(is.na(H$Hn)) != nrow(H$Hn)]
# find which rows are missing and which are not
NAIndex = which(colSums(is.na(H$Hn))==nparms)
NonNAIndex = which(colSums(is.na(H$Hn))==1)
#repeat the previous ifelse for the reduced H matrix
if(det(Hnew)>10^(-8)){
se[nfit*(k-1)+j,NonNAIndex] = sqrt(abs(diag(solve(Hnew))))
}else{
se[nfit*(k-1)+j,NAIndex] = rep(NA, length(NAIndex))
}
}
}
# t6 = tic()-t6
# print(t6)
}
}
# Compute model selection criteria (assuming one fit)
Criteria = Criteria.lgc(loglik, mod)
if(mod$OutputType=="list"){
# save the results in a list
ModelOutput = list()
# specify output list names
# names(ModelOutput) = c("ParamEstimates", "StdErrors", "FitStatistics", "OptimOutput")
ModelOutput$Model = paste(mod$CountDist, paste("ARMA(",mod$ARMAModel[1],",",mod$ARMAModel[2],")",sep=""), sep= "-")
ModelOutput$ParamEstimates = ParmEst
ModelOutput$Task = mod$Task
if(!is.null(mod$TrueParam)) ModelOutput$TrueParam = mod$TrueParam
if(!is.null(mod$initialParam)) ModelOutput$initialParam = mod$initialParam
if(mod$Task=="Optimization") ModelOutput$StdErrors = se
ModelOutput$FitStatistics = c(loglik, Criteria)
if(mod$Task=="Optimization") ModelOutput$OptimOutput = c(convcode,kkt1,kkt2)
ModelOutput$EstMethod = mod$EstMethod
ModelOutput$SampleSize = mod$n
if(loglikelihood==mod$loglik_BadValue1) ModelOutput$WarnMessage = "WARNING: The ARMA polynomial must be causal and invertible."
if(loglikelihood==mod$loglik_BadValue2) ModelOutput$WarnMessage = "WARNING: Some of the weights are either too small or sum to 0."
# assign names to all output elements
if(!is.null(mod$TrueParam)) names(ModelOutput$TrueParam) = mod$parmnames
colnames(ModelOutput$ParamEstimates) = mod$parmnames
if(mod$Task=="Optimization")colnames(ModelOutput$StdErrors) = paste("se(", mod$parmnames,")", sep="")
names(ModelOutput$FitStatistics) = c("loglik", "AIC", "BIC", "AICc")
if(mod$Task=="Optimization")names(ModelOutput$OptimOutput) = c("ConvergeStatus", "kkt1", "kkt2")
}else{
ncols = ifelse(is.null(mod$TrueParam), 3*mod$nparms+15, 4*mod$nparms+16)
ModelOutput = data.frame(matrix(ncol = ncols, nrow = 1))
# specify output names
if(!is.null(mod$TrueParam)){
colnames(ModelOutput) = c(
'CountDist','ARMAModel', 'Regressor',
paste("True_", mod$parmnames, sep=""), paste("InitialEstim_", mod$parmnames, sep=""),
mod$parmnames, paste("se(", mod$parmnames,")", sep=""),
'EstMethod', 'SampleSize', 'ParticleNumber', 'epsilon', 'OptMethod', 'ParamScheme',
"loglik", "AIC", "BIC", "AICc", "ConvergeStatus", "kkt1", "kkt2")
}else{
colnames(ModelOutput) = c(
'CountDist','ARMAModel', 'Regressor',
paste("InitialEstim_", mod$parmnames, sep=""),
mod$parmnames, paste("se(", mod$parmnames,")", sep=""),
'EstMethod', 'SampleSize', 'ParticleNumber', 'epsilon', 'OptMethod',
"loglik", "AIC", "BIC", "AICc", "ConvergeStatus", "kkt1", "kkt2")
}
# Start Populating the output data frame
ModelOutput$CountDist = mod$CountDist
ModelOutput$ARMAModel = paste("ARMA(",mod$ARMAModel[1],",",mod$ARMAModel[2],")",sep="")
ModelOutput$Regressor = !is.null(mod$Regressor)
offset = 4
# true Parameters
if(!is.null(mod$TrueParam)){
if(mod$nMargParms>0){
ModelOutput[, offset:(offset + mod$nMargParms -1)] = mod$TrueParam[1:mod$nMargParms]
offset = offset + mod$nMargParms
}
if(mod$nAR>0){
ModelOutput[, offset:(offset + mod$nAR -1)] = mod$TrueParam[ (mod$nMargParms+1):(mod$nMargParms+mod$nAR) ]
offset = offset + mod$nAR
}
if(mod$nMA>0){
ModelOutput[, offset:(offset + mod$nMA -1)] = mod$TrueParam[ (mod$nMargParms+mod$nAR+1):(mod$nMargParms+mod$nAR+mod$nMA)]
offset = offset + mod$nMA
}
}
# Initial Parameter Estimates
if(mod$nMargParms>0){
ModelOutput[, offset:(offset + mod$nMargParms -1 )] = mod$initialParam[1:mod$nMargParms ]
offset = offset + mod$nMargParms
}
if(mod$nAR>0){
ModelOutput[, offset:(offset + mod$nAR-1)] = mod$initialParam[(mod$nMargParms+1):(mod$nMargParms+mod$nAR) ]
offset = offset + mod$nAR
}
if(mod$nMA>0){
ModelOutput[, offset:(offset + mod$nMA-1)] = mod$initialParam[(mod$nMargParms+mod$nAR+1):(mod$nMargParms+mod$nAR+mod$nMA) ]
offset = offset + mod$nMA
}
# Parameter Estimates
if(mod$nMargParms>0){
ModelOutput[, offset:(offset + mod$nMargParms-1)] = ParmEst[,1:mod$nMargParms]
offset = offset + mod$nMargParms
}
if(mod$nAR>0){
ModelOutput[, offset:(offset + mod$nAR-1)] = ParmEst[,(mod$nMargParms+1):(mod$nMargParms+mod$nAR)]
offset = offset + mod$nAR
}
if(mod$nMA>0){
ModelOutput[, offset:(offset + mod$nMA-1)] = ParmEst[,(mod$nMargParms+mod$nAR+1):(mod$nMargParms+mod$nAR+mod$nMA)]
offset = offset + mod$nMA
}
# Parameter Std Errors
if(mod$nMargParms>0){
ModelOutput[, offset:(offset + mod$nMargParms-1)] = se[,1:mod$nMargParms]
offset = offset + mod$nMargParms
}
if(mod$nAR>0){
ModelOutput[, offset:(offset + mod$nAR-1)] = se[,(mod$nMargParms+1):(mod$nMargParms+mod$nAR)]
offset = offset + mod$nAR
}
if(mod$nMA>0){
ModelOutput[, offset:(offset + mod$nMA-1)] = se[,(mod$nMargParms+mod$nAR+1):(mod$nMargParms+mod$nAR+mod$nMA)]
}
ModelOutput$EstMethod = mod$EstMethod
ModelOutput$SampleSize = mod$n
ModelOutput$ParticleNumber = mod$ParticleNumber
ModelOutput$epsilon = mod$epsilon
ModelOutput$OptMethod = row.names(optim.output)
if(!is.null(mod$TrueParam)) ModelOutput$ParamScheme = mod$ParamScheme
ModelOutput$loglik = loglik
ModelOutput$AIC = Criteria[1]
ModelOutput$BIC = Criteria[2]
ModelOutput$AICc = Criteria[3]
ModelOutput$ConvergeStatus = convcode
ModelOutput$kkt1 = kkt1
ModelOutput$kkt2 = kkt2
}
return(ModelOutput)
}
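# End-to-end sketch (assumes optimx, itsmr, and the functions in this file are
# loaded): simulate, parse, initialize, and fit a Poisson-AR(1) by the PF
# likelihood. All parameter values are hypothetical.
if (FALSE) {
y      <- sim_lgc(400, "Poisson", MargParm = 2, ARParm = 0.6, MAParm = NULL)
mod    <- ModelScheme(DependentVar = y, CountDist = "Poisson",
ARMAModel = c(1, 0), Task = "Optimization",
ParticleNumber = 200, OutputType = "list")
theta0 <- InitialEstimates(mod)     # moment/ARMA-based starting values
fit    <- FitMultiplePF_Res(theta0, mod)
fit$ParamEstimates
fit$FitStatistics                   # loglik, AIC, BIC, AICc
}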
# same as FitMultiplePF_Res - the output part is transferred into another function to increase modularity
FitMultiplePF_Res_New = function(theta, mod){
#====================================================================================#
# PURPOSE Fit the Particle Filter log-likelihood with resampling.
# This function maximizes the PF likelihood, nfit many times for nparts
# many choices of particle numbers, thus yielding a total of nfit*nparts
# many estimates.
#
# INPUT
# theta initial parameters
# data count series
# CountDist prescribed count distribution
# Particles vector with different choices for number of particles
# ARMAModel order of the underlying ARMA model
# epsilon resampling when ESS<epsilon*N
# LB parameter lower bounds
# UB parameter upper bounds
# OptMethod
#
#
# OUTPUT
# All parameter estimates, standard errors, likelihood value, AIC, etc
#
# Authors Stefanos Kechagias, James Livsey, Vladas Pipiras
# Date April 2020
# Version 3.6.3
#====================================================================================#
# retrieve parameter, sample size etc
nparts = length(mod$ParticleNumber)
nparms = length(theta)
nfit = 1
n = length(mod$DependentVar)
# allocate memory to save parameter estimates, hessian values, and loglik values
ParmEst = matrix(0,nrow=nfit*nparts,ncol=nparms)
se = matrix(NA,nrow=nfit*nparts,ncol=nparms)
loglik = rep(0,nfit*nparts)
convcode = rep(0,nfit*nparts)
kkt1 = rep(0,nfit*nparts)
kkt2 = rep(0,nfit*nparts)
# Each realization will be fitted nfit*nparts many times
for (j in 1:nfit){
set.seed(j)
# for each fit repeat for different number of particles
for (k in 1:nparts){
# FIX ME: I need to somehow update this in mod (number of particles to be used). It now works only for 1 choice of ParticleNumber
ParticleNumber = mod$ParticleNumber[k]
if(mod$Task == 'Optimization'){
# run the optimization for our model
optim.output <- optimx(
par = theta,
fn = ParticleFilter_Res_ARMA,
lower = mod$LB,
upper = mod$UB,
hessian = TRUE,
method = mod$OptMethod,
mod = mod)
loglikelihood = optim.output["value"]
}else{
optim.output = as.data.frame(matrix(rep(NA,8+length(theta)), ncol=8+length(theta)))
names(optim.output) = c(mapply(sprintf, rep("p%.f",length(theta)), (1:length(theta)), USE.NAMES = FALSE),
"value", "fevals", "gevals", "niter", "convcode", "kkt1", "kkt2", "xtime")
optim.output[,1:length(theta)] = theta
startTime = Sys.time()
loglikelihood = ParticleFilter_Res_ARMA(theta,mod)
endTime = Sys.time()
runTime = difftime(endTime, startTime, units = 'secs')
optim.output[,(length(theta)+1)] = loglikelihood
optim.output[,(length(theta)+2)] = 1
optim.output[,(length(theta)+3)] = 1
optim.output[,(length(theta)+4)] = 0
optim.output[,(length(theta)+8)] = as.numeric(runTime)
}
# save estimates, loglik value and diagonal hessian
ParmEst[nfit*(k-1)+j,] = as.numeric(optim.output[1:nparms])
loglik[nfit*(k-1) +j] = optim.output$value
convcode[nfit*(k-1) +j] = optim.output$convcode
kkt1[nfit*(k-1) +j] = optim.output$kkt1
kkt2[nfit*(k-1) +j] = optim.output$kkt2
# compute Hessian
# t5 = tic()
if(mod$Task == "Optimization"){
H = gHgen(par = ParmEst[nfit*(k-1)+j,],
fn = ParticleFilter_Res_ARMA,
mod = mod)
# t5 = tic()-t5
# print(t5)
# if I get all na for one row and one col of H remove it
# H$Hn[rowSums(is.na(H$Hn)) != ncol(H$Hn), colSums(is.na(H$Hn)) != nrow(H$Hn)]
# t6 = tic()
if (!any(is.na(rowSums(H$Hn)))){
# save standard errors from Hessian
if(H$hessOK && det(H$Hn)>10^(-8)){
se[nfit*(k-1)+j,] = sqrt(abs(diag(solve(H$Hn))))
}else{
se[nfit*(k-1)+j,] = rep(NA, nparms)
}
}else{
# remove the NA rows and columns from H
Hnew = H$Hn[rowSums(is.na(H$Hn)) != ncol(H$Hn), colSums(is.na(H$Hn)) != nrow(H$Hn)]
# find which rows are missing and which are not
NAIndex = which(colSums(is.na(H$Hn))==nparms)
NonNAIndex = which(colSums(is.na(H$Hn))==1)
#repeat the previous ifelse for the reduced H matrix
if(det(Hnew)>10^(-8)){
se[nfit*(k-1)+j,NonNAIndex] = sqrt(abs(diag(solve(Hnew))))
}else{
se[nfit*(k-1)+j,NAIndex] = rep(NA, length(NAIndex))
}
}
}
# t6 = tic()-t6
# print(t6)
}
}
# Compute model selection criteria (assuming one fit)
Criteria = Criteria.lgc(loglik, mod)
FitResults = list()
FitResults$ParmEst = ParmEst
FitResults$se = se
FitResults$FitStatistics = c(loglik, Criteria)
FitResults$OptimOutput = c(convcode,kkt1,kkt2)
names(FitResults$FitStatistics) = c("loglik", "AIC", "BIC", "AICc")
if(mod$Task=="Optimization")names(FitResults$OptimOutput) = c("ConvergeStatus", "kkt1", "kkt2")
return(FitResults)
}
# prepare the output of the wrapper function
PrepareOutput = function(mod, FitResults){
if(mod$OutputType=="list"){
# save the results in a list
ModelOutput = list()
# populate information from fitting
ModelOutput$ParamEstimates = FitResults$ParmEst
ModelOutput$FitStatistics = FitResults$FitStatistics
if(mod$Task=="Optimization"){
ModelOutput$StdErrors = FitResults$se
ModelOutput$OptimOutput = FitResults$OptimOutput
}
# populate information from parsing the initial inputs
ModelOutput$Model = paste(mod$CountDist, paste("ARMA(",mod$ARMAModel[1],",",mod$ARMAModel[2],")",sep=""), sep= "-")
ModelOutput$Task = mod$Task
if(!is.null(mod$TrueParam)) ModelOutput$TrueParam = mod$TrueParam
if(!is.null(mod$initialParam)) ModelOutput$initialParam = mod$initialParam
ModelOutput$EstMethod = mod$EstMethod
ModelOutput$SampleSize = mod$n
if(FitResults$FitStatistics["loglik"]==mod$loglik_BadValue1) ModelOutput$WarnMessage = "WARNING: The ARMA polynomial must be causal and invertible."
if(FitResults$FitStatistics["loglik"]==mod$loglik_BadValue2) ModelOutput$WarnMessage = "WARNING: Some of the weights are either too small or sum to 0."
# assign names to all output elements
if(!is.null(mod$TrueParam)) names(ModelOutput$TrueParam) = mod$parmnames
colnames(ModelOutput$ParamEstimates) = mod$parmnames
if(mod$Task=="Optimization")colnames(ModelOutput$StdErrors) = paste("se(", mod$parmnames,")", sep="")
#names(ModelOutput$FitStatistics) = c("loglik", "AIC", "BIC", "AICc")
#if(mod$Task=="Optimization")names(ModelOutput$OptimOutput) = c("ConvergeStatus", "kkt1", "kkt2")
}else{
ncols = ifelse(is.null(mod$TrueParam), 3*mod$nparms+15, 4*mod$nparms+16)
ModelOutput = data.frame(matrix(ncol = ncols, nrow = 1))
# specify output names
if(!is.null(mod$TrueParam)){
colnames(ModelOutput) = c(
'CountDist','ARMAModel', 'Regressor',
paste("True_", mod$parmnames, sep=""), paste("InitialEstim_", mod$parmnames, sep=""),
mod$parmnames, paste("se(", mod$parmnames,")", sep=""),
'EstMethod', 'SampleSize', 'ParticleNumber', 'epsilon', 'OptMethod', 'ParamScheme',
"loglik", "AIC", "BIC", "AICc", "ConvergeStatus", "kkt1", "kkt2")
}else{
colnames(ModelOutput) = c(
'CountDist','ARMAModel', 'Regressor',
paste("InitialEstim_", mod$parmnames, sep=""),
mod$parmnames, paste("se(", mod$parmnames,")", sep=""),
'EstMethod', 'SampleSize', 'ParticleNumber', 'epsilon', 'OptMethod',
"loglik", "AIC", "BIC", "AICc", "ConvergeStatus", "kkt1", "kkt2")
}
# Start Populating the output data frame
ModelOutput$CountDist = mod$CountDist
ModelOutput$ARMAModel = paste("ARMA(",mod$ARMAModel[1],",",mod$ARMAModel[2],")",sep="")
ModelOutput$Regressor = !is.null(mod$Regressor)
offset = 4
# true Parameters
if(!is.null(mod$TrueParam)){
if(mod$nMargParms>0){
ModelOutput[, offset:(offset + mod$nMargParms -1)] = mod$TrueParam[1:mod$nMargParms]
offset = offset + mod$nMargParms
}
if(mod$nAR>0){
ModelOutput[, offset:(offset + mod$nAR -1)] = mod$TrueParam[ (mod$nMargParms+1):(mod$nMargParms+mod$nAR) ]
offset = offset + mod$nAR
}
if(mod$nMA>0){
ModelOutput[, offset:(offset + mod$nMA -1)] = mod$TrueParam[ (mod$nMargParms+mod$nAR+1):(mod$nMargParms+mod$nAR+mod$nMA)]
offset = offset + mod$nMA
}
}
# Initial Parameter Estimates
if(mod$nMargParms>0){
ModelOutput[, offset:(offset + mod$nMargParms -1 )] = mod$initialParam[1:mod$nMargParms ]
offset = offset + mod$nMargParms
}
if(mod$nAR>0){
ModelOutput[, offset:(offset + mod$nAR-1)] = mod$initialParam[(mod$nMargParms+1):(mod$nMargParms+mod$nAR) ]
offset = offset + mod$nAR
}
if(mod$nMA>0){
ModelOutput[, offset:(offset + mod$nMA-1)] = mod$initialParam[(mod$nMargParms+mod$nAR+1):(mod$nMargParms+mod$nAR+mod$nMA) ]
offset = offset + mod$nMA
}
# Parameter Estimates
if(mod$nMargParms>0){
ModelOutput[, offset:(offset + mod$nMargParms-1)] = FitResults$ParmEst[,1:mod$nMargParms]
offset = offset + mod$nMargParms
}
if(mod$nAR>0){
ModelOutput[, offset:(offset + mod$nAR-1)] = FitResults$ParmEst[,(mod$nMargParms+1):(mod$nMargParms+mod$nAR)]
offset = offset + mod$nAR
}
if(mod$nMA>0){
ModelOutput[, offset:(offset + mod$nMA-1)] = FitResults$ParmEst[,(mod$nMargParms+mod$nAR+1):(mod$nMargParms+mod$nAR+mod$nMA)]
offset = offset + mod$nMA
}
# Parameter Std Errors
if(mod$nMargParms>0){
ModelOutput[, offset:(offset + mod$nMargParms-1)] = FitResults$se[,1:mod$nMargParms]
offset = offset + mod$nMargParms
}
if(mod$nAR>0){
ModelOutput[, offset:(offset + mod$nAR-1)] = FitResults$se[,(mod$nMargParms+1):(mod$nMargParms+mod$nAR)]
offset = offset + mod$nAR
}
if(mod$nMA>0){
ModelOutput[, offset:(offset + mod$nMA-1)] = FitResults$se[,(mod$nMargParms+mod$nAR+1):(mod$nMargParms+mod$nAR+mod$nMA)]
}
ModelOutput$EstMethod = mod$EstMethod
ModelOutput$SampleSize = mod$n
ModelOutput$ParticleNumber = mod$ParticleNumber
ModelOutput$epsilon = mod$epsilon
ModelOutput$OptMethod = mod$OptMethod
if(!is.null(mod$TrueParam)) ModelOutput$ParamScheme = mod$ParamScheme
ModelOutput$loglik = FitResults$FitStatistics["loglik"]
ModelOutput$AIC = FitResults$FitStatistics["AIC"]
ModelOutput$BIC = FitResults$FitStatistics["BIC"]
ModelOutput$AICc = FitResults$FitStatistics["AICc"]
ModelOutput$ConvergeStatus = FitResults$OptimOutput["ConvergeStatus"]
ModelOutput$kkt1 = FitResults$OptimOutput["kkt1"]
ModelOutput$kkt2 = FitResults$OptimOutput["kkt2"]
}
return(ModelOutput)
}
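# Sketch of the modular pipeline that replaces FitMultiplePF_Res: fit first,
# then format. Assumes theta0 and mod as in the sketch after FitMultiplePF_Res.
if (FALSE) {
FitResults  <- FitMultiplePF_Res_New(theta0, mod)   # numerical fitting only
ModelOutput <- PrepareOutput(mod, FitResults)       # output formatting only
ModelOutput$ParamEstimates
}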
# Add some functions that I will need for common random numbers
get_rand_state <- function() {
# Using `get0()` here to have `NULL` output in case the object doesn't exist.
# Also using `inherits = FALSE` to get the value exactly from the global environment
# and not from one of its parents.
get0(".Random.seed", envir = .GlobalEnv, inherits = FALSE)
}
set_rand_state <- function(state) {
# Assigning `NULL` state might lead to unwanted consequences
if (!is.null(state)) {
assign(".Random.seed", state, envir = .GlobalEnv, inherits = FALSE)
}
}
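# Common-random-numbers sketch: saving and restoring .Random.seed makes two
# evaluations at the same theta use identical randomness, which keeps the PF
# likelihood surface smooth for the optimizer.
if (FALSE) {
runif(1)                   # make sure .Random.seed exists
state <- get_rand_state()
u1 <- runif(3)
set_rand_state(state)
u2 <- runif(3)
identical(u1, u2)          # TRUE
}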
#---------check causality and invertibility
CheckStability = function(AR,MA){
if (is.null(AR) && is.null(MA)) return(0)
# return 1 if model is not stable (causal and invertible)
if(!is.null(AR) && is.null(MA)){
rc = ifelse(any(abs( polyroot(c(1, -AR)) ) < 1), 1,0)
}
if(!is.null(MA) && is.null(AR)){
rc = ifelse(any(abs( polyroot(c(1, -MA)) ) < 1),1,0)
}
if(!is.null(MA) && !is.null(AR)){
rc = ifelse(any(abs( polyroot(c(1, -AR)) ) < 1) || any(abs( polyroot(c(1, -MA)) ) < 1) , 1, 0)
}
return(rc)
}
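# Quick checks of the stability flag on the AR side (0 = stable, 1 = not stable):
if (FALSE) {
CheckStability(AR = 0.5, MA = NULL)   # 0: root of 1-0.5z is 2, outside the unit circle
CheckStability(AR = 1.1, MA = NULL)   # 1: root is 1/1.1, inside the unit circle
}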
# compute initial estimates
InitialEstimates = function(mod){
# require(itsmr)
# allocate memory
est = rep(NA, mod$nMargParms+sum(mod$ARMAModel))
#------------First compute estimates of marginal parameters
#-----Poisson case
if(mod$CountDist=="Poisson"){
if(mod$nreg==0){
# Method of moments estimate for Poisson parameter (via the mean)
est[1] = mean(mod$DependentVar)
}else{
# GLM for the mean that depends on time
# CHECK ME: If I fit a Poisson AR(3) to the data example of the JASA paper and the code below doesn't
# specify the poisson family (it would pick up the default distribution of the glm function), then there will
# be a numerical error in the likelihood. Check it!
# To fix #36 I will introduce an ifelse below. Another way to do things would be to pass the dataframe
# through the mod structure and use the data frame and the variable names in classic lm fashion. However,
# with my design of passing the DependentVar and the Regressor as separate arguments in mod, I need the
# ifelse statement below.
# glmPoisson = glm(mod$DependentVar~mod$Regressor[,2:(mod$nreg+1)], family = "poisson")
if(mod$nint){
glmPoisson = glm(mod$DependentVar~ as.matrix(mod$Regressor[,2:(mod$nreg+1)]), family = "poisson")
}else{
glmPoisson = glm(mod$DependentVar~0 + as.matrix(mod$Regressor[,1:mod$nreg]), family = "poisson")
}
est[1:mod$nMargParms] = as.numeric(glmPoisson[1]$coef)
}
}
#-----Neg Binomial case
if(mod$CountDist=="Negative Binomial"){
if(mod$nreg==0){
xbar = mean(mod$DependentVar)
sSquare = var(mod$DependentVar)
# Method of Moments for negBin
rEst = xbar^2/(sSquare - xbar)
pEst = 1 - xbar/sSquare
est[1:2] = c(rEst, pEst)
}else{
# GLM for the mean that depends on time
#glmNB = glm.nb(mod$DependentVar~mod$Regressor[,2:(mod$nreg+1)])
if(mod$nint){
glmNB = glm.nb(mod$DependentVar ~ as.matrix(mod$Regressor[,2:(mod$nreg+1)]))
}else{
glmNB = glm.nb(mod$DependentVar ~ 0 + as.matrix(mod$Regressor[,1:mod$nreg]))
}
est[1:(mod$nMargParms-1)] = as.numeric(glmNB[1]$coef)
# MoM for the overdispersion param in the NegBin2 parametrization
est[mod$nMargParms] = sum(glmNB$fitted.values^2)/(sum((mod$DependentVar-glmNB$fitted.values)^2-glmNB$fitted.values))
}
}
if(mod$CountDist=="Mixed Poisson"){
if(mod$nreg==0){
# pmle for marginal parameters
MixPois_PMLE <- pmle.pois(mod$DependentVar,2)
pEst = MixPois_PMLE[[1]][1]
l1Est = MixPois_PMLE[[2]][1]
l2Est = MixPois_PMLE[[2]][2]
# correct estimates if they are outside the feasible region
# if(pEst<mod$LB[1]){pEst = 1.1*mod$LB[1]}
# if(pEst>mod$UB[1]){pEst = 0.9*mod$UB[1]}
#
# if(l1Est<mod$LB[2]){l1Est = 1.1*mod$LB[2]}
# if(l2Est<mod$LB[3]){l2Est = 1.1*mod$LB[3]}
est[1:3] = c(l1Est, l2Est, pEst)
}else{
#library(mixtools)
# MP_fit = poisregmixEM(mod$DependentVar, mod$Regressor[,2:(mod$nreg+1)])
if(mod$nint){
MP_fit = poisregmixEM(mod$DependentVar, as.matrix(mod$Regressor[,2:(mod$nreg+1)]))
}else{
MP_fit = poisregmixEM(mod$DependentVar, as.matrix(mod$Regressor[,1:mod$nreg]),addintercept = FALSE)
}
if(MP_fit$lambda[1]<0.5){
est[1:mod$nMargParms] = as.numeric(c(MP_fit$beta, MP_fit$lambda[1]))
}
else{
# check me: should I give an error here?
# check me: does this work for more regressors? I think there will be an error with the indices below
if(mod$nint){
est[1:mod$nMargParms] = as.numeric(c(MP_fit$beta[3:4],MP_fit$beta[1:2], 1-MP_fit$lambda[1]))
}else{
est[1:mod$nMargParms] = as.numeric(c(MP_fit$beta[2],MP_fit$beta[1], 1-MP_fit$lambda[1]))
}
}
#could also use the flexmix package
#check me: for now we allow mixture of only two components
#MP_fit <- flexmix(mod$DependentVar ~ mod$Regressor[,2:(mod$nreg+1)], k = 2, model = FLXMRglm(family = "poisson"))
#refit1 = refit(MP_fit)
#beta1Hat = as.numeric(refit1@coef[1:2])
#beta2Hat = as.numeric(refit1@coef[3:4])
#ProbHat = MP_fit@prior[1]
#est[1:mod$nMargParms] = c(beta1Hat, beta2Hat,ProbHat)
}
}
#-----Generalized Poisson case
if(mod$CountDist=="Generalized Poisson" || mod$CountDist=="Generalized Poisson 2"){
if(mod$nreg==0){
xbar = mean(mod$DependentVar)
sSquare = var(mod$DependentVar)
# the GenPois density is parametrized through the mean with the pair (alpha,mu) - see (2.4) in Famoye 1994
# solve (2.5) in Famoye 1994 wrt alpha and replace sample estimates
alpha_1 = (sqrt(sSquare/xbar) - 1)/xbar
alpha_2 = (-sqrt(sSquare/xbar) - 1)/xbar
# to choose between the two solutions I'll check for "overdispersion"
if (sSquare>=xbar) alpha=alpha_1
if (sSquare<xbar) alpha =alpha_2
# FIX ME: the GenPois densities in VGAM do not allow for negative alpha yet
if (alpha<0) alpha = 10^(-6)
est[1:2] = c(alpha, xbar)
}else{
# run GenPois GLM using the VGAM package - CHECK ME: should we surface maxit as an option to the user?
if(mod$nint){
fit = VGAM::vglm( mod$DependentVar ~ as.matrix(mod$Regressor[,2:(1+mod$nreg)]), genpoisson2, maxit=60)
}else{
fit = VGAM::vglm( mod$DependentVar ~ 0 + as.matrix(mod$Regressor[,1:mod$nreg]), genpoisson2(zero=0), maxit=60)
}
# save linear predictor coefficients
est[1:(mod$nMargParms-1)] = as.numeric(coef(fit, matrix = TRUE)[,1])
# save dispersion coefficient
est[mod$nMargParms] = loglink(as.numeric(coef(fit, matrix = TRUE)[1,2]),inverse = TRUE)
}
}
#-----Binomial case
if(mod$CountDist=="Binomial"){
if(mod$nreg==0){
xbar = mean(mod$DependentVar)
# Method of Moments for Binomial E(X)=rp, where r = ntrials
pEst = xbar/mod$ntrials
est[1] = pEst
}else{
#glmBinomial = glm(cbind(mod$DependentVar,mod$ntrials-mod$DependentVar) ~ mod$Regressor[,2:(mod$nreg+1)] , family = 'binomial')
if(mod$nint){
glmBinomial = glm(cbind(mod$DependentVar,mod$ntrials-mod$DependentVar) ~ as.matrix(mod$Regressor[,2:(mod$nreg+1)]) , family = 'binomial')
}else{
glmBinomial = glm(cbind(mod$DependentVar,mod$ntrials-mod$DependentVar) ~ 0 + as.matrix(mod$Regressor[,1:mod$nreg]) , family = 'binomial')
}
est[1:mod$nMargParms] = as.numeric(glmBinomial$coef)
}
}
#-----Zero Inflation Poisson
if(mod$CountDist=="ZIP"){
if(mod$nreg==0){
# pmle for marginal parameters
ZIP_PMLE <- poisson.zihmle(mod$DependentVar, type = c("zi"), lowerbound = 0.01, upperbound = 10000)
lEst = ZIP_PMLE[1]
pEst = ZIP_PMLE[2]
est[1:2] = c(lEst, pEst)
}else{
#zeroinfl_reg <- zeroinfl( mod$DependentVar~ mod$Regressor[,2:(mod$nreg+1)] | 1,)
#zeroinfl_reg = vglm( mod$DependentVar~ mod$Regressor[,2:(mod$nreg+1)], zipoisson(zero=1))
if(mod$nint){
zeroinfl_reg = vglm( mod$DependentVar ~ as.matrix(mod$Regressor[,2:(mod$nreg+1)]), zipoisson(zero=1))
}else{
zeroinfl_reg = vglm( mod$DependentVar ~ 0 + as.matrix(mod$Regressor[,1:mod$nreg]), zipoisson(zero=0))
}
est[1:(mod$nMargParms-1)] = as.numeric(coef(zeroinfl_reg))[2:(mod$nMargParms)]
est[mod$nMargParms] = plogis(as.numeric(coef(zeroinfl_reg))[1])
}
}
#------------ARMA Initial Estimates
# Transform (1) in the JASA paper to retrieve the "observed" latent series and fit an ARMA
if(mod$nreg==0){
Cxt = mod$mycdf(mod$DependentVar,est[1:mod$nMargParms])
# if Cxt = 1, I will need to deal with this somehow. In the Binomial case it seems very likely to get 1,
# but this can also happen for other distributions. So far, in the initial estimation I have only seen the
# issue in the Binomial case; however, I have come across it in the particle filter estimation
# for misspecified models. For now I will simply clamp Cxt away from 0 and 1.
if (mod$CountDist=="Binomial"){
Cxt[Cxt==1] = 1-10^(-16)
Cxt[Cxt==0] = 0+10^(-16)
}
# I am doing it for the binomial only so that I will get an error if it happens for another case.
if(max(mod$ARMAModel)>0) armafit = itsmr::arma(qnorm(Cxt),mod$nAR,mod$nMA)
if(mod$nAR>0) est[(mod$nMargParms+1):(mod$nMargParms+mod$nAR)] = armafit$phi
if(mod$nMA>0) est[(1+mod$nMargParms+mod$nAR):(mod$nMargParms+sum(mod$ARMAModel))] = armafit$theta
}else{
# put the parameters in appropriate format
Params = RetrieveParameters(est,mod)
# see comment above regarding Cxt=1 and Cxt = 0
Cxt = mod$mycdf(mod$DependentVar,Params$ConstMargParm, Params$DynamMargParm)
if (mod$CountDist=="Binomial" || mod$CountDist=="Mixed Poisson" ){
Cxt[Cxt==1] = 1-10^(-16)
Cxt[Cxt==0] = 0+10^(-16)
}
# Transform (1) in the JASA paper to retrieve the "observed" latent series and fit an ARMA
if(mod$nAR || mod$nMA) ARMAFit = itsmr::arma(qnorm(Cxt),mod$nAR,mod$nMA)
if(mod$nAR) est[(mod$nMargParms+1):(mod$nMargParms+mod$nAR)] = ARMAFit$phi
if(mod$nMA) est[(1+mod$nMargParms+mod$nAR):(mod$nMargParms+sum(mod$ARMAModel))] = ARMAFit$theta
}
# add the parmnames on theta fix me: does this affect performance?
names(est) = mod$parmnames
return(est)
}
# innovations algorithm
InnovAlg = function(Parms,gamma, mod) {
# adapted from the Innovations.algorithm of itsmr
# Compute autocovariance kappa(i,j) per equation (3.3.3)
# Optimized for i >= j and j > 0
kappa = function(i,j) {
if (j > m)
return(sum(theta_r[1:(q+1)] * theta_r[(i-j+1):(i-j+q+1)]))
else if (i > 2*m)
return(0)
else if (i > m)
return((gamma[i-j+1] - sum(phi * gamma[abs(seq(1-i+j,p-i+j))+1]))/sigma2)
else
return(gamma[i-j+1]/sigma2)
}
phi = Parms$AR
sigma2 = 1
N = length(gamma)
theta_r = c(1,Parms$MA,numeric(N))
# Innovations algorithm
p = ifelse(is.null(Parms$AR),0,length(Parms$AR))
q = ifelse(is.null(Parms$MA),0,length(Parms$MA))
m = max(p,q)
Theta = list()
v = rep(NA,N+1)
v[1] = kappa(1,1)
StopCondition = FALSE
n = 1
while(!StopCondition && n<N ) {
Theta[[n]] <- rep(0,q)
Theta[[n]][n] = kappa(n+1,1)/v[1]
if(n>q && mod$nAR==0) Theta[[n]][n]= 0
if(n>1){
for (k in 1:(n-1)) {
js <- 0:(k-1)
Theta[[n]][n-k] <- (kappa(n+1,k+1) - sum(Theta[[k]][k-js]*Theta[[n]][n-js]*v[js+1])) / v[k+1]
}
}
js = 0:(n-1)
v[n+1] = kappa(n+1,n+1) - sum(Theta[[n]][n-js]^2*v[js+1])
if(mod$nAR==0 && mod$nMA>0) StopCondition = (abs(v[n+1]-v[n])< mod$maxdiff)
if(mod$nAR>0 && mod$nMA==0) StopCondition = (n>3*m)
n = n+1
}
v = v/v[1]
StopCondition = (abs(v[n+1]-v[n])< mod$maxdiff)
return(list(n=n-1,thetas=lapply(Theta[ ][1:(n-1)], function(x) {x[1:q]}),v=v[1:(n-1)]))
}
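# Minimal sketch of InnovAlg on a pure MA(1) with theta = 0.5 (hypothetical
# values; assumes itsmr is available). The first coefficient equals
# gamma(1)/gamma(0) = 0.5/1.25 = 0.4, and the normalized v_n decrease with n
# (Brockwell and Davis, 5.2.16 and 5.3.9).
if (FALSE) {
Parms <- list(AR = NULL, MA = 0.5)
mod   <- list(nAR = 0, nMA = 1, maxdiff = 10^(-8))
gamma <- itsmr::aacvf(list(phi = 0, theta = 0.5, sigma2 = 1), 50)
IA    <- InnovAlg(Parms, gamma, mod)
IA$thetas[[1]]   # 0.4
IA$n             # iterations until |v_n - v_{n-1}| < maxdiff
IA$v             # normalized one-step prediction variances
}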
# simulate from our model
sim_lgc_old = function(n, CountDist, MargParm, ARParm, MAParm, Regressor=NULL, Intercept=NULL){
# Generate latent Gaussian model
z =arima.sim(model = list( ar = ARParm, ma=MAParm ), n = n)
z = z/sd(z) # standardize the data
# add a column of ones in the Regressors if Intercept is present
if (!is.null(Intercept) && Intercept==TRUE && sum(as.matrix(Regressor)[,1])!=n){
Regressor = as.data.frame(cbind(rep(1,n),Regressor))
names(Regressor)[1] = "Intercept"
}
# number of regressors
nreg = ifelse(is.null(Regressor), 0, dim(Regressor)[2]-as.numeric(Intercept))
if(nreg==0){
# retrieve marginal inverse cdf
myinvcdf = switch(CountDist,
"Poisson" = qpois,
"Negative Binomial" = function(x, theta){ qnbinom (x, theta[1], 1-theta[2]) },
"Generalized Poisson" = function(x, theta){ qGpois (x, theta[1], theta[2])},
"Generalized Poisson 2" = function(x, theta){ qGpois (x, theta[2], theta[1])},
"Binomial" = qbinom,
"Mixed Poisson" = function(x, theta){qmixpois1(x, theta[1], theta[2], theta[3])},
"ZIP" = function(x, theta){ qzipois(x, theta[1], theta[2]) },
stop("The specified distribution is not supported.")
)
# get the final counts
x = myinvcdf(pnorm(z), MargParm)
}else{
# retrieve inverse count cdf
myinvcdf = switch(CountDist,
"Poisson" = function(x, ConstMargParm, DynamMargParm){ qpois (x, DynamMargParm)},
"Negative Binomial" = function(x, ConstMargParm, DynamMargParm){ qnbinom (x, ConstMargParm, 1-DynamMargParm)},
"Generalized Poisson" = function(x, ConstMargParm, DynamMargParm){ qGpois (x, ConstMargParm, DynamMargParm)},
"Generalized Poisson 2" = function(x, ConstMargParm, DynamMargParm){ qgenpois2 (x, DynamMargParm, ConstMargParm)},
"Binomial" = function(x, ConstMargParm, DynamMargParm){ qbinom (x, ConstMargParm, DynamMargParm)},
"Mixed Poisson" = function(x, ConstMargParm, DynamMargParm){qmixpois1(x, DynamMargParm[,1], DynamMargParm[,2], ConstMargParm)},
"ZIP" = function(x, ConstMargParm, DynamMargParm){ qzipois (x, DynamMargParm[,1], DynamMargParm[,2]) },
stop("The specified distribution is not supported.")
)
# regression parameters
beta = MargParm[1:(nreg+1)]
m = exp(as.matrix(Regressor)%*%beta)
if(CountDist == "Poisson" && nreg>0){
ConstMargParm = NULL
DynamMargParm = m
}
if(CountDist == "Negative Binomial" && nreg>0){
ConstMargParm = 1/MargParm[nreg+2]
DynamMargParm = MargParm[nreg+2]*m/(1+MargParm[nreg+2]*m)
}
if(CountDist == "Generalized Poisson" && nreg>0){
ConstMargParm = MargParm[nreg+2]
DynamMargParm = m
}
if(CountDist == "Generalized Poisson 2" && nreg>0){
ConstMargParm = MargParm[nreg+2]
DynamMargParm = m
}
if(CountDist == "Binomial" && nreg>0){
# ConstMargParm = MargParm[nreg+2]
DynamMargParm = m/(1+m)
}
if(CountDist == "Mixed Poisson" && nreg>0){
ConstMargParm = c(MargParm[nreg*2+3], 1 - MargParm[nreg*2+3])
DynamMargParm = cbind(exp(as.matrix(Regressor)%*%MargParm[1:(nreg+1)]),
exp(as.matrix(Regressor)%*%MargParm[(nreg+2):(nreg*2+2)]))
}
if(CountDist == "ZIP" && nreg>0){
ConstMargParm = NULL
DynamMargParm = cbind(exp(as.matrix(Regressor)%*%MargParm[1:(nreg+1)]),
1/(1+exp(-as.matrix(Regressor)%*%MargParm[(nreg+2):(nreg*2+2)])))
}
# get the final counts
x = myinvcdf(pnorm(z), ConstMargParm, DynamMargParm)
}
return(as.numeric(x))
}
# simulate from our model - a bit more concise version
sim_lgc = function(n, CountDist, MargParm, ARParm, MAParm, Regressor=NULL, Intercept=NULL, ntrials=NULL,...){
# combine all parameters in one vector
TrueParam = c(MargParm,ARParm,MAParm)
# set ARMAModel orders
ARMAModel = c(length(ARParm), length(MAParm))
# add a column of ones in the Regressors if Intercept is present
if (!is.null(Intercept) && Intercept==TRUE && sum(as.matrix(Regressor)[,1])!=n){
Regressor = as.data.frame(cbind(rep(1,n),Regressor))
names(Regressor)[1] = "Intercept"
}
# parse the model information - needed for distribution functions
mod = ModelScheme(SampleSize = n,
CountDist = CountDist,
ARMAModel = ARMAModel,
TrueParam = TrueParam,
Regressor = Regressor,
Intercept = Intercept,
Task = "Synthesis",
ntrials = ntrials)
# reorganize the parameters in expected format
Parms = RetrieveParameters(TrueParam,mod)
# Generate latent Gaussian model
z =arima.sim(model = list( ar = ARParm, ma=MAParm), n = n)
z = z/sd(z) # standardize the data
# apply the inverse count cdf - check me: in Binomial case ntrials comes as global
if(mod$nreg==0){
x = mod$myinvcdf(pnorm(z), Parms$MargParms)
}else{
x = mod$myinvcdf(pnorm(z), Parms$ConstMargParm, Parms$DynamMargParm)
}
# FIX ME: Should I be returning ts objects?
return(as.numeric(x))
}
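# Usage sketch (hypothetical values): simulate 500 Negative Binomial counts
# over a latent ARMA(1,1). The copula construction preserves the marginal, so
# the sample mean should be near r*p/(1-p) = 2 for (r,p) = (3, 0.4).
if (FALSE) {
y <- sim_lgc(500, "Negative Binomial", MargParm = c(3, 0.4),
ARParm = 0.5, MAParm = -0.2)
mean(y)   # around 2
acf(y)    # inherits dependence from the latent ARMA(1,1)
}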
#---------Compute AIC, BIC, AICc
Criteria.lgc = function(loglik, mod){
#---------------------------------#
# Purpose: Compute AIC, BIC, AICc for our models
#
# Inputs:
# loglik      log likelihood value at the optimum
# nparms      number of parameters in the model
# n           sample size
#
# NOTES: The Gaussian likelihood we are minimizing has the form:
# l0 = 0.5*logdet(G) + 0.5*X'*inv(G)*X. We will need to
# bring this to the classic form before we compute the
# criteria (see the log of relation (8.6.1) in Brockwell and Davis)
#
# No correction is necessary in the PF case
#
# Authors Stefanos Kechagias, James Livsey, Vladas Pipiras
# Date July 2020
# Version 3.6.3
#---------------------------------#
if(mod$EstMethod!="GL"){
l1 = -loglik
}else{
l1 = -loglik - mod$n/2*log(2*pi)
}
AIC = 2*mod$nparms - 2*l1
BIC = log(mod$n)*mod$nparms - 2*l1
AICc = AIC + (2*mod$nparms^2 + 2*mod$nparms)/(mod$n-mod$nparms-1)
AllCriteria = c(AIC, BIC, AICc)
return(AllCriteria)
}
logLik.lgc = function(object){
return(object$FitStatistics[1])
}
AIC.lgc = function(object){
return(object$FitStatistics[2])
}
BIC <- function(object, ...) UseMethod("BIC")
BIC.lgc = function(object){
return(object$FitStatistics[3])
}
se <- function(object, ...) UseMethod("se")
se.lgc = function(object){
return(object$StdErrors)
}
coefficients <- function(object, ...) UseMethod("coefficients")
coefficients.lgc = function(object){
return(object$ParamEstimates)
}
model <- function(object, ...) UseMethod("model")
model.lgc = function(object){
if ((object$ARMAModel[1]>0) && (object$ARMAModel[2]>0)){
ARMAModel = sprintf("ARMA(%.0f, %.0f)",object$ARMAModel[1], object$ARMAModel[2])
}
if ((object$ARMAModel[1]>0) && (object$ARMAModel[2]==0)){
ARMAModel = sprintf("AR(%.0f)",object$ARMAModel[1])
}
if ((object$ARMAModel[1]==0) && (object$ARMAModel[2]>0)){
ARMAModel = sprintf("MA(%.0f)",object$ARMAModel[2])
}
if ((object$ARMAModel[1]==0) && (object$ARMAModel[2]==0)){
ARMAModel = "White Noise"
}
a = data.frame(object$CountDist, ARMAModel)
names(a) = c("Distribution", "Model")
return(a)
}
ComputeLimits = function(mod, Parms, t, Zhat, Rt){
# a and b are the arguments in the two normal cdfs in the 4th line in equation (19) in JASA paper
Lim = list()
# fix me: this will be ok for AR or MA models but for ARMA? is it p+q instead of max(p,q)
# fix me: why do I have t(Parms$MargParms)?
index = min(t, max(mod$ARMAModel))
# add the following for White Noise models
if(max(mod$ARMAModel)==0){index=1}
if(mod$nreg==0){
Lim$a = as.numeric((qnorm(mod$mycdf(mod$DependentVar[t]-1,t(Parms$MargParms)),0,1)) - Zhat)/(Rt[index])
Lim$b = as.numeric((qnorm(mod$mycdf(mod$DependentVar[t],t(Parms$MargParms)),0,1)) - Zhat)/Rt[index]
}else{
Lim$a = as.numeric((qnorm(mod$mycdf(mod$DependentVar[t]-1,Parms$ConstMargParm, Parms$DynamMargParm[t,]),0,1)) - Zhat)/Rt[index]
Lim$b = as.numeric((qnorm(mod$mycdf(mod$DependentVar[t],Parms$ConstMargParm, Parms$DynamMargParm[t,]),0,1)) - Zhat)/Rt[index]
}
return(Lim)
}
SampleTruncNormParticles = function(mod, Limit, t, Zhat, Rt){
# relation (21) in JASA paper and the inverse transform method
# check me: this can be improved?
index = min(t, max(mod$ARMAModel))
if(max(mod$ARMAModel)==0){index=1}
z = qnorm(runif(length(Limit$a),0,1)*(pnorm(Limit$b,0,1)-pnorm(Limit$a,0,1))+pnorm(Limit$a,0,1),0,1)*Rt[index] + Zhat
return(z)
}
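# Standalone check of the inverse-transform step above: draws from a standard
# normal truncated to [a, b] via qnorm(u*(Phi(b)-Phi(a)) + Phi(a)), with
# hypothetical limits.
if (FALSE) {
a <- 0.5; b <- 2                       # standardized limits
u <- runif(10000)
z <- qnorm(u*(pnorm(b) - pnorm(a)) + pnorm(a))
range(z)                               # all draws fall inside [a, b]
}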
ComputeZhat_t = function(mod, IA, Z, Zhat,t, Parms){
Theta = IA$thetas
nTheta = length(IA$thetas)
Theta_n = Theta[[nTheta]]
m = max(mod$ARMAModel)
p = mod$ARMAModel[1]
q = mod$ARMAModel[2]
if(m>1 && t<=m) Zhat_t = Theta[[t-1]][1:(t-1)]%*%(Z[(t-1):1,]-Zhat[(t-1):1,])
if(t>m && t<=nTheta){
A = B= 0
if(!is.null(Parms$AR)) A = Parms$AR%*%Z[(t-1):(t-p),]
if(!is.null(Parms$MA)) B = Theta[[t-1]][1:q]%*%(Z[(t-1):(t-q),]-Zhat[(t-1):(t-q),])
Zhat_t = A + B
}
if(t>nTheta){
A = B = 0
if(!is.null(Parms$AR)) A = Parms$AR%*%Z[(t-1):(t-p),]
if(!is.null(Parms$MA)) B = Theta_n[1:q]%*%(Z[(t-1):(t-q),]-Zhat[(t-1):(t-q),])
Zhat_t = A + B
}
return(Zhat_t)
}
ComputeWeights = function(mod, Limit, t, PreviousWeights){
# equation (21) in JASA paper
# update weights
if(t<=max(mod$ARMAModel)){
NewWeights = PreviousWeights*(pnorm(Limit$b,0,1) - pnorm(Limit$a,0,1))
}else{ # fix me: if I add the w[t-1,] factor below, as I should, the weights become too small?
NewWeights = (pnorm(Limit$b,0,1) - pnorm(Limit$a,0,1))
}
return(NewWeights)
}
ResampleParticles = function(mod, wgh, t, Znew){
# relation (26) in JASA paper and following step
# compute normalized weights
wghn = wgh[t,]/sum(wgh[t,])
# effective sample size
ESS = 1/sum(wghn^2)
if(ESS<mod$epsilon*mod$ParticleNumber){
ind = rmultinom(1,mod$ParticleNumber,wghn)
Znew = rep(Znew,ind)
}
return(Znew)
}
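# Sketch of the ESS rule above (hypothetical weights): resampling kicks in only
# when the normalized weights have degenerated, i.e. ESS < epsilon*ParticleNumber.
if (FALSE) {
w      <- c(0.85, 0.05, 0.05, 0.05)              # normalized weights
ESS    <- 1/sum(w^2)                             # about 1.37 of 4 particles
counts <- rmultinom(1, size = 4, prob = w)       # multinomial offspring counts
rep(c(1.2, -0.3, 0.5, 0.9), as.vector(counts))   # resampled particle values
}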
RetrieveParameters = function(theta,mod){
Parms = vector(mode = "list", length = 5)
names(Parms) = c("MargParms", "ConstMargParm", "DynamMargParm", "AR", "MA")
# marginal parameters
Parms$MargParms = theta[mod$MargParmIndices]
# regressor parameters
if(mod$nreg>0){
beta = Parms$MargParms[1:(mod$nreg+mod$nint)]
m = exp(as.matrix(mod$Regressor)%*%beta)
}
# GLM type parameters
if(mod$CountDist == "Negative Binomial" && mod$nreg>0){
Parms$ConstMargParm = 1/Parms$MargParms[mod$nreg+mod$nint+1]
Parms$DynamMargParm = Parms$MargParms[mod$nreg+mod$nint+1]*m/(1+Parms$MargParms[mod$nreg+mod$nint+1]*m)
}
if(mod$CountDist == "Generalized Poisson" && mod$nreg>0){
Parms$ConstMargParm = Parms$MargParms[mod$nreg+mod$nint+1]
Parms$DynamMargParm = m
}
if(mod$CountDist == "Generalized Poisson 2" && mod$nreg>0){
Parms$ConstMargParm = Parms$MargParms[mod$nreg+mod$nint+1]
Parms$DynamMargParm = m
}
if(mod$CountDist == "Poisson" && mod$nreg>0){
Parms$ConstMargParm = NULL
Parms$DynamMargParm = m
}
if(mod$CountDist == "Binomial" && mod$nreg>0){
Parms$ConstMargParm = NULL
Parms$DynamMargParm = m/(1+m)
}
if(mod$CountDist == "Mixed Poisson" && mod$nreg>0){
Parms$ConstMargParm = c(Parms$MargParms[2*(mod$nreg+mod$nint)+1])
Parms$DynamMargParm = cbind(m, exp(as.matrix(mod$Regressor)%*%Parms$MargParms[(mod$nreg+mod$nint+1):((mod$nreg+mod$nint)*2)]))
}
if(mod$CountDist == "ZIP" && mod$nreg>0){
Parms$ConstMargParm = Parms$MargParms[mod$nreg+mod$nint+1]
Parms$DynamMargParm = m
}
# Parms$DynamMargParm = as.matrix(Parms$DynamMargParm)
# Parms$AR = NULL
if(mod$ARMAModel[1]>0) Parms$AR = theta[(mod$nMargParms+1):(mod$nMargParms + mod$ARMAModel[1])]
# Parms$MA = NULL
if(mod$ARMAModel[2]>0) Parms$MA = theta[(mod$nMargParms+mod$ARMAModel[1]+1) :
(mod$nMargParms + mod$ARMAModel[1] + mod$ARMAModel[2]) ]
return(Parms)
}
# Mixed Poisson inverse cdf - this will not allow for vector lambda
# qmixpois1 = function(y, lam1, lam2, p){
# yl = length(y)
# x = rep(0,yl)
# for (n in 1:yl){
# while(pmixpois(x[n], lam1, lam2,p) <= y[n]){ # R qpois would use <y; this choice makes the function right-continuous; this does not really matter for our model
# x[n] = x[n]+1
# }
# }
# return(x)
# }
# Inverse CDF (quantile function) for Mixed Poisson distribution
qmixpois1 = function(y, lam1, lam2, p){
yl = length(y) # Length of the input vector y
x = rep(0, yl) # Initialize a vector of zeros to store results
for (n in 1:yl) {
lambda1 = ifelse(length(lam1) > 1, lam1[n], lam1) # Use indexed lambda1 or constant
lambda2 = ifelse(length(lam2) > 1, lam2[n], lam2) # Use indexed lambda2 or constant
# Increment x[n] until the CDF is greater than y[n]
while (pmixpois1(x[n], lambda1, lambda2, p) <= y[n]) {
x[n] = x[n] + 1
}
}
return(x)
}
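# Example (not run when sourced): inverse-transform sampling from the mixed
# Poisson marginal via qmixpois1, with arbitrary illustrative parameters.
if (FALSE) {
  set.seed(1)
  u = runif(5)
  qmixpois1(u, lam1 = 2, lam2 = 10, p = 0.3)   # one count per uniform draw
}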
# Inverse CDF (quantile function) for Mixed Poisson distribution using mapply
qmixpois = function(p, lam1, lam2, prob) {
# Ensure p, lam1, and lam2 are vectors
p = as.vector(p)
# Replicate lam1 and lam2 if they are constants
if (length(lam1) == 1) {
lam1 = rep(lam1, length(p))
}
if (length(lam2) == 1) {
lam2 = rep(lam2, length(p))
}
# Function to calculate quantile for a single set of parameters
find_quantile = function(pi, lambda1, lambda2, prob) {
x = 0
while (pmixpois1(x, lambda1, lambda2, prob) <= pi) {
x = x + 1
}
return(x)
}
# Use mapply to apply the function over vectors p, lam1, and lam2
quantiles = mapply(find_quantile, p, lam1, lam2, MoreArgs = list(prob = prob))
return(quantiles)
}
# Mixed Poisson cdf
pmixpois1 = function(x, lam1, lam2,p){
y = p*ppois(x,lam1) + (1-p)*ppois(x,lam2)
return(y)
}
# mass of Mixed Poisson
dmixpois1 = function(x, lam1, lam2, p){
y = p*dpois(x,lam1) + (1-p)*dpois(x,lam2)
return(y)
}
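# Quick consistency check (not run when sourced): the cumulative sum of the
# mixture pmf should reproduce the mixture cdf on 0:k.
if (FALSE) {
  k = 30
  all.equal(cumsum(dmixpois1(0:k, 2, 10, 0.3)), pmixpois1(0:k, 2, 10, 0.3))
}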
#-------------------------------------------------------------------------------------#
# these are used when CountDist = "Generalized Poisson". I have now implemented
# "Generalized Poisson 2", so in the future the following will not be needed.
# Generalized Poisson pdfs
dGpois = function(y,a,m){
#relation 2.4 in Famoye 1994, parametrization through the mean
k = m/(1+a*m)
return( k^y * (1+a*y)^(y-1) * exp(-k*(1+a*y)-lgamma(y+1)))
}
# Generalized Poisson cdf for one m---------#
pgpois1 = function(x,a,m){
M = max(x,0)
bb = dGpois(0:M,a,m)
cc = cumsum(bb)
g = rep(0,length(x))
g[x>=0] = cc[x[x>=0]+1]
return(g)
}
#--------- Generalized Poisson cdf for multiple m---------#
pGpois = function(x,a,m){
if (length(m)>1){
r = mapply(pgpois1, x=x, a, m = m)
}else{
r = pgpois1(x, a, m)
}
return(r)
}
#--------- Generalized Poisson icdf for 1 m---------#
qgpois1 <- function(p,a,m) {
# note: the cdf is evaluated on the hard-coded support 0:100, so quantiles
# beyond 100 will come back as NA
check.list <- pGpois(0:100,a,m)
quantile.vec <- rep(-99,length(p))
for (i in 1:length(p)) {
x <- which(check.list>=p[i],arr.ind = T)[1]
quantile.vec[i] <- x-1
}
return(quantile.vec)
}
#--------- Generalized Poisson icdf for many m---------#
qGpois = function(p,a,m){
if (length(m)>1){
r = mapply(qgpois1, p=p, a, m = m)
}else{
r = qgpois1(p,a,m)
}
return(r)
}
#--------- Generate Gen Poisson data---------#
rGpois = function(n, a,m){
u = runif(n)
x = qGpois(u,a, m)
return(x)
}
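# Sanity check for the mean parametrization of relation 2.4 in Famoye 1994
# (not run when sourced): the sample mean of rGpois draws should be close to m.
if (FALSE) {
  set.seed(1)
  x = rGpois(10000, a = 0.1, m = 5)
  mean(x)   # should be approximately 5
}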
#-------------------------------------------------------------------------------------#
# function that generates random marginal parameter values - used in testing
GenModelParam = function(CountDist,BadParamProb, AROrder, MAOrder, Regressor){
#-----------------Marginal Parameters
if(CountDist=="Poisson"){
# create some "bad" Poisson parameter choices
BadLambda = c(-1,500)
# sample with high probability from "good" choices for lambda and with low prob the "bad" choices
prob = rbinom(1,1,BadParamProb)
# Marginal Parameter
if (is.null(Regressor)){
MargParm = prob*runif(1,0,100) + (1-prob)*sample(BadLambda,1)
}else{
# fix me: I am hard coding 1.2 here but we should probably make this an argument
b0 = runif(1,0,1.2)
b1 = runif(1,0,1.2)
MargParm = c(b0,b1)
}
}
if(CountDist=="Negative Binomial"){
# create some "bad" Poisson parameter choices
Badr_r = c(-1,500)
# sample a probability
p = runif(1,0,1)
# sample with high probability from "good" choices for lambda and with low prob the "bad" choices
prob = rbinom(1,1,BadParamProb)
# Marginal Parameter
if (is.null(Regressor)){
r = prob*runif(1,0,100) + (1-prob)*sample(Badr_r,1)
MargParm = c(r,p)
}else{
# fix me: I am hard coding 1.2 here but we should probably make this an argument
b0 = runif(1,0,1.2)
b1 = runif(1,0,1.2)
k = runif(1,0,1)
MargParm = c(b0,b1,k)
}
}
if(CountDist=="Mixed Poisson"){
# create some "bad" Poisson parameter choices
BadLambda = c(-1,500)
# sample a probability
p = runif(1,0,1)
# sample with high probability from "good" choices for lambda and with low prob the "bad" choices
prob = rbinom(2,1,BadParamProb)
# Marginal Parameter
if (is.null(Regressor)){
lambda1 = prob[1]*runif(1,0,100) + (1-prob[1])*sample(BadLambda,1)
lambda2 = prob[2]*runif(1,0,100) + (1-prob[2])*sample(BadLambda,1)
MargParm = c(lambda1, lambda2, p)
}else{
# fix me: I am hard coding 1.2 here but we should probably make this an argument
b0 = runif(1,0,1.2)
b1 = runif(1,0,1.2)
c0 = runif(1,0,1.2)
c1 = runif(1,0,1.2)
k = runif(1,0,1)
MargParm = c(b0,b1,c0,c1,k)
}
}
if(CountDist=="ZIP"){
# create some "bad" ZIP parameter choices
BadLambda = c(-1,500)
# sample a probability
p = runif(1,0,1)
# sample with high probability from "good" choices for lambda and with low prob the "bad" choices
prob = rbinom(1,1,BadParamProb)
# Marginal Parameter
if (is.null(Regressor)){
lambda = prob*runif(1,0,100) + (1-prob)*sample(BadLambda,1)
MargParm = c(lambda, p)
}else{
# fix me: I am hard coding 1.2 here but we should probably make this an argument
b0 = runif(1,0,1.2)
b1 = runif(1,0,1.2)
c0 = runif(1,0,1.2)
c1 = runif(1,0,1.2)
MargParm = c(b0,b1,c0, c1)
}
}
#------------------ARMA Parameters
# create some "bad" AR parameter choices
BadAR = c(0, -10, 0.99, 1.2)
# create some "bad" MA parameter choices
BadMA = c(0, 10, -0.99, -1.2)
# set the AR Parameters
ARParm = NULL
MAParm = NULL
p = rbinom(1,1,BadParamProb)
if(AROrder) ARParm = p*runif(1,-1, 1)+ (1-p)*sample(BadAR,1)
if(MAOrder) MAParm = p*runif(1,-1, 1)+ (1-p)*sample(BadMA,1)
AllParms = list(MargParm, ARParm, MAParm)
names(AllParms) = c("MargParm", "ARParm", "MAParm")
return(AllParms)
}
GenInitVal = function(AllParms, perturbation){
# select negative or positive perturbation based on a coin flip
#sign = 1-2*rbinom(1,1,0.5)
# using only a negative perturbation for now to ensure stability
sign = -1
MargParm = AllParms$MargParm*(1+sign*perturbation)
PerturbedValues = list(MargParm,NULL,NULL)
names(PerturbedValues) = c("MargParm", "ARParm", "MAParm")
if(!is.null(AllParms$ARParm)) PerturbedValues$ARParm = AllParms$ARParm*(1+sign*perturbation)
if(!is.null(AllParms$MAParm)) PerturbedValues$MAParm = AllParms$MAParm*(1+sign*perturbation)
return(PerturbedValues)
}
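# Example (not run when sourced): draw a random test model and then perturbed
# initial values for it. Note that, per the sampling scheme above, prob =
# rbinom(1,1,BadParamProb) selects the "good" range when it equals 1.
if (FALSE) {
  set.seed(1)
  AllParms = GenModelParam(CountDist = "Poisson", BadParamProb = 0.9,
                           AROrder = 1, MAOrder = 0, Regressor = NULL)
  GenInitVal(AllParms, perturbation = 0.1)
}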
# compute residuals
ComputeResiduals= function(theta, mod){
#--------------------------------------------------------------------------#
# PURPOSE: Compute the residuals in relation (66)
#
# NOTES: 1. See "Latent Gaussian Count Time Series Modeling" for more
# details. A first version of the paper can be found at:
# https://arxiv.org/abs/1811.00203
#
# INPUTS:
#
# OUTPUT:
# loglik:
#
# AUTHORS: James Livsey, Vladas Pipiras, Stefanos Kechagias,
# DATE: July 2020
#--------------------------------------------------------------------------#
# retrieve marginal distribution parameters
Parms = RetrieveParameters(theta,mod)
nMargParms = length(Parms$MargParms)
nparms = length(theta)
# check if the number of parameters matches the model setting
if(nMargParms + sum(mod$ARMAModel)!=nparms) stop('The length of theta does not match the model specification.')
# allocate memory
Zhat <- rep(NA,mod$n)
# The pGpois function doesn't allow for vector lambda, so a for-loop is needed; the change shouldn't
# be too hard. Note that the formula subtracts 1 from the counts mod$DependentVar, which may lead to
# negative numbers; that's why there is an if/else below.
for (i in 1:mod$n){
k <- mod$DependentVar[i]
if (k != 0) {
if(mod$nreg==0){
# Compute limits
a = qnorm(mod$mycdf(mod$DependentVar[i]-1,t(Parms$MargParms)),0,1)
b = qnorm(mod$mycdf(mod$DependentVar[i],t(Parms$MargParms)),0,1)
Zhat[i] <- (exp(-a^2/2)-exp(-b^2/2))/sqrt(2*pi)/(mod$mycdf(k, t(Parms$MargParms))-mod$mycdf(k-1,t(Parms$MargParms)))
}else{
a = qnorm(mod$mycdf(mod$DependentVar[i]-1, Parms$ConstMargParm, Parms$DynamMargParm[i]) ,0,1)
b = qnorm(mod$mycdf(mod$DependentVar[i], Parms$ConstMargParm, Parms$DynamMargParm[i]) ,0,1)
Zhat[i] <- (exp(-a^2/2)-exp(-b^2/2))/sqrt(2*pi)/(mod$mycdf(k, Parms$ConstMargParm, Parms$DynamMargParm[i])-mod$mycdf(k-1,Parms$ConstMargParm, Parms$DynamMargParm[i]))
}
}else{
if(mod$nreg==0){
# Compute integral limits
b = qnorm(mod$mycdf(mod$DependentVar[i],t(Parms$MargParms)),0,1)
Zhat[i] <- -exp(-b^2/2)/sqrt(2*pi)/mod$mycdf(0, t(Parms$MargParms))
}else{
b = qnorm(mod$mycdf(mod$DependentVar[i], Parms$ConstMargParm, Parms$DynamMargParm[i]) ,0,1)
Zhat[i] <- -exp(-b^2/2)/sqrt(2*pi)/mod$mycdf(0, Parms$ConstMargParm, Parms$DynamMargParm[i])
}
}
}
# apply AR filter -- fix me: allow for MA as well
residual = data.frame(stats::filter(Zhat,c(1,-Parms$AR))[1:(mod$n-mod$ARMAModel[1])])
names(residual) = "residual"
return(residual)
}
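# The Zhat formula above is the mean of a standard normal truncated to (a, b]:
# E[Z | a < Z <= b] = (dnorm(a) - dnorm(b)) / (pnorm(b) - pnorm(a)).
# A quick Monte Carlo check of this identity (not run when sourced):
if (FALSE) {
  set.seed(1)
  a = 0.2; b = 1.5
  z = rnorm(1e6)
  mean(z[z > a & z <= b])                       # simulated truncated mean
  (dnorm(a) - dnorm(b))/(pnorm(b) - pnorm(a))   # closed form
}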
# Retrieve the name of the current marginal distribution
CurrentDist = function(theta,mod){
# check me: does it work for models with regressors?
name = sprintf("%s=%.2f",mod$parmnames[1],theta[1])
if (mod$nMargParms>1){
for(i in 2:mod$nMargParms){
#name = paste(name,initialParam[i],sep=",")
a = sprintf("%s=%.2f",mod$parmnames[i],theta[i])
name = paste(name,a,sep=",")
}
}
name = paste(mod$CountDist, "(", name,")", sep="")
return(name)
}
# Modified particle filter function that returns the predictive distribution
PDvalues = function(theta, mod){
#------------------------------------------------------------------------------------#
# PURPOSE: Compute predictive distribution
#
#
# AUTHORS: James Livsey, Vladas Pipiras, Stefanos Kechagias,
# DATE: July 2020
#------------------------------------------------------------------------------------#
old_state <- get_rand_state()
on.exit(set_rand_state(old_state))
# Retrieve parameters and save them in a list called Parms
Parms = RetrieveParameters(theta,mod)
# check for causality
if( CheckStability(Parms$AR,Parms$MA) ) return(10^(8))
# Initialize the negative log likelihood computation
nloglik = ifelse(mod$nreg==0, - log(mod$mypdf(mod$DependentVar[1],Parms$MargParms)),
- log(mod$mypdf(mod$DependentVar[1], Parms$ConstMargParm, Parms$DynamMargParm[1])))
# retrieve AR, MA orders and their max
m = max(mod$ARMAModel)
p = mod$ARMAModel[1]
q = mod$ARMAModel[2]
# Compute ARMA covariance up to lag n-1
a = list()
if(!is.null(Parms$AR)){
a$phi = Parms$AR
}else{
a$phi = 0
}
if(!is.null(Parms$MA)){
a$theta = Parms$MA
}else{
a$theta = 0
}
a$sigma2 = 1
gamma = itsmr::aacvf(a,mod$n)
# Compute coefficients of Innovations Algorithm, see 5.2.16 and 5.3.9 in the Brockwell and Davis book
IA = InnovAlg(Parms, gamma, mod)
Theta = IA$thetas
Rt = sqrt(IA$v)
# Get the n such that |v_n-v_{n-1}|< mod$maxdiff. check me: does this guarantee convergence of Thetas?
nTheta = IA$n
Theta_n = Theta[[nTheta]]
# allocate matrices for weights, particles and predictions of the latent series
w = matrix(0, mod$n, mod$ParticleNumber)
Z = matrix(0, mod$n, mod$ParticleNumber)
Zhat = matrix(0, mod$n, mod$ParticleNumber)
# to collect the values of predictive distribution of interest
preddist = matrix(0,2,mod$n)
# initialize particle filter weights
w[1,] = rep(1,mod$ParticleNumber)
# Compute the first integral limits Limit$a and Limit$b
Limit = ComputeLimits(mod, Parms, 1, rep(0,1,mod$ParticleNumber), rep(1,1,mod$ParticleNumber))
# Initialize the particles using N(0,1) variables truncated to the limits computed above
#Z[1,] = SampleTruncNormParticles(mod, Limit$a, Limit$b, 1, rep(0,1,mod$ParticleNumber), rep(1,1,mod$ParticleNumber))
Z[1,] = SampleTruncNormParticles(mod, Limit, 1, rep(0,1,mod$ParticleNumber), rep(1,1,mod$ParticleNumber))
for (t in 2:mod$n){
# compute Zhat_t
Zhat[t,] = ComputeZhat_t(mod, IA, Z, Zhat,t, Parms)
# compute a,b and temp
temp = rep(0,(mod$DependentVar[t]+1))
index = min(t, max(mod$ARMAModel))
for (x in 0:mod$DependentVar[t]){
# Compute integral limits
if(mod$nreg==0){
a = as.numeric(qnorm(mod$mycdf(x-1,Parms$MargParms),0,1) - Zhat[t,])/Rt[index]
b = as.numeric(qnorm(mod$mycdf(x, Parms$MargParms),0,1) - Zhat[t,])/Rt[index]
}else{
a = as.numeric(qnorm(mod$mycdf(x-1,Parms$ConstMargParm, Parms$DynamMargParm[t]),0,1) - Zhat[t,])/Rt[index]
b = as.numeric(qnorm(mod$mycdf(x, Parms$ConstMargParm, Parms$DynamMargParm[t]),0,1) - Zhat[t,])/Rt[index]
}
temp[x+1] = mean(pnorm(b,0,1) - pnorm(a,0,1))
}
# Compute integral limits
Limit = ComputeLimits(mod, Parms, t, Zhat[t,], Rt)
# Sample truncated normal particles
Znew = SampleTruncNormParticles(mod, Limit, t, Zhat[t,], Rt)
# update weights
w[t,] = ComputeWeights(mod, Limit, t, w[(t-1),])
# check me: break if I got NA weight
if (any(is.na(w[t,]))| sum(w[t,])==0 ){
message(sprintf('WARNING: At t=%.0f some of the weights are either too small or sum to 0',t))
return(10^8)
}
# Resample the particles using common random numbers
old_state1 = get_rand_state()
Znew = ResampleParticles(mod, w, t, Znew)
set_rand_state(old_state1)
# save the current particle
Z[t,] = Znew
if (mod$DependentVar[t]==0){
preddist[,t] = c(0,temp[1])
}else{
preddist[,t] = cumsum(temp)[mod$DependentVar[t]:(mod$DependentVar[t]+1)]
}
}
return(preddist)
}
# Compute PIT values - see relations (39) - (40) in JASA paper
PITvalues = function(H, predDist){
PITvalues = rep(0,H)
predd1 = predDist[1,]
predd2 = predDist[2,]
Tr = length(predd1)
for (h in 1:H){
id1 = (predd1 < h/H)*(h/H < predd2)
id2 = (h/H >= predd2)
tmp1 = (h/H-predd1)/(predd2-predd1)
tmp1[!id1] = 0
tmp2 = rep(0,Tr)
tmp2[id2] = 1
PITvalues[h] = mean(tmp1+tmp2)
}
PITvalues = c(0,PITvalues)
return(diff(PITvalues))
}
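# Example use of PITvalues (not run when sourced). In practice predDist comes
# from PDvalues; here a toy 2 x n matrix of lower/upper predictive cdf values
# is used instead, so the snippet is self-contained.
if (FALSE) {
  set.seed(1)
  u = runif(200)
  predDist = rbind(pmax(u - 0.1, 0), pmin(u + 0.1, 1))
  pit = PITvalues(H = 10, predDist)
  barplot(pit, names.arg = seq(0.1, 1, 0.1))   # roughly flat if calibrated
}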
# parse the values from a standard formula
parse_formula <- function(formula) {
if (!inherits(formula, "formula")) {
stop("Input must be a formula.")
}
# Extract the terms object from the formula
terms_obj <- terms(formula)
# Extract the response variable (left-hand side)
DependentVar <- as.character(formula[[2]])
# Extract the predictor terms (right-hand side)
Regressor <- attr(terms_obj, "term.labels")
# If there are no predictors, return NULL
if (length(Regressor) == 0) {
Regressor <- NULL
}
# Check if the intercept is included (1 if included, 0 if excluded)
has_intercept <- attr(terms_obj, "intercept") == 1
# Return a list with response, predictors, and intercept status
return(list(
DependentVar = DependentVar,
Regressor = Regressor,
intercept = has_intercept
))
}
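# Example (not run when sourced): parse_formula splits a standard R formula
# into the response, regressor names, and intercept flag.
if (FALSE) {
  parse_formula(y ~ x1 + x2)
  # $DependentVar "y"; $Regressor c("x1","x2"); $intercept TRUE
  parse_formula(y ~ 1)
  # $DependentVar "y"; $Regressor NULL; $intercept TRUE
}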
#===========================================================================================#
#--------------------------------------------------------------------------------#
# On August 2024 for Issue #27, I created a modified likelihood function called
# ParticleFilter_Res_ARMA_MisSpec with three changes:
#
# 1. The value 1 for the count CDF calculations is changed to 1-10^(-16), so that
# when inverse normal is applied in the copula, we do not receive an inf value.
# 2. When the limits of the truncated normal distribution become too large (say >7),
# I set them equal to 7-epsilon, so that when I apply the normal cdf and subsequently
# the inverse cdf I avoid the inf values.
# 3. If the weights in the particle filter likelihood become 0 I set them equal to
# 10^(-64).
#
# I have added the changes in the ParticleFilter_Res_ARMA_MisSpec, ComputeLimits_MisSpec
# and SampleTruncNormParticles_MisSpec files and will continue working with the
# standard versions below until I perform adequate testing.
# PF likelihood with resampling for ARMA(p,q) - with adhoc truncations
ParticleFilter_Res_ARMA_MisSpec = function(theta, mod){
#------------------------------------------------------------------------------------#
# PURPOSE: Use particle filtering with resampling to approximate the likelihood
# of a specified count time series model with an underlying ARMA(p,q)
# dependence structure.
#
# NOTES: 1. See "Latent Gaussian Count Time Series Modeling" for more
# details. A first version of the paper can be found at:
# https://arxiv.org/abs/1811.00203
# 2. This function is very similar to LikSISGenDist_ARp but here
# I have a resampling step.
#
# INPUTS:
# theta: parameter vector
# data: data
# ParticleNumber: number of particles to be used.
# Regressor: independent variable
# CountDist: count marginal distribution
# epsilon resampling when ESS<epsilon*N
#
# OUTPUT:
# loglik: approximate log-likelihood
#
#
# AUTHORS: James Livsey, Vladas Pipiras, Stefanos Kechagias,
# DATE: July 2020
#------------------------------------------------------------------------------------#
old_state <- get_rand_state()
on.exit(set_rand_state(old_state))
# Retrieve parameters and save them in a list called Parms
Parms = RetrieveParameters(theta,mod)
# check for causality
if( CheckStability(Parms$AR,Parms$MA) ) return(10^(8))
# Initialize the negative log likelihood computation
nloglik = ifelse(mod$nreg==0, - log(mod$mypdf(mod$DependentVar[1],Parms$MargParms)),
- log(mod$mypdf(mod$DependentVar[1], Parms$ConstMargParm, Parms$DynamMargParm[1])))
# retrieve AR, MA orders and their max
m = max(mod$ARMAModel)
p = mod$ARMAModel[1]
q = mod$ARMAModel[2]
# Compute ARMA covariance up to lag n-1
a = list()
if(!is.null(Parms$AR)){
a$phi = Parms$AR
}else{
a$phi = 0
}
if(!is.null(Parms$MA)){
a$theta = Parms$MA
}else{
a$theta = 0
}
a$sigma2 = 1
gamma = itsmr::aacvf(a,mod$n)
# Compute coefficients of Innovations Algorithm, see 5.2.16 and 5.3.9 in the Brockwell and Davis book
IA = InnovAlg(Parms, gamma, mod)
Theta = IA$thetas
Rt = sqrt(IA$v)
# Get the n such that |v_n-v_{n-1}|< mod$maxdiff. check me: does this guarantee convergence of Thetas?
nTheta = IA$n
Theta_n = Theta[[nTheta]]
# allocate matrices for weights, particles and predictions of the latent series
w = matrix(0, mod$n, mod$ParticleNumber)
Z = matrix(0, mod$n, mod$ParticleNumber)
Zhat = matrix(0, mod$n, mod$ParticleNumber)
# initialize particle filter weights
w[1,] = rep(1,mod$ParticleNumber)
# Compute the first integral limits Limit$a and Limit$b
Limit = ComputeLimits_MisSpec(mod, Parms, 1, rep(0,1,mod$ParticleNumber), rep(1,1,mod$ParticleNumber))
# Initialize the particles using N(0,1) variables truncated to the limits computed above
#Z[1,] = SampleTruncNormParticles(mod, Limit$a, Limit$b, 1, rep(0,1,mod$ParticleNumber), rep(1,1,mod$ParticleNumber))
Z[1,] = SampleTruncNormParticles_MisSpec(mod, Limit, Parms, 1, rep(0,1,mod$ParticleNumber), rep(1,1,mod$ParticleNumber))
for (t in 2:mod$n){
# compute Zhat_t
#Zhat[t,] = ComputeZhat_t(m,Theta,Z,Zhat,t, Parms,p,q, nTheta, Theta_n)
Zhat[t,] = ComputeZhat_t(mod, IA, Z, Zhat,t, Parms)
# Compute integral limits
Limit = ComputeLimits_MisSpec(mod, Parms, t, Zhat[t,], Rt)
# Sample truncated normal particles
#Znew = SampleTruncNormParticles(mod, Limit$a, Limit$b, t, Zhat[t,], Rt)
Znew = SampleTruncNormParticles_MisSpec(mod, Limit, Parms, t, Zhat[t,], Rt)
# update weights
#w[t,] = ComputeWeights(mod, Limit$a, Limit$b, t, w[(t-1),])
w[t,] = ComputeWeights(mod, Limit, t, w[(t-1),])
#print(t)
#print(w[t,])
# check me: In misspecified models, the weights may get equal to 0. Is it ok
# for me to do the following? how is this different from allowing zero weights and
# returning a large likelihood?
if (sum(w[t,])==0){
w[t,] = rep(10^(-64),mod$ParticleNumber)
message(sprintf("At t = %.0f all weights are equal to 0. They are resetted to equal 1e-64.",t))
}
# check me: break if I got NA weight
if (any(is.na(w[t,]))| sum(w[t,])==0 ){
#print(t)
#print(w[t,])
message(sprintf('WARNING: At t=%.0f some of the weights are either too small or sum to 0',t))
return(10^8)
}
# Resample the particles using common random numbers
old_state1 = get_rand_state()
Znew = ResampleParticles(mod, w, t, Znew)
set_rand_state(old_state1)
# save the current particle
Z[t,] = Znew
# update likelihood
nloglik = nloglik - log(mean(w[t,]))
}
# for log-likelihood we use a bias correction--see par2.3 in Durbin Koopman, 1997
# nloglik = nloglik- (1/(2*N))*(var(na.omit(wgh[T1,]))/mean(na.omit(wgh[T1,])))/mean(na.omit(wgh[T1,]))
# if (nloglik==Inf | is.na(nloglik)){
# nloglik = 10^8
# }
return(nloglik)
}
ComputeLimits_MisSpec = function(mod, Parms, t, Zhat, Rt){
# a and b are the arguments in the two normal cdfs in the 4th line in equation (19) in JASA paper
Lim = list()
# fix me: this is fine for pure AR or MA models, but for ARMA should it be p+q instead of max(p,q)?
# fix me: why do I have t(Parms$MargParms)?
index = min(t, max(mod$ARMAModel))
# add the following for White Noise models
if(max(mod$ARMAModel)==0) index=1
if(mod$nreg==0){
C_1 = mod$mycdf(mod$DependentVar[t]-1,t(Parms$MargParms))
C = mod$mycdf(mod$DependentVar[t],t(Parms$MargParms))
# fix me: need to implement for regressors as well
if(C_1==1) {
C_1 = 1-10^(-16)
# retrieve the name of the current distribution
CurrentDist = CurrentDist(Parms$MargParms, mod)
# warn the user of the situation
message(sprintf("To avoid an Inf inverse cdf value, 1e-16 was
subtracted from C_xt_1 in relation (19). C_xt_1 = 1 can be caused by
a misspecified marginal distribution, lack of relevant covariates, or
an optimizer seaching the parameter space away from the truth. Here a
%s model is fitted, but at t=%.0f the dependent variable is
equal to %.0f.\n",CurrentDist, t,mod$DependentVar[t]))
}
if(C==1 ) {
# retrieve the name of the current distribution
CurrentDist = CurrentDist(Parms$MargParms, mod)
C = 1-10^(-16)
message(sprintf("To avoid an Inf inverse cdf value, 1e-16 was
subtracted from C_xt in relation (19). C_xt_1 = 1 can be caused by
a misspecified marginal distribution, lack of relevant covariates,
or an optimizer seaching the parameter space away from the truth. Here
a %s model is fitted, but at t=%.0f the dependent variable is
equal to %.0f.\n",CurrentDist,t,mod$DependentVar[t]))
}
Lim$a = as.numeric((qnorm(C_1,0,1)) - Zhat)/Rt[index]
Lim$b = as.numeric((qnorm(C ,0,1)) - Zhat)/Rt[index]
}else{
Lim$a = as.numeric((qnorm(mod$mycdf(mod$DependentVar[t]-1,Parms$ConstMargParm, Parms$DynamMargParm[t,]),0,1)) - Zhat)/Rt[index]
Lim$b = as.numeric((qnorm(mod$mycdf(mod$DependentVar[t],Parms$ConstMargParm, Parms$DynamMargParm[t,]),0,1)) - Zhat)/Rt[index]
}
return(Lim)
}
SampleTruncNormParticles_MisSpec = function(mod, Limit, Parms, t, Zhat, Rt){
# relation (21) in JASA paper and the inverse transform method
# check me: this can be improved?
index = min(t, max(mod$ARMAModel))
if(max(mod$ARMAModel)==0){index=1}
# Check me: in the case of misspecified models, I may get really large values for the limits,
# which means that the pnorm will equal 1 and then the qnorm will yield Inf. Is this ok?
if (min(Limit$a)>=7 ) {
# retrieve the name of the current distribution
CurrentDist = CurrentDist(Parms$MargParms, mod)
Limit$a[Limit$a>=7] = 7 - 10^(-11)
message(sprintf("To avoid an Inf inverse cdf value, 1e-11 was subtracted from
the lower limit of the truncated Normal distribution used to generate new particles.
Large limits can be caused by a misspecified marginal distribution, lack of relevant
covariates, or an optimizer seaching the parameter space away from the truth among
other things. Here a %s model is fitted, but at t=%.0f the dependent
variable is equal to %.0f.\n", CurrentDist, t,mod$DependentVar[t]))
}
# if (max(Limit$a)<=-7 ) {
# # retrieve the name of the current distribution
# CurrentDist = CurrentDist(Parms$MargParms, mod)
# Limit$a[Limit$a<=-7] = -7 + 10^(-11)
# message(sprintf("To avoid an Inf inverse cdf value, 1e-11 was added to
# the lower limit of the truncated Normal distribution used to generate new particles.
# Large limits can be caused by a misspecified marginal distribution, lack of relevant
# covariates, or an optimizer seaching the parameter space away from the truth among
# other things. Here a %s model is fitted, but at t=%.0f the dependent
# variable is equal to %.0f.\n", CurrentDist,t,mod$DependentVar[t]))
# }
if (min(Limit$b)>=7 ) {
# retrieve the name of the current distribution
CurrentDist = CurrentDist(Parms$MargParms, mod)
Limit$b[Limit$b>=7] = 7 - 10^(-11)
message(sprintf("To avoid an Inf inverse cdf value, 1e-11 was subtracted from
the upper limit of the truncated Normal distribution used to generate new particles.
Large limits can be caused by a misspecified marginal distribution, lack of relevant
covariates, or an optimizer seaching the parameter space away from the truth among
other things. Here a %s model is fitted, but at t=%.0f the dependent
variable is equal to %.0f.\n", CurrentDist, t,mod$DependentVar[t]))
}
# if (max(Limit$b)<=-7 ) {
# # retrieve the name of the current distribution
# CurrentDist = CurrentDist(Parms$MargParms, mod)
# Limit$b[Limit$b<=-7] = -7 + 10^(-11)
# message(sprintf("To avoid an Inf inverse cdf value, 1e-11 was added to
# the upper limit of the truncated Normal distribution used to generate new particles.
# Large limits can be caused by a misspecified marginal distribution, lack of relevant
# covariates, or an optimizer seaching the parameter space away from the truth among
# other things. Here a %s model is fitted, but at t=%.0f the dependent
# variable is equal to %.0f.", CurrentDist,t,mod$DependentVar[t]))
# }
z = qnorm(runif(length(Limit$a),0,1)*(pnorm(Limit$b,0,1)-pnorm(Limit$a,0,1))+pnorm(Limit$a,0,1),0,1)*Rt[index] + Zhat
return(z)
}
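# The sampler above is the inverse-transform method for a truncated normal:
# if U ~ Unif(0,1), then qnorm(U*(pnorm(b)-pnorm(a)) + pnorm(a)) lies in (a,b).
# A minimal check of that construction (not run when sourced):
if (FALSE) {
  set.seed(1)
  a = -1; b = 0.5
  z = qnorm(runif(1e5)*(pnorm(b) - pnorm(a)) + pnorm(a))
  range(z)   # all draws fall inside (a, b)
}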
#############################################################################################
#-------------------------------------------------------------------------------------------#
# functions we wrote, and may use in the future
# poisson cdf using incomplete gamma and its derivative wrt lambda
myppois = function(x, lambda){
# compute poisson cdf as the ratio of an incomplete gamma function over the standard gamma function
# I will also compute the derivative of the poisson cdf wrt lambda
X = c(lambda,x+1)
# note: gammainc is not defined in this file; it is assumed to come from an
# external package whose incomplete gamma function accepts the vector c(lambda, x+1)
v1 = gammainc(X)
v2 = gamma(x+1)
# straightforward formula from the definition of the incomplete gamma integral
v1_d = -lambda^x*exp(-lambda)
v2_d = 0
z = v1/v2
z_d = (v1_d*v2 - v2_d*v1)/v2^2
return(c(z,z_d))
}
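# The incomplete-gamma representation above matches the base-R identity
# ppois(x, lambda) == pgamma(lambda, shape = x + 1, lower.tail = FALSE),
# which can be checked without any extra package (not run when sourced):
if (FALSE) {
  x = 4; lambda = 2.5
  all.equal(ppois(x, lambda),
            pgamma(lambda, shape = x + 1, lower.tail = FALSE))
}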
# PF likelihood with resampling for AR(p)
ParticleFilter_Res_AR_old = function(theta, mod){
#--------------------------------------------------------------------------#
# PURPOSE: Use particle filtering with resampling
# to approximate the likelihood of the
# a specified count time series model with an underlying AR(p)
# dependence structure.
#
#
# INPUTS:
# theta: parameter vector
# mod: a list containing all the information for the model, such as
# count distribution, ARMA model, etc.
# OUTPUT:
# loglik: approximate log-likelihood
#
# AUTHORS: James Livsey, Vladas Pipiras, Stefanos Kechagias
# DATE: July 2020
#--------------------------------------------------------------------------#
# keep track of the random seed to use common random numbers
old_state <- get_rand_state()
on.exit(set_rand_state(old_state))
# Retrieve parameters and save them in a list called Parms
Parms = RetrieveParameters(theta,mod)
# check for causality
if( CheckStability(Parms$AR,Parms$MA) ) return(10^(8))
# Initialize the negative log likelihood computation
nloglik = ifelse(mod$nreg==0, - log(mod$mypdf(mod$DependentVar[1],Parms$MargParms)),
- log(mod$mypdf(mod$DependentVar[1], Parms$ConstMargParm, Parms$DynamMargParm[1])))
# Compute the theoretical covariance for the AR model for current estimate
gt = ARMAacf(ar = Parms$AR, ma = Parms$MA,lag.max = mod$n)
# Compute the best linear predictor coefficients and errors using Durbin Levinson
Phi = list()
for (t in 2:mod$n){
CurrentDL = DLAcfToAR(gt[2:t])
Phi[[t-1]] = CurrentDL[,1]
}
Rt = c(1,sqrt(as.numeric(CurrentDL[,3])))
# allocate memory for particle weights and the latent Gaussian series particles; check me: do I need weights for 1:T or only 2?
w = matrix(0, mod$n, mod$ParticleNumber)
Z = matrix(0, max(mod$ARMAModel), mod$ParticleNumber)
#====================== Start the SIS algorithm ======================#
# Initialize the weights
w[1,] = rep(1,mod$ParticleNumber)
# Compute the first integral limits Limit$a and Limit$b
Limit = ComputeLimits(mod, Parms, 1, rep(0,1,mod$ParticleNumber), rep(1,1,mod$ParticleNumber))
# Initialize particles from truncated normal distribution
#Z[1,] = SampleTruncNormParticles(mod, Limit$a, Limit$b, 1, rep(0,1,mod$ParticleNumber), rep(1,1,mod$ParticleNumber))
Z[1,] = SampleTruncNormParticles(mod, Limit, 1, rep(0,1,mod$ParticleNumber), rep(1,1,mod$ParticleNumber))
# =================== Loop over t ===================== #
for (t in 2:mod$n){
# Compute the latent Gaussian predictions Zhat_t using Innovations Algorithm
if(t==2 || mod$ParticleNumber==1){
Zhat = Z[1:(t-1),] %*% Phi[[t-1]]
}else{
Zhat = colSums(Z[1:(t-1),] %*% Phi[[t-1]])
}
# Compute integral limits
Limit = ComputeLimits(mod, Parms, t, Zhat, Rt)
# Sample truncated normal particles
#Znew = SampleTruncNormParticles(mod, Limit$a, Limit$b,t, Zhat, Rt)
Znew = SampleTruncNormParticles(mod, Limit, t, Zhat, Rt)
# update weights
#w[t,] = ComputeWeights(mod, Limit$a, Limit$b, t, w[(t-1),])
w[t,] = ComputeWeights(mod, Limit, t, w[(t-1),])
# check me: break if I got NA weight
if (any(is.na(w[t,]))| sum(w[t,])==0 ){
message(sprintf('WARNING: Some of the weights are either too small or sum to 0'))
return(10^8)
}
# Resample the particles using common random numbers
old_state1 = get_rand_state()
Znew = ResampleParticles(mod, w, t, Znew)
set_rand_state(old_state1)
# Combine current particles, with particles from previous iterations
Z = rbind(Znew, as.matrix(Z[1:(t-1),]))
# if (mod$ARMAModel[1]>1){
# Z = rbind(Znew, Z[1:( min(t,max(mod$ARMAModel)) -1),])
# }else {
# Z[1,]=Znew
# }
# update log-likelihood
nloglik = nloglik - log(mean(w[t,]))
}
return(nloglik)
}
# PF likelihood with resampling for MA(q)
ParticleFilter_Res_MA_old = function(theta, mod){
#------------------------------------------------------------------------------------#
# PURPOSE: Use particle filtering with resampling to approximate the likelihood
# of a specified count time series model with an underlying MA(q)
# dependence structure.
#
# NOTES: 1. See "Latent Gaussian Count Time Series Modeling" for more
# details. A first version of the paper can be found at:
# https://arxiv.org/abs/1811.00203
# 2. This function is very similar to LikSISGenDist_ARp but here
# I have a resampling step.
#
# INPUTS:
# theta: parameter vector
# data: data
# ParticleNumber: number of particles to be used.
# Regressor: independent variable
# CountDist: count marginal distribution
# epsilon resampling when ESS<epsilon*N
#
# OUTPUT:
# loglik: approximate log-likelihood
#
#
# AUTHORS: James Livsey, Vladas Pipiras, Stefanos Kechagias,
# DATE: July 2020
#------------------------------------------------------------------------------------#
old_state <- get_rand_state()
on.exit(set_rand_state(old_state))
# Retrieve parameters and save them in a list called Parms
Parms = RetrieveParameters(theta,mod)
# check for causality
if( CheckStability(Parms$AR,Parms$MA) ) return(10^(8))
# Initialize the negative log likelihood computation
nloglik = ifelse(mod$nreg==0, - log(mod$mypdf(mod$DependentVar[1],Parms$MargParms)),
- log(mod$mypdf(mod$DependentVar[1], Parms$ConstMargParm, Parms$DynamMargParm[1])))
# Compute covariance up to lag n-1
gt = as.vector(ARMAacf(ar = Parms$AR, ma = Parms$MA, lag.max = mod$n))
# Compute coefficients of Innovations Algorithm, see 5.2.16 and 5.3.9 in the Brockwell and Davis book
IA = innovations.algorithm(gt)
Theta = IA$thetas
Rt = sqrt(IA$v)
# allocate matrices for weights, particles and innovations which are equal to Z-Zhat
w = matrix(0, mod$n, mod$ParticleNumber)
Z = matrix(0, max(mod$ARMAModel), mod$ParticleNumber)
Inn = matrix(0, max(mod$ARMAModel), mod$ParticleNumber)
# particle filter weights
w[1,] = rep(1,mod$ParticleNumber)
# Compute the first integral limits Limit$a and Limit$b
Limit = ComputeLimits(mod, Parms, 1, rep(0,1,mod$ParticleNumber), rep(1,1,mod$ParticleNumber))
# Generate N(0,1) variables restricted to (ai,bi),i=1,...n
#Z[1,] = SampleTruncNormParticles(mod, Limit$a, Limit$b, 1, rep(0,1,mod$ParticleNumber), rep(1,1,mod$ParticleNumber))
Z[1,] = SampleTruncNormParticles(mod, Limit, 1, rep(0,1,mod$ParticleNumber), rep(1,1,mod$ParticleNumber))
# Compute the first innovation (Zhat_1=0)
Inn[1,] = Z[1,]
for (t in 2:mod$n){
# Compute the latent Gaussian predictions Zhat_t using Innovations Algorithm - see 5.3.9 in Brockwell Davis book
if(t==2 || mod$ParticleNumber==1){
Zhat = Inn[1:(min(t-1,mod$nMA)),] %*% Theta[[t-1]][1:(min(t-1,mod$nMA))]
}else{
Zhat = colSums(Inn[1:(min(t-1,mod$nMA)),] %*% Theta[[t-1]][1:(min(t-1,mod$nMA))])
}
# Compute integral limits
Limit = ComputeLimits(mod, Parms, t, Zhat, Rt)
# Sample truncated normal particles
#Znew = SampleTruncNormParticles(mod, Limit$a, Limit$b, t, Zhat, Rt)
Znew = SampleTruncNormParticles(mod, Limit, t, Zhat, Rt)
# update weights
#w[t,] = ComputeWeights(mod, Limit$a, Limit$b, t, w[(t-1),])
w[t,] = ComputeWeights(mod, Limit, t, w[(t-1),])
# check me: break if I got NA weight
if (any(is.na(w[t,]))| sum(w[t,])==0 ){
message(sprintf('WARNING: Some of the weights are either too small or sum to 0'))
return(10^8)
}
# Resample the particles using common random numbers
old_state1 = get_rand_state()
Znew = ResampleParticles(mod, w, t, Znew)
set_rand_state(old_state1)
# Compute the new Innovation
InnNew = Znew - Zhat
# Combine current particles, with particles from previous iterations
Inn = rbind(InnNew, as.matrix(Inn[1:min(t-1,mod$nMA),]))
# update likelihood
nloglik = nloglik - log(mean(w[t,]))
}
# for log-likelihood we use a bias correction--see par2.3 in Durbin Koopman, 1997
# nloglik = nloglik- (1/(2*N))*(var(na.omit(wgh[T1,]))/mean(na.omit(wgh[T1,])))/mean(na.omit(wgh[T1,]))
# if (nloglik==Inf | is.na(nloglik)){
# nloglik = 10^8
# }
return(nloglik)
}
# PF likelihood with resampling
ParticleFilter_Res = function(theta, mod){
#--------------------------------------------------------------------------#
# PURPOSE: Use particle filtering with resampling
# to approximate the likelihood of the
# a specified count time series model with an underlying AR(p)
# dependence structure or MA(q) structure.
#
# NOTES: 1. See "Latent Gaussian Count Time Series Modeling" for more
# details. A first version of the paper can be found at:
# https://arxiv.org/abs/1811.00203
#
# INPUTS:
# theta: parameter vector
# data: dependent variable
# Regressor: independent variables
# ParticleNumber: number of particles to be used in likelihood approximation
# CountDist: count marginal distribution
# epsilon resampling when ESS<epsilon*N
#
# OUTPUT:
# loglik: approximate log-likelihood
#
#
# AUTHORS: James Livsey, Vladas Pipiras, Stefanos Kechagias,
# DATE: July 2020
#--------------------------------------------------------------------------#
# Pure AR model
if(mod$ARMAModel[1]>0 && mod$ARMAModel[2]==0) loglik = ParticleFilter_Res_AR(theta, mod)
# Pure MA model or White noise
if(mod$ARMAModel[1]==0&& mod$ARMAModel[2]>=0) loglik = ParticleFilter_Res_MA(theta, mod)
return(loglik)
}
# innovations algorithm code - used in previous likelihood implementations
innovations.algorithm <- function(gamma){
n.max=length(gamma)-1
# Found this online; need to check it:
# http://faculty.washington.edu/dbp/s519/R-code/innovations-algorithm.R
thetas <- vector(mode="list",length=n.max)
v <- rep(gamma[1],n.max+1)
for(n in 1:n.max){
thetas[[n]] <- rep(0,n)
thetas[[n]][n] <- gamma[n+1]/v[1]
if(n>1){
for(k in 1:(n-1)){
js <- 0:(k-1)
thetas[[n]][n-k] <- (gamma[n-k+1] - sum(thetas[[k]][k-js]*thetas[[n]][n-js]*v[js+1]))/v[k+1]
}
}
js <- 0:(n-1)
v[n+1] <- v[n+1] - sum(thetas[[n]][n-js]^2*v[js+1])
}
v = v/v[1]
return(structure(list(v=v,thetas=thetas)))
}
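# Quick check of innovations.algorithm on an MA(1) with theta = 0.5 (not run
# when sourced): theta_{n,1} should converge to theta as n grows.
if (FALSE) {
  theta = 0.5
  gam = as.vector(ARMAacf(ma = theta, lag.max = 50))*(1 + theta^2) # autocovariances
  IA = innovations.algorithm(gam)
  tail(sapply(IA$thetas, function(th) th[1]), 3)   # approaches 0.5
}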
# PF likelihood with resampling for AR(p) - written more concisely
ParticleFilter_Res_AR = function(theta, mod){
#--------------------------------------------------------------------------#
# PURPOSE: Use particle filtering with resampling
# to approximate the likelihood of the
# a specified count time series model with an underlying AR(p)
# dependence structure.
#
#
# INPUTS:
# theta: parameter vector
# mod: a list containing all the information for the model, such as
# count distribution, ARMA model, etc.
# OUTPUT:
# loglik: approximate log-likelihood
#
# AUTHORS: James Livsey, Vladas Pipiras, Stefanos Kechagias
# DATE: July 2020
#--------------------------------------------------------------------------#
# keep track of the random seed to use common random numbers
old_state <- get_rand_state()
on.exit(set_rand_state(old_state))
# Retrieve parameters and save them in a list called Parms
Parms = RetrieveParameters(theta,mod)
# check for causality
if( CheckStability(Parms$AR,Parms$MA) ) return(10^(8))
# Initialize the negative log likelihood computation
nloglik = ifelse(mod$nreg==0, - log(mod$mypdf(mod$DependentVar[1],Parms$MargParms)),
- log(mod$mypdf(mod$DependentVar[1], Parms$ConstMargParm, Parms$DynamMargParm[1])))
# Compute the theoretical covariance for the AR model for current estimate
gt = ARMAacf(ar = Parms$AR, ma = Parms$MA,lag.max = mod$n)
# Compute the best linear predictor coefficients and errors using Durbin Levinson
Phi = list()
for (t in 2:mod$n){
CurrentDL = DLAcfToAR(gt[2:t])
Phi[[t-1]] = CurrentDL[,1]
}
Rt = c(1,sqrt(as.numeric(CurrentDL[,3])))
# allocate memory for particle weights and the latent Gaussian series particles; check me: do I need weights for 1:T or only 2?
w = matrix(0, mod$n, mod$ParticleNumber)
Z = matrix(0, max(mod$ARMAModel), mod$ParticleNumber)
#====================== Start the SIS algorithm ======================#
# Initialize the weights
w[1,] = rep(1,mod$ParticleNumber)
# Compute the first integral limits Limit$a and Limit$b
Limit = ComputeLimits(mod, Parms, 1, rep(0,1,mod$ParticleNumber), rep(1,1,mod$ParticleNumber))
# Initialize particles from truncated normal distribution
#Z[1,] = SampleTruncNormParticles(mod, Limit$a, Limit$b, 1, rep(0,1,mod$ParticleNumber), rep(1,1,mod$ParticleNumber))
Z[1,] = SampleTruncNormParticles(mod, Limit, 1, rep(0,1,mod$ParticleNumber), rep(1,1,mod$ParticleNumber))
# =================== Loop over t ===================== #
for (t in 2:mod$n){
# Compute the latent Gaussian predictions Zhat_t using Innovations Algorithm
Zhat = Phi[[t-1]]%*%Z[1:(t-1),]
# Compute integral limits
Limit = ComputeLimits(mod, Parms, t, Zhat, Rt)
# Sample truncated normal particles
#Znew = SampleTruncNormParticles(mod, Limit$a, Limit$b,t, Zhat, Rt)
Znew = SampleTruncNormParticles(mod, Limit, t, Zhat, Rt)
# update weights
#w[t,] = ComputeWeights(mod, Limit$a, Limit$b, t, w[(t-1),])
w[t,] = ComputeWeights(mod, Limit, t, w[(t-1),])
# check me: break if I got NA weight
if (any(is.na(w[t,]))| sum(w[t,])==0 ){
message(sprintf('WARNING: Some of the weights are either too small or sum to 0'))
return(10^8)
}
# Resample the particles using common random numbers
old_state1 = get_rand_state()
Znew = ResampleParticles(mod, w, t, Znew)
set_rand_state(old_state1)
# Combine current particles, with particles from previous iterations
Z = rbind(Znew, matrix(Z[1:(t-1),],ncol = mod$ParticleNumber))
# update log-likelihood
nloglik = nloglik - log(mean(w[t,]))
}
return(nloglik)
}
# PF likelihood with resampling for MA(q)
ParticleFilter_Res_MA = function(theta, mod){
#------------------------------------------------------------------------------------#
# PURPOSE: Use particle filtering with resampling to approximate the likelihood
# of a specified count time series model with an underlying MA(q)
# dependence structure.
#
# NOTES: 1. See "Latent Gaussian Count Time Series Modeling" for more
# details. A first version of the paper can be found at:
# https://arxiv.org/abs/1811.00203
# 2. This function is very similar to LikSISGenDist_ARp but here
# I have a resampling step.
#
# INPUTS:
# theta: parameter vector
# data: data
# ParticleNumber: number of particles to be used.
# Regressor: independent variable
# CountDist: count marginal distribution
# epsilon resampling when ESS<epsilon*N
#
# OUTPUT:
# loglik: approximate log-likelihood
#
#
# AUTHORS: James Livsey, Vladas Pipiras, Stefanos Kechagias,
# DATE: July 2020
#------------------------------------------------------------------------------------#
old_state <- get_rand_state()
on.exit(set_rand_state(old_state))
# Retrieve parameters and save them in a list called Parms
Parms = RetrieveParameters(theta,mod)
# check for causality
if( CheckStability(Parms$AR,Parms$MA) ) return(10^(8))
# Initialize the negative log likelihood computation
nloglik = ifelse(mod$nreg==0, - log(mod$mypdf(mod$DependentVar[1],Parms$MargParms)),
- log(mod$mypdf(mod$DependentVar[1], Parms$ConstMargParm, Parms$DynamMargParm[1])))
# Compute covariance up to lag n-1
gt = as.vector(ARMAacf(ar = Parms$AR, ma = Parms$MA, lag.max = mod$n))
# Compute coefficients of Innovations Algorithm, see 5.2.16 and 5.3.9 in the Brockwell and Davis book
IA = innovations.algorithm(gt)
Theta = IA$thetas
Rt = sqrt(IA$v)
# allocate matrices for weights, particles and innovations which are equal to Z-Zhat
w = matrix(0, mod$n, mod$ParticleNumber)
Z = matrix(0, max(mod$ARMAModel), mod$ParticleNumber)
Inn = matrix(0, max(mod$ARMAModel), mod$ParticleNumber)
# particle filter weights
w[1,] = rep(1,mod$ParticleNumber)
# Compute the first integral limits Limit$a and Limit$b
Limit = ComputeLimits(mod, Parms, 1, rep(0,1,mod$ParticleNumber), rep(1,1,mod$ParticleNumber))
# Generate N(0,1) variables restricted to (ai,bi),i=1,...n
#Z[1,] = SampleTruncNormParticles(mod, Limit$a, Limit$b, 1, rep(0,1,mod$ParticleNumber), rep(1,1,mod$ParticleNumber))
Z[1,] = SampleTruncNormParticles(mod, Limit, 1, rep(0,1,mod$ParticleNumber), rep(1,1,mod$ParticleNumber))
# Compute the first innovation (Zhat_1=0)
Inn[1,] = Z[1,]
for (t in 2:mod$n){
# Compute the latent Gaussian predictions Zhat_t using Innovations Algorithm - see 5.3.9 in Brockwell Davis book
if(mod$ParticleNumber==1){
if(t==2){
Zhat = Inn[1:(min(t-1,mod$nMA)),] %*% Theta[[t-1]][1:(min(t-1,mod$nMA))]
}else{
Zhat = colSums(Inn[1:(min(t-1,mod$nMA)),] %*% Theta[[t-1]][1:(min(t-1,mod$nMA))])
}
}else{
if(t==2){
Zhat = Inn[1:(min(t-1,mod$nMA)),] * Theta[[t-1]][1:(min(t-1,mod$nMA))]
}else{
Zhat = colSums(Inn[1:(min(t-1,mod$nMA)),] * Theta[[t-1]][1:(min(t-1,mod$nMA))])
}
}
# Compute integral limits
Limit = ComputeLimits(mod, Parms, t, Zhat, Rt)
# Sample truncated normal particles
#Znew = SampleTruncNormParticles(mod, Limit$a, Limit$b, t, Zhat, Rt)
Znew = SampleTruncNormParticles(mod, Limit, t, Zhat, Rt)
# update weights
#w[t,] = ComputeWeights(mod, Limit$a, Limit$b, t, w[(t-1),])
w[t,] = ComputeWeights(mod, Limit, t, w[(t-1),])
# check me: break if I got NA weight
if (any(is.na(w[t,]))| sum(w[t,])==0 ){
message(sprintf('WARNING: Some of the weights are either too small or sum to 0'))
return(10^8)
}
# Resample the particles using common random numbers
old_state1 = get_rand_state()
Znew = ResampleParticles(mod, w, t, Znew)
set_rand_state(old_state1)
# Compute the new Innovation
InnNew = Znew - Zhat
# Combine current particles, with particles from previous iterations
Inn[1:min(t,mod$nMA),] = rbind(matrix(InnNew,ncol=mod$ParticleNumber), matrix(Inn[1:min(t-1,mod$nMA-1),],ncol = mod$ParticleNumber))
# update likelihood
nloglik = nloglik - log(mean(w[t,]))
}
# for log-likelihood we use a bias correction--see par2.3 in Durbin Koopman, 1997
# nloglik = nloglik- (1/(2*N))*(var(na.omit(wgh[T1,]))/mean(na.omit(wgh[T1,])))/mean(na.omit(wgh[T1,]))
# if (nloglik==Inf | is.na(nloglik)){
# nloglik = 10^8
# }
return(nloglik)
}
# older PF likelihood with resampling for ARMA(p,q)
ParticleFilter_Res_ARMA_old = function(theta, mod){
#------------------------------------------------------------------------------------#
# PURPOSE: Use particle filtering with resampling to approximate the likelihood
# of a specified count time series model with an underlying ARMA(p,q)
# dependence structure.
#
# NOTES: 1. See "Latent Gaussian Count Time Series Modeling" for more
# details. A first version of the paper can be found at:
# https://arxiv.org/abs/1811.00203
# 2. This function is very similar to LikSISGenDist_ARp but here
# I have a resampling step.
# 3. The innovations algorithm here is the standard one, likely it will
# be slower.
# INPUTS:
# theta: parameter vector
# data: data
# ParticleNumber: number of particles to be used.
# Regressor: independent variable
# CountDist: count marginal distribution
# epsilon resampling when ESS<epsilon*N
#
# OUTPUT:
# loglik: approximate log-likelihood
#
#
# AUTHORS: James Livsey, Vladas Pipiras, Stefanos Kechagias,
# DATE: July 2020
#------------------------------------------------------------------------------------#
old_state <- get_rand_state()
on.exit(set_rand_state(old_state))
# Retrieve parameters and save them in a list called Parms
Parms = RetrieveParameters(theta,mod)
#print(Parms$AR)
# check for causality and invertibility
if( CheckStability(Parms$AR,Parms$MA) ){
mod$ErrorMsg = sprintf('WARNING: The ARMA polynomial must be causal and invertible.')
warning(mod$ErrorMsg)
return(mod$loglik_BadValue1)
}
# Initialize the negative log likelihood computation
nloglik = ifelse(mod$nreg==0, - log(mod$mypdf(mod$DependentVar[1],Parms$MargParms)),
- log(mod$mypdf(mod$DependentVar[1], Parms$ConstMargParm, Parms$DynamMargParm[1,])))
# retrieve AR, MA orders and their max
m = max(mod$ARMAModel)
p = mod$ARMAModel[1]
q = mod$ARMAModel[2]
# Compute ARMA covariance up to lag n-1
a = list()
if(!is.null(Parms$AR)){
a$phi = Parms$AR
}else{
a$phi = 0
}
if(!is.null(Parms$MA)){
a$theta = Parms$MA
}else{
a$theta = 0
}
a$sigma2 = 1
gamma = itsmr::aacvf(a,mod$n)
# Compute coefficients of Innovations Algorithm, see 5.2.16 and 5.3.9 in the Brockwell and Davis book
#IA = InnovAlg(Parms, gamma, mod)
IA = innovations.algorithm(gamma)
Rt = sqrt(IA$v)
# allocate matrices for weights, particles and predictions of the latent series
w = matrix(0, mod$n, mod$ParticleNumber)
Z = matrix(0, mod$n, mod$ParticleNumber)
Zhat = matrix(0, mod$n, mod$ParticleNumber)
# initialize particle filter weights
w[1,] = rep(1,mod$ParticleNumber)
# Compute the first integral limits Limit$a and Limit$b
Limit = ComputeLimits(mod, Parms, 1, rep(0,1,mod$ParticleNumber), rep(1,1,mod$ParticleNumber))
# Initialize the particles using N(0,1) variables truncated to the limits computed above
Z[1,] = SampleTruncNormParticles(mod, Limit, 1, rep(0,1,mod$ParticleNumber), rep(1,1,mod$ParticleNumber))
for (t in 2:mod$n){
# compute Zhat_t
Zhat[t,] = ComputeZhat_t(mod, IA, Z, Zhat,t, Parms)
# Compute integral limits
Limit = ComputeLimits(mod, Parms, t, Zhat[t,], Rt)
# Sample truncated normal particles
Znew = SampleTruncNormParticles(mod, Limit, t, Zhat[t,], Rt)
# update weights
w[t,] = ComputeWeights(mod, Limit, t, w[(t-1),])
# check me: break if I got NA weight
if (any(is.na(w[t,]))| sum(w[t,])==0 ){
#print(t)
#print(w[t,])
message(sprintf('WARNING: Some of the weights are either too small or sum to 0'))
return(mod$loglik_BadValue2)
}
# Resample the particles using common random numbers
old_state1 = get_rand_state()
Znew = ResampleParticles(mod, w, t, Znew)
set_rand_state(old_state1)
# save the current particle
Z[t,] = Znew
# update likelihood
nloglik = nloglik - log(mean(w[t,]))
}
# for log-likelihood we use a bias correction--see par2.3 in Durbin Koopman, 1997
# nloglik = nloglik- (1/(2*N))*(var(na.omit(wgh[T1,]))/mean(na.omit(wgh[T1,])))/mean(na.omit(wgh[T1,]))
# if (nloglik==Inf | is.na(nloglik)){
# nloglik = 10^8
# }
return(nloglik)
}
#' Dimension function for vector/matrix inputs
#' Treats vectors as length x 1 column vectors
#'
#' @param x vector (column matrix) or matrix
#'
#' @return the dimension of x as a length-2 vector (rows, columns)
#' @export
#'
DIM <- function(x){
if (is.null(dim(x)))
c(length(x), 1)
else
dim(x)
}
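# Example (not run when sourced): DIM treats a plain vector as a column.
if (FALSE) {
  DIM(1:5)              # c(5, 1)
  DIM(matrix(0, 2, 3))  # c(2, 3)
}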