# Load the daily Uccle precipitation series (from 1880) and derive
# date components plus meteorological seasons.
setwd("/home/proto4426/Documents/master_Thesis/PissoortThesis/Scripts-R/Article")
rain_1880 <- read.csv("data/P50_Uccle_1880.csv",
sep = " ")
names(rain_1880)[1] <- "Date"
# Date is stored as yyyymmdd text; split into day / month / year.
rain_1880$day <- substr(rain_1880$Date,7,8)
rain_1880$month <- as.numeric(substr(rain_1880$Date,5,6))
rain_1880$year <- substr(rain_1880$Date,1,4)
# Retrieve seasons with our function. Based on meteorological seasons
rain_1880$season <- sapply(rain_1880$month,
function(x) PissoortThesis::func_season(x))
# Keep only the 20th century onwards (year is character; the comparison
# coerces to numeric).
rain_1901 <- rain_1880[rain_1880$year > 1900,]
### DLM ================================
library(tidyverse)
library(plotly)
library(dlm)
## Random walk + noise written as a 2-dim state-space model:
## FF extracts the level, GG propagates level + slope.
lg <- dlm(m0 = rep(0, 2), C0 = 10 * diag(2), FF = matrix(c(1, 0), nr = 1),
          V = 1.4, GG = matrix(c(1, 0, 1, 1), nr = 2), W = diag(c(0, 0.2)))
## Time varying DLM: JFF marks the second FF entry as time-varying,
## with values taken column-wise from X.
x <- rnorm(100) # covariates
dlr <- dlm(m0 = rep(0, 2), C0 = 10 * diag(2), FF = matrix(c(1, 0), nr = 1),
           V = 1.3, GG = diag(2), W = diag(c(0.4, 0.2)),
           JFF = matrix(c(0, 1), nr = 1), X = as.matrix(x))
# Inspect the components of the model just built.
# (Was `unlist(mod)`: `mod` is not defined at this point of the script.)
unlist(dlr)
#' Filter a series with two local-level DLMs and plot both filtered levels.
#'
#' Model 1 uses (dV1, dW1). Model 2 uses dV2 and a 10x larger system
#' variance (10 * dW2), i.e. a 10x larger signal-to-noise ratio, to
#' contrast the two filters (see the legend).
#'
#' @param ts numeric vector (coerced to `ts`) to filter.
#' @param dV1,dW1 observation / system variance of model 1.
#' @param dV2,dW2 observation / system variance of model 2
#'   (dW2 is multiplied by 10 inside).
#' @param brow if TRUE, enter the browser for interactive debugging.
#' @return list with the two `dlmFiltered` objects (`mod1`, `mod2`).
'ex.book.plot' <- function(ts = rain_1901$RR, dV1 = 15100, dW1 = 0.5 * 1468,
                           dV2 = 15100, dW2 = 0.5 * 1468, brow = FALSE) {
  ts <- ts(ts)
  if (brow) browser()
  # Model 1: random walk plus noise with the given variances.
  mod1 <- dlmModPoly(order = 1, dV = dV1, dW = dW1)
  nileFilt1 <- dlmFilter(ts, mod1)
  # Drop the first filtered value (prior mean at time 0) before plotting.
  plot(window(cbind(ts, nileFilt1$m[-1]),
              start = start(ts) + 1), plot.type = 's',
       type = 'o', col = c("grey", "green"), lty = c(1, 2),
       xlab = "", ylab = "Level")
  # Model 2: same structure, 10x larger system variance.
  mod2 <- dlmModPoly(order = 1, dV = dV2, dW = 10 * dW2)
  nileFilt2 <- dlmFilter(ts, mod2)
  lines(window(nileFilt2$m, start = start(ts) + 1), type = 'o',
        col = "red", lty = 4)
  legend("bottomleft", legend = c("data", "filtered level - model 1",
                                  "filtered level - 10x larger signal to noise ratio"),
         col = c("grey", "green", "red"), lty = c(1, 2, 4), pch = 1, bty = 'n')
  return(list(mod1 = nileFilt1, mod2 = nileFilt2))
}
#ex.book.plot()
# Yearly maxima of the daily precipitation, via the package helper.
list_rain_by_years <- split(rain_1901, rain_1901$year)
max_years <- PissoortThesis::yearly.extrm(list.y = list_rain_by_years, tmp = "RR")
# (Trailing empty arguments removed from the original calls `f(x,)`.)
nileFilt1 <- ex.book.plot(max_years$df$Max)$mod1
nileFilt2 <- ex.book.plot(max_years$df$Max)$mod2
maxy <- ex.book.plot(max_years$df$Max, dV1 = 10000, dW1 = 50000)
#' ggplot version of `ex.book.plot`: filter with two local-level DLMs and
#' return the first filter plus the comparison plot.
#'
#' @param ts numeric vector (coerced to `ts`).
#' @param dV,dW observation / system variance of model 1 (green line).
#' @param dV2,dW2 observation / system variance of model 2 (red line).
#' @param brow if TRUE, enter the browser (was accepted but unused).
#' @return list with the first `dlmFiltered` object (`mod`) and the
#'   ggplot object (`g`).
'ex.book.ggplot' <- function(ts = rain_1901$RR, dV = 15100, dW = 0.5 * 1468,
                             dV2 = 15100, dW2 = 0.5 * 1468, brow = FALSE) {
  if (brow) browser()
  ts <- ts(ts)
  mod1 <- dlmModPoly(order = 1, dV = dV, dW = dW)
  nileFilt1 <- dlmFilter(ts, mod1)
  mod2 <- dlmModPoly(order = 1, dV = dV2, dW = dW2)
  nileFilt2 <- dlmFilter(ts, mod2)
  # Data + both filtered levels (dropping the time-0 prior mean).
  df1 <- as.data.frame(window(cbind(series = ts,
                                    filter1 = nileFilt1$m[-1],
                                    filter2 = nileFilt2$m[-1],
                                    obs = 1:length(ts)),
                              start = start(ts) + 1))
  # Filtering variance of the last state vector (newline added so the
  # console output is not glued to the next line).
  cat("var of the filtering distribution of the last-state vector",
      with(nileFilt1, dlmSvd2var(U.C[[length(ts) + 1]], D.C[length(ts) + 1, ])),
      "\n")
  g <- ggplot(df1, aes(x = obs)) +
    geom_point(aes(y = series)) + geom_line(aes(y = series)) +
    geom_line(aes(y = filter2), col = "red") +
    geom_line(aes(y = filter1), col = "green3") +
    annotate(geom = "text",
             label = paste("dV2 = ", round(dV2, 1), "(dW2=", round(dW2), ")"),
             x = 20, y = 65, col = "red", size = 3) +
    annotate(geom = "text",
             label = paste("dW = ", round(dW, 1), "(dV=", round(dV), ")"),
             x = 20, y = 60, col = "green3", size = 3) +
    PissoortThesis::theme_piss()
  return(list(mod = nileFilt1, g = g))
}
# Example plot, rendered interactively with plotly.
maxy <- ex.book.ggplot(max_years$df$Max, dV = 10000, dW = 5000)
ggplotly(maxy$g + labs(title = "xxxx"))
# Sensitivity: build 12 plots g1..g12 with growing variances.
# Assigned into the global environment by name; collected again below.
for (i in 1:12){
g <- ex.book.ggplot(max_years$df$Max,
dV2 = 1500 * (i^1.5),
dW = 200 * (i^1.5) )$g
assign(paste0("g", i), g)
}
library(gridExtra)
library(grid)
# Collect the 12 plots built above into a named list of plot objects.
# (Was `as.list(paste0("g", seq(1,12)[1]))`, a list holding the single
# string "g1", and `lapply(glist, [1])`, which is a syntax error.)
glist <- mget(paste0("g", 1:12))
n <- length(glist)
nCol <- floor(sqrt(n))
do.call(grid.arrange, c(glist, ncol = nCol))
grid.arrange(g1, g2, g3, g4, g5, g6, g7, g8, g9, g10, g11, g12, ncol = 2)
cowplot::plot_grid(plotlist = glist, ncol = 2)
###### Smoothing
# Local-level model on the yearly maxima: filter, then smooth.
ts <- ts(max_years$df$Max)
mod1 <- dlmModPoly(order = 1, dV = 15100, dW = 0.5 * 1468)
modFilt <- dlmFilter(ts, mod1)
modSmooth <- dlmSmooth(modFilt)
str(modSmooth,1)
# Smoothing variance of the last state, then filtering vs smoothing
# variance at mid-sample (smoothing uses all data, so it is tighter).
with(modSmooth, drop(dlmSvd2var(U.S[[length(ts)]], D.S[length(ts),])))
with(modFilt, drop(dlmSvd2var(U.C[[length(ts)/2]], D.C[length(ts)/2,])))
with(modSmooth, drop(dlmSvd2var(U.S[[length(ts)/2]], D.S[length(ts)/2,])))
# Half-width of 95% probability intervals. qnorm((1-0.95)/2) is negative,
# so `%o% c(1,-1)` below produces the (lower, upper) bands.
hwid <- qnorm((1-0.95) / 2) *
sqrt(with(modSmooth, unlist(dlmSvd2var(U.S, D.S))))
smooth <- cbind(modSmooth$s, as.vector(modSmooth$s) + hwid %o% c(1,-1))
plot(cbind(ts, window(smooth, start=start(ts))), plot.type="s",
col=c("grey", "magenta", "cyan", "cyan"), lty=c(1, 2, 3, 3), type="o",
ylab="Level", xlab="")
legend("bottomleft", legend=c("data", "smoothed level", "95% probability limits"),
col=c("grey", "magenta", "cyan"), lty=1:3, pch=1, bty="n")
# Hand-built model: level + quarterly seasonal component (block-diagonal
# GG), with fixed variances.
# NOTE(review): the W values (771.35, 86.48) and the 1957-1970 axis below
# look like a textbook expenditure example, not values estimated from
# this precipitation series -- confirm they are intended here.
mod <- dlm(m0 = rep(0,4), C0 = 1e-8 * diag(4),
FF = matrix(c(1,1,0,0), nr=1),
V = 1e-3,
GG = bdiag(matrix(1),
matrix(c(-1,-1,-1,1,0,0,0,1,0),nr=3,byrow=TRUE)),
W = diag(c(771.35, 86.48, 0, 0), nr=4))
modFilt <- dlmFilter(ts, mod)
modSmooth <- dlmSmooth(modFilt)
plot(ts, type="o", xlim=c(1957, 1970), ylim=c(210, 725), ylab="Expenditure")
lines(modFilt$m[,1], col="green", type="o")
lines(modSmooth$s[,1], col="red", type="o")
plot(modSmooth$s[,2], type="o", ylab="Expenditure - Seasonal component")
abline(h=0)
## 2.5. Forecasting
# One-step-ahead forecasts (component `f`) of the two filters built above.
plot(window(cbind(ts, nileFilt1$f, nileFilt2$f), start=1, end=116+10),
plot.type="s", type="o", col=c("grey", "green", "red"), lty=c(1,2,4),
xlab="", ylab="Level")
legend("bottomleft", legend=c("data", paste("one-step-ahead forecast - model", 1:2)),
col=c("grey", "green", "red"), lty=c(1,2,4), pch=1, bty="n")
# Sample 10 future trajectories 8 steps ahead; overlay the mean forecast.
fore <- dlmForecast(modFilt, nAhead = 8, sampleNew = 10)
invisible(lapply(fore$newObs, function(x) lines(x, col="grey", lty=2)))
lines(fore$f, col="magenta", type="o")
## 3.2.1 Trend models
# standardized innovations and perform Shapiro-Wilk and Ljung-Box tests.
modLSup <- dlmModPoly(1, dV = 9.465, dW = 0.121)
lSupFilt <- dlmFilter(ts, modLSup)
res <- residuals(lSupFilt, sd=FALSE)
stats::shapiro.test(res)
Box.test(res, lag=20, type="Ljung")
### 4.6 Gibbs Sampling ....
# Printing these function names shows their source code (no calls made).
dlmBSample
arms
gdpGibbs
dlmGibbsDIG
dlmGibbsDIGt
#### DLM PACKAGE VIGNETTES #############################
## 1.3 Combining Models
myMod <- dlmModPoly() + dlmModSeas(4) # stochastic linear trend + 4terly seasonal component
# Outer sum : dimension of each model may be different.
dlmModPoly(dV = 0.2, dW = c(0, 0.5)) %+%
(dlmModSeas(4, dV = 0, dW = c(0, 0, 0.35)) +
dlmModPoly(1, dV = 0.1, dW = 0.03))
## 1.4 Time-varying Models
u <- rnorm(25)
# Dynamic regression: the JFF / X slots mark the time-varying regressor.
myMod <- dlmModReg(u, dV = 14.5)
myMod$JFF
head(myMod$X)
### 2. MLE
dlmMLE ; dlmLL
#random walk plus noise, with unknown system and observation variances.
#Parametrizing variances on a log scale, to ensure positivity,
buildFun <- function(x) dlmModPoly(1, dV = exp(x[1]), dW = exp(x[2]))
fit <- dlmMLE(ts, parm = c(0,0), build = buildFun) ; fit$conv
dlmNile <- buildFun(fit$par)
V(dlmNile)
W(dlmNile)
# Compare with the classical structural time-series fit.
StructTS(ts)
#take into account a jump #in the flow of the river following the construction
#f Ashwan dam in 1898. this can be done by
# Local level model with time-varying system variance (JW/X slots): the
# variance of the 1899 transition is inflated by a factor (1 + exp(x[3])).
# NOTE(review): this build function is hard-wired to the `Nile` series
# (its length and time index), while `dlmMLE` below is fed `ts` (the
# yearly precipitation maxima) -- if length(ts) != length(Nile) the X
# matrix will not match the data; confirm whether `ts` or `Nile` was
# intended.
'buildFun' <- function(x) {
m <- dlmModPoly(1, dV = exp(x[1]))
m$JW <- matrix(1)
m$X <- matrix(exp(x[2]), nc = 1, nr = length(Nile))
j <- which(time(Nile) == 1899)
m$X[j,1] <- m$X[j,1] * (1 + exp(x[3]))
return(m)
}
fit <- dlmMLE(ts, parm = c(0,0,0), build = buildFun) ; fit$conv
dlmNileJump <- buildFun(fit$par)
V(dlmNileJump)
# Variance before and at the 1899 jump.
dlmNileJump$X[c(1, which(time(Nile) == 1899)), 1]
### 3.1 Filtering
# Filter the Nile series with the jump model and plot the filtered level.
nileJumpFilt <- dlmFilter(Nile, dlmNileJump)
plot(Nile, type = 'o', col = "seagreen")
lines(dropFirst(nileJumpFilt$m), type = "o", pch = 20, col = "brown")
# 90% probability limits of the filtered level.
# (Was attach()/detach(); rewritten with with() and explicit `$` access.)
v <- unlist(with(nileJumpFilt, dlmSvd2var(U.C, D.C)))
pl <- dropFirst(nileJumpFilt$m) + qnorm(0.05, sd = sqrt(v[-1]))
pu <- dropFirst(nileJumpFilt$m) + qnorm(0.95, sd = sqrt(v[-1]))
lines(pl, lty = 2, col = "brown")
lines(pu, lty = 2, col = "brown")
### 3.2 Smoothing
# Smooth the filtered jump model and plot the smoothed level.
nileJumpSmooth <- dlmSmooth(nileJumpFilt)
plot(Nile, type = 'o', col = "seagreen")
lines(dropFirst(nileJumpSmooth$s), type = 'o', pch = 20, col = "brown")
# 90% probability limits of the smoothed level.
# (Was attach()/detach(); rewritten with with() and explicit `$` access.)
v <- unlist(with(nileJumpSmooth, dlmSvd2var(U.S, D.S)))
pl <- dropFirst(nileJumpSmooth$s) + qnorm(0.05, sd = sqrt(v[-1]))
pu <- dropFirst(nileJumpSmooth$s) + qnorm(0.95, sd = sqrt(v[-1]))
lines(pl, lty = 2, col = "brown")
lines(pu, lty = 2, col = "brown")
# lGas example : quarterly seasonal component and a local linear trend,
#in the form of an integrated random walk.
lGas <- log(UKgas)
dlmGas <- dlmModPoly() + dlmModSeas(4)
# MLE of the slope/seasonal system variances and the observation
# variance (log-parametrized for positivity).
buildFun <- function(x) {
diag(W(dlmGas))[2:3] <- exp(x[1:2])
V(dlmGas) <- exp(x[3])
return(dlmGas)
}
(fit <- dlmMLE(lGas, parm = rep(0, 3), build = buildFun))$conv
dlmGas <- buildFun(fit$par)
drop(V(dlmGas))
diag(W(dlmGas))[2:3]
# Smooth and plot the data with trend and seasonal components.
gasSmooth <- dlmSmooth(lGas, mod = dlmGas)
x <- cbind(lGas, dropFirst(gasSmooth$s[,c(1,3)]))
colnames(x) <- c("Gas", "Trend", "Seasonal")
plot(x, type = 'o', main = "UK Gas Consumption")
### 3.3. Forecasting
# Means and variances of future states and observations are returned
#in a list as components a, R, f, and Q.
gasFilt <- dlmFilter(lGas, mod = dlmGas)
gasFore <- dlmForecast(gasFilt, nAhead = 20)
# 90% limits for the forecast level (first state component).
sqrtR <- sapply(gasFore$R, function(x) sqrt(x[1,1]))
pl <- gasFore$a[,1] + qnorm(0.05, sd = sqrtR)
pu <- gasFore$a[,1] + qnorm(0.95, sd = sqrtR)
x <- ts.union(window(lGas, start = c(1982, 1)),
window(gasSmooth$s[,1], start = c(1982, 1)),
gasFore$a[,1], pl, pu)
plot(x, plot.type = "single", type = 'o', pch = c(1, 0, 20, 3, 3),
col = c("darkgrey", "darkgrey", "brown", "yellow", "yellow"),
ylab = "Log gas consumption")
legend("bottomright", legend = c("Observed", "Smoothed (deseasonalized)",
"Forecasted level", "90% probability limit"),
bty = 'n', pch = c(1, 0, 20, 3, 3), lty = 1,
col = c("darkgrey", "darkgrey", "brown", "yellow", "yellow"))
#### 4. Bayesian analysis of DLM
## 4.1. Forward filtering backward sampling
# Overlay 10 posterior draws of the level path on the data.
plot(Nile, type = 'o', col = "seagreen")
nileFilt <- dlmFilter(Nile, dlmNile)
for (i in 1:10) # 10 simulated "true" levels
lines(dropFirst(dlmBSample(nileFilt)), col = "brown")
# Log of an (unnormalized) mixture-of-normals density evaluated at x:
# log( sum_k weights[k] * exp(-((x - means[k]) / sds[k])^2 / 2) / sds[k] ).
lmixnorm <- function(x, weights, means, sds) {
  kernels <- exp(-0.5 * ((x - means) / sds)^2 - log(sds))
  log(crossprod(weights, kernels))
}
# Draw 5000 ARMS samples from the mixture, restricted to (-100, 100).
y <- arms(0, myldens = lmixnorm, indFunc = function(x,...)
(x > (-100)) * (x < 100), n = 5000, weights = c(1, 3, 2),
means = c(-10, 0, 10), sds = c(7, 5, 2))
summary(y)
library(MASS)
# Compare the sample histogram with the true (normalized) mixture density.
truehist(y, prob = TRUE, ylim = c(0, 0.08), bty = 'o')
curve(colSums(c(1, 3, 2) / 6 * dnorm(matrix(x, 3, length(x), TRUE),
mean = c(-10, 0, 10), sd = c(7, 5, 2))), add = T)
legend(-25, 0.07, "True density", lty = 1, bty = 'n')
## ARMS Examples ========================================================
# Worked examples from the `dlm::arms` help page: sampling from several
# univariate targets, each given as a log-density plus a support
# indicator function.
#### ==> Warning: running the examples may take a few minutes! <== ####
## Not run:
set.seed(4521222)
### Univariate densities
## Unif(-r,r)
y <- arms(runif(1,-1,1), function(x,r) 1, function(x,r) (x>-r)*(x<r), 5000, r=2)
summary(y); hist(y,prob=TRUE,main="Unif(-r,r); r=2")
## Normal(mean,1)
norldens <- function(x,mean) -(x-mean)^2/2
y <- arms(runif(1,3,17), norldens, function(x,mean) ((x-mean)>-7)*((x-mean)<7),
5000, mean=10)
summary(y); hist(y,prob=TRUE,main="Gaussian(m,1); m=10")
curve(dnorm(x,mean=10),3,17,add=TRUE)
## Exponential(1)
y <- arms(5, function(x) -x, function(x) (x>0)*(x<70), 5000)
summary(y); hist(y,prob=TRUE,main="Exponential(1)")
curve(exp(-x),0,8,add=TRUE)
## Gamma(4.5,1)
y <- arms(runif(1,1e-4,20), function(x) 3.5*log(x)-x,
function(x) (x>1e-4)*(x<20), 5000)
summary(y); hist(y,prob=TRUE,main="Gamma(4.5,1)")
curve(dgamma(x,shape=4.5,scale=1),1e-4,20,add=TRUE)
## Gamma(0.5,1) (this one is not log-concave)
y <- arms(runif(1,1e-8,10), function(x) -0.5*log(x)-x,
function(x) (x>1e-8)*(x<10), 5000)
summary(y); hist(y,prob=TRUE,main="Gamma(0.5,1)")
curve(dgamma(x,shape=0.5,scale=1),1e-8,10,add=TRUE)
## Beta(.2,.2) (this one neither)
y <- arms(runif(1), function(x) (0.2-1)*log(x)+(0.2-1)*log(1-x),
function(x) (x>1e-5)*(x<1-1e-5), 5000)
summary(y); hist(y,prob=TRUE,main="Beta(0.2,0.2)")
curve(dbeta(x,0.2,0.2),1e-5,1-1e-5,add=TRUE)
## Triangular
y <- arms(runif(1,-1,1), function(x) log(1-abs(x)), function(x) abs(x)<1, 5000)
summary(y); hist(y,prob=TRUE,ylim=c(0,1),main="Triangular")
curve(1-abs(x),-1,1,add=TRUE)
## Multimodal examples (Mixture of normals)
lmixnorm <- function(x,weights,means,sds) {
log(crossprod(weights, exp(-0.5*((x-means)/sds)^2 - log(sds))))
}
y <- arms(0, lmixnorm, function(x,...) (x>(-100))*(x<100), 5000, weights=c(1,3,2),
means=c(-10,0,10), sds=c(1.5,3,1.5))
summary(y); hist(y,prob=TRUE,main="Mixture of Normals")
curve(colSums(c(1,3,2)/6*dnorm(matrix(x,3,length(x),byrow=TRUE),c(-10,0,10),c(1.5,3,1.5))),
par("usr")[1], par("usr")[2], add=TRUE)
### Bivariate densities
# Same ARMS interface, now on 2-dimensional targets (500 draws each).
## Bivariate standard normal
y <- arms(c(0,2), function(x) -crossprod(x)/2,
function(x) (min(x)>-5)*(max(x)<5), 500)
plot(y, main="Bivariate standard normal", asp=1)
## Uniform in the unit square
y <- arms(c(0.2,.6), function(x) 1, function(x) (min(x)>0)*(max(x)<1), 500)
plot(y, main="Uniform in the unit square", asp=1)
polygon(c(0,1,1,0),c(0,0,1,1))
## Uniform in the circle of radius r
y <- arms(c(0.2,0), function(x,...) 1,
function(x,r2) sum(x^2)<r2, 500, r2=2^2)
plot(y, main="Uniform in the circle of radius r; r=2", asp=1)
curve(-sqrt(4-x^2), -2, 2, add=TRUE)
curve(sqrt(4-x^2), -2, 2, add=TRUE)
## Uniform on the simplex
# Indicator of the unit simplex: 1 if x >= 0 componentwise and
# sum(x) <= 1, otherwise 0.
simp <- function(x) {
  if (all(x >= 0) && sum(x) <= 1) 1 else 0
}
# Sample uniformly on the simplex using `simp` as support indicator.
y <- arms(c(0.2,0.2), function(x) 1, simp, 500)
plot(y, xlim=c(0,1), ylim=c(0,1), main="Uniform in the simplex", asp=1)
polygon(c(0,1,0), c(0,0,1))
## A bimodal distribution (mixture of normals)
# Equal-weight mixture of N(+3, I) and N(-3, I), on the log scale.
bimodal <- function(x) { log(prod(dnorm(x,mean=3))+prod(dnorm(x,mean=-3))) }
y <- arms(c(-2,2), bimodal, function(x) all(x>(-10))*all(x<(10)), 500)
plot(y, main="Mixture of bivariate Normals", asp=1)
## A bivariate distribution with non-convex support
# 0/1 indicator of a non-convex region: the band -1 < x2 < 1 with
# x1 > -2, intersected with (x1 < 1 OR inside the open unit disc
# centred at (1, 0)).
support <- function(x) {
  in_band <- -1 < x[2] && x[2] < 1 && -2 < x[1]
  in_rect_or_disc <- x[1] < 1 || crossprod(x - c(1, 0)) < 1
  as.numeric(in_band && in_rect_or_disc)
}
# Floor for the log-density outside the support: near the log of the
# smallest representable double, to avoid returning -Inf.
Min.log <- log(.Machine$double.xmin) + 10
# Piecewise log-density: uniform 1/4 on the half-plane x1 < 0, uniform
# 1/pi on the unit disc centred at (1, 0), and the Min.log floor
# elsewhere.
logf <- function(x) {
  if (x[1] < 0) {
    return(log(1/4))
  }
  if (crossprod(x - c(1, 0)) < 1) {
    return(log(1/pi))
  }
  Min.log
}
# Visualise the support on a grid, overlay 1000 ARMS draws, and count
# how many fall in each piece (square vs circle).
x <- as.matrix(expand.grid(seq(-2.2,2.2,length=40),seq(-1.1,1.1,length=40)))
y <- sapply(1:nrow(x), function(i) support(x[i,]))
plot(x,type='n',asp=1)
points(x[y==1,],pch=1,cex=1,col='green')
z <- arms(c(0,0), logf, support, 1000)
points(z,pch=20,cex=0.5,col='blue')
polygon(c(-2,0,0,-2),c(-1,-1,1,1))
curve(-sqrt(1-(x-1)^2),0,2,add=TRUE)
curve(sqrt(1-(x-1)^2),0,2,add=TRUE)
sum( z[,1] < 0 ) # sampled points in the square
sum( apply(t(z)-c(1,0),2,crossprod) < 1 ) # sampled points in the circle
## End(Not run)
# ===================================
### 4.3. Gibbs sampling : example
# Local copy of dlm::dlmGibbsDIG, the "d-inverse-gamma" Gibbs sampler for
# a univariate DLM with unknown observation variance V and unknown
# diagonal system variances W[ind]. Each iteration draws the state path
# by forward filtering / backward sampling (dlmBSample), then the
# variances from their inverse-gamma full conditionals.
# NOTE(review): reproduced from the dlm package; the `\\"` escapes in the
# message strings below look like copy/paste mangling of `\"` -- confirm
# against the package source before running.
dlmGibbsDIG <- function (y, mod, a.y, b.y, a.theta, b.theta, shape.y, rate.y,
shape.theta, rate.theta, n.sample = 1, thin = 0, ind, save.states = TRUE,
progressBar = interactive())
{
# Error / warning messages used by the argument checks below.
msg1 <- "Either \\"a.y\\" and \\"b.y\\" or \\"shape.y\\" and \\"rate.y\\" must be specified"
msg2 <- "Unexpected length of \\"shape.y\\" and/or \\"rate.y\\""
msg3 <- "Unexpected length of \\"a.y\\" and/or \\"b.y\\""
msg4 <- paste("Either \\"a.theta\\" and \\"b.theta\\" or \\"shape.theta\\"",
"and \\"rate.theta\\" must be specified")
msg5 <- "Unexpected length of \\"shape.theta\\" and/or \\"rate.theta\\""
msg6 <- "Unexpected length of \\"a.theta\\" and/or \\"b.theta\\""
msg7 <- "\\"thin\\" must be a nonnegative integer"
msg8 <- "multivariate observations are not allowed"
msg9 <- "inadmissible value of \\"ind\\""
# Univariate observations only.
if (NCOL(y) > 1)
stop(msg8)
r <- ncol(mod$FF)
# When `ind` is given, permute the state vector so the components with
# unknown system variance come first (the permutation is undone on
# return via order(perm)).
if (hasArg(ind)) {
ind <- unique(as.integer(ind))
s <- 1:r
if (!all(ind %in% s))
stop(msg9)
perm <- s[c(ind, s[!(s %in% ind)])]
FF(mod) <- mod$FF[, perm, drop = FALSE]
GG(mod) <- mod$GG[perm, perm, drop = FALSE]
W(mod) <- mod$W[perm, perm, drop = FALSE]
p <- length(ind)
}
else {
perm <- ind <- 1:r
p <- r
}
nobs <- NROW(y)
# `thin` draws are discarded between saved ones, so run thin+1
# iterations per saved sample.
if (is.numeric(thin) && (thin <- as.integer(thin)) >= 0) {
every <- thin + 1
mcmc <- n.sample * every
}
else stop(msg7)
# Prior for V: either (shape.y, rate.y) directly, or the (a.y, b.y)
# mean/variance parametrisation converted below.
if (!hasArg(a.y))
if (!hasArg(shape.y))
stop(msg1)
else if (!hasArg(rate.y))
stop(msg1)
else {
if (!all(c(length(shape.y), length(rate.y)) == 1))
warning(msg2)
}
else if (!hasArg(b.y))
stop(msg1)
else {
if (!all(c(length(a.y), length(b.y)) == 1))
warning(msg3)
shape.y <- a.y^2/b.y
rate.y <- a.y/b.y
}
# Same two parametrisations for the system variances W[ind].
if (!hasArg(a.theta))
if (!hasArg(shape.theta))
stop(msg4)
else if (!hasArg(rate.theta))
stop(msg4)
else {
if (!all(c(length(shape.theta), length(rate.theta)) %in%
c(1, p)))
warning(msg5)
}
else if (!hasArg(b.theta))
stop(msg4)
else {
if (!all(c(length(a.theta), length(b.theta)) %in% c(1,
p)))
warning(msg6)
shape.theta <- a.theta^2/b.theta
rate.theta <- a.theta/b.theta
}
# Posterior shape parameters (conjugate update adds nobs/2).
shape.y <- shape.y + 0.5 * nobs
shape.theta <- shape.theta + 0.5 * nobs
shape.theta <- rep(shape.theta, length.out = p)
rate.theta <- rep(rate.theta, length.out = p)
# Storage for the state path and the sampled variances.
theta <- matrix(0, nobs + 1, r)
if (save.states)
gibbsTheta <- array(0, dim = c(nobs + 1, r, n.sample))
gibbsV <- vector("numeric", n.sample)
gibbsW <- matrix(0, nrow = n.sample, ncol = p)
it.save <- 0
if (progressBar)
pb <- txtProgressBar(0, mcmc, style = 3)
# Main Gibbs loop.
for (it in 1:mcmc) {
if (progressBar)
setTxtProgressBar(pb, it)
# 1) Draw the state path given the current variances (FFBS).
modFilt <- dlmFilter(y, mod, simplify = TRUE)
theta[] <- dlmBSample(modFilt)
# 2) Draw V from its inverse-gamma full conditional, based on the
#    sum of squared observation residuals.
y.center <- y - tcrossprod(theta[-1, , drop = FALSE],
mod$FF)
SSy <- drop(crossprod(y.center))
mod$V[] <- 1/rgamma(1, shape = shape.y, rate = rate.y +
0.5 * SSy)
# 3) Draw W[1:p] from their inverse-gamma full conditionals, based
#    on the squared state-transition residuals.
theta.center <- theta[-1, , drop = FALSE] - tcrossprod(theta[-(nobs +
1), , drop = FALSE], mod$GG)
# NOTE(review): this SStheta is immediately overwritten by the
# colSums() expression just below (both compute sums of squares).
SStheta <- drop(sapply(1:p, function(i) crossprod(theta.center[,
i])))
SStheta <- colSums((theta[-1, 1:p, drop = FALSE] - tcrossprod(theta[-(nobs +
1), , drop = FALSE], mod$GG)[, 1:p])^2)
diag(mod$W)[1:p] <- 1/rgamma(p, shape = shape.theta,
rate = rate.theta + 0.5 * SStheta)
# Save every `every`-th iteration (thinning).
if (!(it%%every)) {
it.save <- it.save + 1
if (save.states)
gibbsTheta[, , it.save] <- theta
gibbsV[it.save] <- diag(mod$V)
gibbsW[it.save, ] <- diag(mod$W)[1:p]
}
}
colnames(gibbsW) <- paste("W", ind, sep = ".")
if (progressBar)
close(pb)
# Undo the state permutation before returning the sampled states.
if (save.states)
return(list(dV = gibbsV, dW = gibbsW,
theta = gibbsTheta[, order(perm), , drop = FALSE]))
else return(list(dV = gibbsV, dW = gibbsW))
}
library(dlm)
library(PissoortThesis)
data(max_years)
# Run the Gibbs sampler on the yearly maxima: linear trend + quarterly
# seasonal component, sampling the slope and seasonal system variances
# (ind = c(2, 3)).
outGibbs <- dlmGibbsDIG(max_years$df$Max, dlmModPoly(2) + dlmModSeas(4),
a.y = 1, b.y = 1000, a.theta = 1,
b.theta = 1000, n.sample = 1100, ind = c(2, 3),
save.states = FALSE)
# Discard the burn-in draws.
burn <- 100
attach(outGibbs)
dV <- dV[-(1:burn)]
dW <- dW[-(1:burn), ]
detach()
# Trace plots of the three variances ...
par(mfrow=c(2,3), mar=c(3.1,2.1,2.1,1.1))
plot(dV, type = 'l', xlab = "", ylab = "",
main = expression(sigma^2))
plot(dW[ , 1], type = 'l', xlab = "", ylab = "",
main = expression(sigma[beta]^2))
plot(dW[ , 2], type = 'l', xlab = "", ylab = "",
main = expression(sigma[s]^2))
# ... and running ergodic means to assess convergence.
# NOTE(review): `use` subtracts `burn` a second time from the chain that
# was already truncated above -- confirm this double subtraction is
# intended.
use <- length(dV) - burn
from <- 0.05 * use
at <- pretty(c(0, use), n = 3); at <- at[at >= from]
plot(ergMean(dV, from), type = 'l', xaxt = 'n',xlab = "", ylab = "")
axis(1, at = at - from, labels = format(at))
plot(ergMean(dW[ , 1], from), type = 'l', xaxt = 'n',
xlab = "", ylab = "")
axis(1, at = at - from, labels = format(at))
plot(ergMean(dW[ , 2], from), type = 'l', xaxt = 'n',
xlab = "", ylab = "")
axis(1, at = at - from, labels = format(at))
#===================================================================================
library(nimble)
## Building a simple linear state-space model.
## x is latent space, y is observed data
# (The original line was missing the assignment target `timeModelCode`,
# which `nimbleModel()` below expects -- a syntax error as written.)
timeModelCode <- nimbleCode({
  x[1] ~ dnorm(mu_0, 1)
  y[1] ~ dnorm(x[1], 1)
  for(i in 2:t){
    x[i] ~ dnorm(x[i-1] * a + b, 1)
    y[i] ~ dnorm(x[i] * c, 1)
  }
  # Priors on the transition/observation coefficients and initial mean.
  a ~ dunif(0, 1)
  b ~ dnorm(0, 1)
  c ~ dnorm(1,1)
  mu_0 ~ dnorm(0, 1)
})
## simulate some data
# True parameter values used for the simulation.
t <- 25; mu_0 <- 1
x <- rnorm(1 ,mu_0, 1)
y <- rnorm(1, x, 1)
a <- 0.5; b <- 1; c <- 1
# Grow the latent path x and the observations y forward in time.
for(i in 2:t){
x[i] <- rnorm(1, x[i-1] * a + b, 1)
y[i] <- rnorm(1, x[i] * c, 1)
}
## build the model
# (Was `data <- list(y = y)` inside the call, which assigns a global
# `data` object and passes the list positionally; use `data =` instead.)
rTimeModel <- nimbleModel(timeModelCode, constants = list(t = t),
                          data = list(y = y), check = FALSE)
## Set parameter values and compile the model
rTimeModel$a <- 0.5
rTimeModel$b <- 1
rTimeModel$c <- 1
rTimeModel$mu_0 <- 1
cTimeModel <- compileNimble(rTimeModel)
## ============================== http://sbfnk.github.io/mfiidd/pmcmc.html =======
#' Bootstrap particle filter: Monte-Carlo estimate of the marginal
#' log-likelihood of `theta` for a stochastic `fitmodel`.
#'
#' @param fitmodel a fitmodel-like object providing `$simulate`,
#'   `$dPointObs` and `$state.names`.
#' @param theta named numeric vector of parameter values.
#' @param init.state named numeric vector of initial state values.
#' @param data data frame with a `time` column and the observations.
#' @param n.particles number of particles.
#' @param brow if TRUE, enter the browser for interactive debugging.
#' @return the estimated marginal log-likelihood (numeric scalar).
'my_particleFilter' <- function(fitmodel, theta, init.state,
                                data, n.particles, brow = FALSE) {
  if (brow) browser()
  ## Initialisation: log-likelihood accumulator, one state per particle,
  ## equal weights to start.
  margLogLike <- 0
  state.particles <- rep(list(init.state), n.particles)
  weight.particles <- rep(1 / n.particles, length = n.particles)
  current.time <- 0
  ## Loop over observation times: resample, propagate, weight.
  for (i in seq_len(nrow(data))) {
    # Next data point as a plain vector; its "time" drives the simulation.
    data.point <- unlist(data[i, ])
    next.time <- data.point["time"]
    # Multinomial resampling according to the current weights
    # (`sample` normalises the weights internally).
    index.resampled <- sample(x = n.particles,
                              size = n.particles,
                              replace = TRUE,
                              prob = weight.particles)
    state.particles <- state.particles[index.resampled]
    ## Propagate and re-weight every particle.
    for (p in seq_len(n.particles)) {
      current.state.particle <- state.particles[[p]]
      # Simulate from the current observation time to the next one.
      traj <- fitmodel$simulate(theta = theta,
                                init.state = current.state.particle,
                                times = c(current.time, next.time))
      # State of the model at the next observation time (as a vector).
      model.point <- unlist(traj[2, fitmodel$state.names])
      # Weight = likelihood of the observed data point given the particle.
      weight.particles[p] <-
        fitmodel$dPointObs(data.point = data.point,
                           model.point = model.point,
                           theta = theta)
      state.particles[[p]] <- model.point
    }
    current.time <- next.time
    # Update the marginal log-likelihood with the mean particle weight.
    margLogLike <- margLogLike + log(mean(weight.particles))
  }
  margLogLike
}
library(fitR)
data(SEIT4L_stoch)
data(FluTdC1971)
# theta close to the mean posterior estimate of the deterministic SEIT4L
# model
theta <- c(R0 = 7, D_lat = 1, D_inf = 4,
alpha = 0.5, D_imm = 10, rho = 0.65)
# init state
init.state <- c(S = 279, E = 0, I = 2, T1 = 3,
T2 = 0, T3 = 0, T4 = 0, L = 0, Inc = 0)
# run the particle filter (n.particles = 35), 10 replicates to eyeball
# the Monte-Carlo variability of the log-likelihood estimate
res2 <- c()
for(i in 1:10)
res2[i] <- my_particleFilter(SEIT4L_stoch, theta, init.state,
data = FluTdC1971, n.particles = 35, brow = F)
# NOTE(review): `res` is not defined at this point in the script (it is
# created further below as a data frame) -- this plot likely depends on a
# leftover object from a previous session; confirm.
plot(res, type = "l")
lines(res2, col = "red")
## Calibrate # of particles
# For each candidate number of particles, estimate the log-likelihood
# n.replicates times and record mean, sd, proportion of replicates with
# particle depletion (non-finite log-likelihood), and wall-clock time.
test.n.particles <- seq(50, 1000, 50)
n.replicates <- 100
sample.log.like <- vector("numeric", length = n.replicates)
res <- data.frame()
for (n.particles in test.n.particles) {
  # start measuring time
  start.time <- Sys.time()
  for (i in seq_len(n.replicates)) {
    # (Original message wrongly reported the replicate index `i` as a
    # particle count.)
    cat("Replicate ", i, " with ", n.particles, " particles\n")
    # one Monte-Carlo estimate of the log-likelihood
    sample.log.like[i] <- my_particleFilter(SEIT4L_stoch, theta,
                                            init.state, FluTdC1971,
                                            n.particles)
  }
  # end measuring time
  end.time <- Sys.time()
  # Keep only finite replicates for mean/sd; the discarded share is the
  # proportion of replicates suffering particle depletion.
  sample.finite.log.like <- sample.log.like[is.finite(sample.log.like)]
  ans <- c(mean = mean(sample.finite.log.like),
           sd = sd(sample.finite.log.like),
           prop.depletion = 1 - length(sample.finite.log.like) / length(sample.log.like),
           time = end.time - start.time)
  res <- rbind(res, t(ans))
}
## Setting pMCMC
# wrapper for posterior
# Evaluates the log-posterior of the stochastic SEIT4L model at `theta`,
# with the marginal likelihood estimated by `my_particleFilter` above.
my_posteriorSto <- function(theta){
my_fitmodel <- SEIT4L_stoch
my_init.state <- c("S" = 279,"E" = 0,"I" = 2,"T1" = 3,"T2" = 0, "T3" = 0, "T4" = 0,"L" = 0,"Inc" = 0)
my_n.particles <- 400
# you can reduce the number of particles if your pMCMC is too slow
return(logPosterior(fitmodel = my_fitmodel,
theta = theta,
init.state = my_init.state,
data = FluTdC1971,
margLogLike = my_particleFilter,
n.particles = my_n.particles))
}
# load results of deterministic fit
data(mcmc_TdC_deter_longRun)
# Let's use the first trace only, no need to burn or thin
# NOTE(review): the object used below is not the dataset named in the
# data() call above -- confirm `mcmc_SEITL_infoPrior_theta1` is made
# available by `data(mcmc_TdC_deter_longRun)`.
trace <- mcmc_SEITL_infoPrior_theta1$trace
# we will start the pMCMC at the mean posterior estimate
# of the deterministic fit
init.theta <- colMeans(trace[SEIT4L_stoch$theta.names])
# and we take the empirical covariance matrix for the
# Gaussian kernel proposal
covmat <- mcmc_SEITL_infoPrior_theta1$covmat.empirical
# lower and upper limits of each parameter
lower <- c(R0 = 0, D_lat = 0 , D_inf = 0, alpha = 0, D_imm = 0, rho = 0)
upper <- c(R0 = Inf, D_lat = Inf , D_inf = Inf, alpha = 1, D_imm = Inf, rho = 1)
# number of iterations for the MCMC
n.iterations <- 50 # just a few since it takes quite a lot of time
# Here we don't adapt so that we can check the acceptance rate of the empirical covariance matrix
adapt.size.start <- 100
adapt.size.cooling <- 0.99
adapt.shape.start <- 100
# load traces
data(pmcmc_SEIT4L_infoPrior_n50)
# combine into a `mcmc.list` object
library("coda")
trace <- mcmc.list(lapply(pmcmc_SEIT4L_infoPrior_n50, function(chain) {
mcmc(chain$trace)
}))
# acceptance rate is below the optimal 23%
1 - rejectionRate(trace)
# accordingly, the combined ESS is a bit low
effectiveSize(trace)
library("lattice")
xyplot(trace)
# this can take some time as we have 5 chains
plotESSBurn(trace)
# Actually, it looks like no burn-in is needed What about autocorrelation
acfplot(x = trace, lag.max = 50)
# There is substantial autocorrelation but we can't thin too much since the
# chains are quite short. So let's keep 1 iteration every 20
trace.thin.n50 <- burnAndThin(trace, thin = 20)
# Finally we can plot the posterior density
densityplot(x = trace.thin.n50)
## 400 PArticles !!!
# Same post-processing for the pMCMC run with 400 particles.
data(pmcmc_SEIT4L_infoPrior_n400)
trace <- mcmc.list(lapply(pmcmc_SEIT4L_infoPrior_n400, function(chain) {
mcmc(chain$trace)
}))
trace.thin.n400 <- burnAndThin(trace, thin = 20)
densityplot(x = trace.thin.n400)
# Compare posteriors obtained with 50 vs 400 particles.
plotPosteriorDensity(list(n50 = trace.thin.n50, n400 = trace.thin.n400))
## Stochastic vs deterministic fit
# create mcmc object
trace1 <- mcmc(mcmc_SEIT4L_infoPrior_theta1$trace)
trace2 <- mcmc(mcmc_SEIT4L_infoPrior_theta2$trace)
# combine in a mcmc.list
trace <- mcmc.list(trace1, trace2)
# burn and thin as the chain with uniform prior (see above sections)
trace.deter <- burnAndThin(trace, burn = 5000, thin = 40)
# compare posterior density
plotPosteriorDensity(list(deter = trace.deter, sto = trace.thin.n400))
# combine all traces in a data frame
library("plyr")
trace.combined <- ldply(trace.thin.n400)
# take the mean of theta
theta.bar <- colMeans(trace.combined[SEIT4L_stoch$theta.names])
print(theta.bar)
# compute its log-likelihood
init.state <- c(S = 279, E = 0, I = 2, T1 = 3, T2 = 0, T3 = 0, T4 = 0, L = 0,
Inc = 0)
log.like.theta.bar <- my_particleFilter(SEIT4L_stoch, theta.bar, init.state,
data = FluTdC1971, n.particles = 40)
print(log.like.theta.bar)
log.like.theta.bar.deter <- dTrajObs(SEIT4L_deter, theta.bar, init.state,
data = FluTdC1971, log = TRUE)
print(log.like.theta.bar.deter)
# and its deviance
D.theta.bar <- -2 * log.like.theta.bar
print(D.theta.bar)
# the effective number of parameters
p.D <- var(-2 * trace.combined$log.likelihood)/2
print(p.D)
# and finally the DIC
DIC <- D.theta.bar + 2 * p.D
print(DIC)
# take the mean posterior estimates of the deterministic model
x <- summary(trace.deter)
theta.bar.deter <- x$statistics[SEIT4L_deter$theta.names, "Mean"]
# Compare fits of the stochastic vs deterministic model to the data.
plotFit(SEIT4L_stoch, theta.bar, init.state, data = FluTdC1971, n.replicates = 1000)
plotFit(SEIT4L_deter, theta.bar.deter, init.state, data = FluTdC1971, n.replicates = 1000)
# Wishart density / draw example.
# NOTE(review): `dwish`/`rwish` are provided by MCMCpack, not pomp --
# confirm the intended library() call. Also note `density` here masks
# stats::density.
library("pomp")
density <- dwish(matrix(c(2,-.3,-.3,4),2,2), 3, matrix(c(1,.3,.3,1),2,2))
draw <- rwish(3, matrix(c(1,.3,.3,1),2,2))
plot(dwish( W = matrix(c(2,-.3,-.3,4),2,2), v = 5,
S = matrix(c(1,.3,.3,1),2,2)))
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.