Scripts-R/Bayes_own_gev.R

#setwd('/home/proto4426/Documents/Extreme/R resources/IRM')
#load("/home/proto4426/Documents/Thesis/Extreme/R resources/IRM/data1.Rdata")
load("/home/proto4426/Documents/Thesis/Extreme/R resources/IRM/data1_bayes.Rdata")


# Bayesian Analysis constrcuted with functions in package PissoortThesis
######################################################################
library(evd)
library(mvtnorm)
library(KernSmooth)
library(coda)
library(pander)
library(gridExtra)
library(tidyverse)
library(ggjoy)
library(viridis)
library(grid)
library(reshape2)

library(PissoortThesis)
data("max_years")



## As we aim at building models sequentially,  we will begin by the Gumbel model
## It is models the lowest number of degrees of freedom.


## MEtropolis-Hastlings for the stationary Gumbel model ###
###########################################################


# Optimize Posterior Density Function to find starting values
fn <- function(par, data)
  - log_post_gumb(par[1], par[2], data)
param <- c(mean(max_years$df$Max), log(sd(max_years$df$Max)))
opt0 <- nlm(fn,
            param,
            data = max_years$data,
            hessian = T,
            iterlim = 1e5)
start0 <- opt0$estimate
names(start0) <- c("mu", "logsig")
Sig <- solve(opt0$hessian)
ev <- eigen((2.4 / sqrt(2)) ^ 2 * Sig)
varmat0 <- ev$vectors %*% diag(sqrt(ev$values)) %*% t(ev$vectors)


# Starting Values
set.seed(10)
start0 <- list()
k <- 1
while (k < 5) {
  # starting values are randomly selected from a distribution
  # that is overdispersed relative to the target
  sv <-
    as.numeric(rmvnorm(1, opt0$estimate, 50 * solve(opt0$hessian)))
  svlp <- log_post_gumb(sv[1], sv[2], max_years$data)
  print(svlp)
  if (is.finite(svlp)) {
    start0[[k]] <- sv
    names(start0[[k]]) <- c("mu", "logsig")
    k <- k + 1
  }
}

set.seed(101)
iter <- 2e3
mh.mcmc_gumb <- PissoortThesis::MH_mcmc.own(start0,
                                            varmat0 %*% c(.9, 1.1),
                                            iter, burnin = iter / 4)
# Display the mean acceptance rate
mh.mcmc_gumb$mean.acc_rates



##### Gumbel Model with the Gibbs Sampler
#########################################
set.seed(101)
iter <- 1e3
gib.mcmc_gumbel <- gibbs_mcmc.own(
  start0,
  iter = iter,
  nbr.chain = 4,
  Gumbel = T,
  propsd = c(.42, .12),
  burnin = iter / 4
)
## (For  computation time comparisons we take these values for the parameter here)




#########   MEtropolis-Hastlings for the stationary GEV model #######
#####################################################################


# Optimize Posterior Density Function to find starting values
fn <- function(par, data)
  - log_post0(par[1], par[2], par[3], data)
param <- c(mean(max_years$df$Max), log(sd(max_years$df$Max)), 0.1)
# opt <- optim(param, fn, data = max_years$data,
#              method="BFGS", hessian = TRUE)
opt <- nlm(fn,
           param,
           data = max_years$data,
           hessian = T,
           iterlim = 1e5)
start <- opt$estimate
names(start) <- c("mu", "logsig", "xi")
Sig <- solve(opt$hessian)
ev <- eigen((2.4 / sqrt(2)) ^ 2 * Sig)
varmat <- ev$vectors %*% diag(sqrt(ev$values)) %*% t(ev$vectors)

# Starting Values
set.seed(101)
start <- list()
k <- 1
while (k < 2) {
  # starting values are randomly selected from a distribution
  # that is overdispersed relative to the target
  sv <-
    as.numeric(rmvnorm(1, opt$estimate, 50 * solve(opt$hessian)))
  svlp <- log_post0(sv[1], sv[2], sv[3], max_years$data)
  print(svlp)
  if (is.finite(svlp)) {
    start[[k]] <- sv
    names(start[[k]]) <- c("mu", "logsig", "xi")
    k <- k + 1
  }
}


set.seed(101)
iter <- 2e3
mh.mcmc1 <-
  PissoortThesis::MH_mcmc.own(start[[1]],
                              varmat %*% c(.5, .6, .85),
                              iter = iter,
                              burnin = iter / 2)
# Display the mean acceptance rate
mh.mcmc1$mean.acc_rates

param_mean_mh <- apply(mh.mcmc1$out.chain[, 1:3], 2, mean)
names(param_mean_mh) <- c("mu", "logsig", "xi")
chainsPlot_mh <- chains.plotOwn(mh.mcmc1$out.chain,
                                post.mean.green = param_mean_mh,
                                title = "Using Metropolis-Hastings Algorithm")


## Burn in 1/4 of values  :
mh.mcmc.out <- mh.mcmc1$out.chain[-(1:iter / 4),]

# Effective sample sizes :
effectiveSize(mcmc(mh.mcmc.out[, 1:3]))




##########  GIBBS sampler for the stationary GEV model  #######
##############################################################


## Advised value for the proposal variance
#prop_var <- sqrt( (2.4/sqrt(1))^2 * solve(opt$hessian) )


set.seed(100)
iter <- 1e3
# Same starting point as for MH. We also keep only one chain
# i.e. no different starting values so far.
gibbs_statio <- gibbs_mcmc.own(
  start,
  iter = iter,
  nbr.chain = 4,
  propsd = c(.42, .12, .12),
  burnin = iter / 4
)
gibbs_statio$mean_acc.rates


chainsPlot_gibb <- chains.plotOwn(gibbs_statio$out.chain,
                                  title = "Using Gibbs Sampler")
## Compare chains of Gibbs sampler with MH and HMC (see other code)
grid.arrange(
  arrangeGrob(chainsPlot_mh, chainsPlot_gibb, ncol = 2),
  arrangeGrob(plots_hmc),
  #, nrow = 2,
  #layout_matrix = cbind(c(1,3), c(2,3)), widths = cbind(c(1/2, 1/4),
  widths = c(2, 1),
  top = textGrob(
    "TracePlots of the generated Chains for the stationary Model",
    gp = gpar(
      col = "#33666C",
      fontsize = 28,
      font = 2
    )
  )
)


## Compare the parameters from Gibbs and MH
param_gibbs <- apply(gibbs_statio$out.chain[, 1:3], 2, mean)

# Transform back the scale parameter
param_mean_mh["sigma"] <- exp(param_mean_mh["logsig"])
#param_mean_mh <- param_mean_mh[names(param_mean_mh) %in% "logsig" == F]
param_gibbs["sigma"] <- exp(param_gibbs["logsig"])
#param_gibbs <- param_gibbs[names(param_gibbs) %in% "logsig" == F]

frame <-
  data.frame(
    Bayesian.mh = param_mean_mh[c("mu", "sigma", "xi")],
    Bayesian.gib = param_gibbs[c("mu", "sigma", "xi")],
    'Frequentist(mle)' = gev_fit$mle
  )
row.names(frame) = c("$\\mu \\ $", "$\\sigma \\quad$", "$\\xi \\quad$")
#knitr::kable(frame, align = 'l')
stargazer::stargazer(frame, summary = F)


# Effective sample sizes :
effectiveSize(mcmc(gibbs_statio$out.chain[, 1:3]))




#####################################################################
###########  Gibbs Sampler dealing with Nonstationarity : ###########
##### Linear model on the location :  mu(t) = mu0 + mu1 * t  ########
#####################################################################

data <- max_years$data


fn <- function(par, data)
  - log_post1(par[1], par[2], par[3],
              par[4], rescale.time = T, data)
param <-
  c(mean(max_years$df$Max), 0, log(sd(max_years$df$Max)), -0.1)
opt <- optim(param,
             fn,
             data = max_years$data,
             method = "BFGS",
             hessian = T)
# opt <- nlm(fn, param, data = max_years$data,
#     hessian=T, iterlim = 1e5)
opt

# Starting Values
set.seed(10)
start <- list()
k <- 1
while (k < 5) {
  # starting values are randomly selected from a distribution
  # that is overdispersed relative to the target
  sv <- as.numeric(rmvnorm(1, opt$par, 50 * solve(opt$hessian)))
  svlp <- log_post1(sv[1], sv[2], sv[3], sv[4], max_years$data)
  print(svlp)
  if (is.finite(svlp)) {
    start[[k]] <- sv
    k <- k + 1
  }
}
mat_startvalues <- matrix(
  unlist(start),
  ncol = 4,
  byrow = T,
  dimnames = list(
    c("start[[1]]", "start[[2]]",
      "start[[3]]", "start[[4]]"),
    c("$\\mu_0 \\ $", "$\\mu_{trend}$",
      "$\\logsig$", "$\\xi$")
  )
)
knitr::kable(mat_startvalues, align = "c")
stargazer::stargazer(mat_startvalues, summary = F)

# k chains with k different starting values. Number of iter is for each chain.
set.seed(102)
iter.by.chain <- 1000
gibbs.trend <- PissoortThesis::gibbs.trend.own(
  start,
  propsd = c(.5, 1.9, .15, .12),
  iter = iter.by.chain,
  burnin = ceiling(iter.by.chain /
                     2 + 1),
  keep.same.seed = 12
)
gibbs.trend$mean_acc.rates
colMeans(do.call(rbind, gibbs.trend$mean_acc.rates))


head(param.chain <- gibbs.trend$out.chain[, 1:4])

# Effective sample sizes :
effectiveSize(mcmc(param.chain))


### Plot of the chains
PissoortThesis::chains.plotOwn(gibbs.trend$out.chain)
ggplot(gibbs.trend$out.chain) + geom_line(aes(x = iter, y = mu1)) +
  theme_piss(16, 14) + labs(ylab = "mu1", xlab = "iter")


## TracePlots
chain.mix <- cbind.data.frame(gibbs.trend$out.chain,
                              iter.chain = rep(501:1000, 4))
chain_mix_gg <- mixchains.Own(chain.mix, burnin = 501)
title = "TracePlots of the generated Chains "
PissoortThesis::grid_arrange_legend(
  chain_mix_gg$gmu,
  chain_mix_gg$gmutrend,
  ncol = 2,
  top = grid::textGrob(title,
                       gp = grid::gpar(
                         col = "#33666C",
                         fontsize = 28,
                         font = 4
                       ))
)
grid.arrange(chain_mix_gg$glogsig, chain_mix_gg$gxi, ncol = 2)

library("bayesplot")
array.post <-
  array(
    unlist(gibbs.trend$out.ind),
    dim = c(4000 / 4 + 1, 4, 4),
    dimnames = list(
      iterations = NULL,
      parameters = c("mu0", "mu1", "logsig", "xi"),
      chains = c("chain:1", "chain:2", "chain:3",
                 "chain:4")
    )
  )
color_scheme_set("mix-blue-red")
mcmc_trace(array.post,
           facet_args = list(ncol = 1, strip.position = "left"))
mcmc_trace_highlight(array.post, highlight = 3)


# Function to create mcmc.lists, useful for diagnostics on chains.
'mc.listDiag4' <-
  function (list,
            subset = c("mu0", "mu1", "logsig", "xi")) {
    mcmc.list(mcmc(list[[1]][, subset]),
              mcmc(list[[2]][, subset]),
              mcmc(list[[3]][, subset]),
              mcmc(list[[4]][, subset]))
  }

## Gelman Coda Diagnostics
Rhat <- gelman.diag(mc.listDiag4(gibbs.trend$out.ind), autoburnin = F)
gelman.plot(mc.listDiag4(gibbs.trend$out.ind),
            autoburnin = F,
            auto.layout = T)
# In ggplot : Put all on the same y-scales
gp.dat <-
  gelman.plot(mc.listDiag4(gibbs.trend$out.ind), autoburnin = F)
df = data.frame(
  bind_rows(as.data.frame(gp.dat[["shrink"]][, , 1]),
            as.data.frame(gp.dat[["shrink"]][, , 2])),
  q = rep(dimnames(gp.dat[["shrink"]])[[3]],
          each = nrow(gp.dat[["shrink"]][, , 1])),
  last.iter = rep(gp.dat[["last.iter"]], length(gp.dat))
)
df_gg <- melt(df, c("q", "last.iter"), value.name = "shrink_factor")
ggplot(df_gg,
       aes(
         last.iter,
         shrink_factor,
         colour = q,
         linetype = q
       )) +
  geom_hline(yintercept = 1,
             colour = "grey30",
             lwd = 0.2) +
  geom_line() +
  geom_hline(
    yintercept = 1.1,
    colour = "green4",
    linetype = "dashed",
    size = 0.3
  ) +
  scale_y_continuous(breaks = c(1, 1.1, 1.5, 2, 3, 4),
                     labels = c(1, 1.1, 1.5, 2, 3, 4)) +
  #ggtitle("Gelman Rubin dignostic : R-hat Statistic") +
  facet_wrap( ~ variable,
              labeller = labeller(
                .cols = function(x)
                  gsub("V", "Chain ", x)
              )) +
  labs(
    x = "Last Iteration in Chain",
    y = "Shrink Factor",
    colour = "Quantile",
    linetype = "Quantile",
    subtitle = "Gelman Rubin diagnostic : R-hat Statistic"
  ) +
  scale_linetype_manual(values = c(2, 1)) +
  theme_piss() +
  theme(
    strip.text = element_text(size = 15),
    plot.subtitle = element_text(
      size = 21,
      hjust = 0.5,
      colour = "#33666C",
      face = "bold"
    )
  )


# Transform back the scale parameter
param.chain[, "sigma"] <- exp(param.chain[, "logsig"])

# Markov Chain Correlations
autocorr(mcmc(param.chain[, c("mu0", "mu1", "logsig", "xi")]))
autocorr.diag(mcmc(param.chain[, c("mu0", "mu1", "logsig", "xi")]))
autocorr.plot(mcmc(param.chain[, c("mu0", "mu1", "logsig", "xi")]))

crosscorr.plot(mcmc(param.chain[, c("mu0", "mu1", "logsig", "xi")]),
               title = "Cross-correlation")
title("Cross-correlation")
# In ggplot
library(ggcorrplot)
ggcorrplot(
  crosscorr(mcmc(param.chain[, c("mu0", "mu1", "logsig", "xi")])),
  hc.order = TRUE,
  type = "lower",
  lab = TRUE,
  title = "Cross-correlation",
  ggtheme = PissoortThesis::theme_piss
)
# Compare it with Fisher information matrix ('frequentist') by MLE with ismev
cr.corr_lin <-
  crosscorr(mcmc(param.chain[, c("mu0", "mu1", "sigma", "xi")]))
dimnames(gev_nonstatio$cov) <- dimnames(cr.corr_lin)
# Transform it to correlation for comparison
cov2cor(gev_nonstatio$cov)
cr.corr_lin

## Geweke
geweke <- geweke.diag(mcmc(param.chain))
2 * dnorm(geweke$z)
geweke.plot(mcmc(param.chain), nbins = 20)


## Raftery Coda Diagnostics
# For each chain individually
raf1 <-
  raftery.diag(
    mc.listDiag4(gibbs.trend$out.ind)[[1]],
    q = 0.05,
    r = 0.02,
    s = 0.95
  )
raf2 <-
  raftery.diag(
    mc.listDiag4(gibbs.trend$out.ind)[[2]],
    q = 0.05,
    r = 0.02,
    s = 0.95
  )
raf3 <-
  raftery.diag(
    mc.listDiag4(gibbs.trend$out.ind)[[3]],
    q = 0.05,
    r = 0.02,
    s = 0.95
  )
raf4 <-
  raftery.diag(
    mc.listDiag4(gibbs.trend$out.ind)[[4]],
    q = 0.05,
    r = 0.02,
    s = 0.95
  )
set.seed(12)
raf <-
  sample(list(
    raf1$resmatrix,
    raf2$resmatrix,
    raf3$resmatrix,
    raf4$resmatrix
  ),
  1)
stargazer::stargazer(raf, summary = F)
# For the complete chains
raf_tot <-
  raftery.diag(mcmc(gibbs.trend$out.chain[, c("mu0", "mu1", "logsig", "xi")]),
               q = 0.05,
               r = 0.02,
               s = 0.95)
stargazer::stargazer(raf_tot$resmatrix, summary = F)



## Recompute the chain by decreasing the Burn-in period
set.seed(102)
# keep.same.seed  sets a seed at each iterations that is the integer you specify
# times the iteration number.
gibbs.trend <- PissoortThesis::gibbs.trend.own(
  start,
  propsd = c(.5, 0.008, .15, .12),
  # 0.008 ; 1.9
  iter = iter.by.chain,
  burnin = ceiling(iter.by.chain2 /
                     4 + 1),
  keep.same.seed = 123,
  rescale.time = F
)
gibbs.trend$mean_acc.rates
colMeans(do.call(rbind, gibbs.trend$mean_acc.rates))
head(param.chain <- gibbs.trend$out.chain[, 1:4])
# Effective sample sizes :
effectiveSize(mcmc(param.chain))



##### Inference  ########

par_gibbs_trend <-
  apply(gibbs.trend$out.chain[, c("mu0", "mu1", "logsig", "xi")],
        2, mean) # Average over the (4) generated chains

## Handling of the slope parameter mu_trend
t_bayes <- round((
  min(max_years$df$Year):max(max_years$df$Year) -
    mean(max_years$df$Year)
) / length(max_years$data), 4)
t_freq <- seq(1, length(max_years$data), 1)

mut <- par_gibbs_trend["mu0"] + par_gibbs_trend["mu1"] * t_bayes
mu_freq <- gev_nonstatio$mle[1] + gev_nonstatio$mle[2] * t_freq
# See the values of mu(t) in Bayesian VS in frequentist
pander(t(matrix(
  c(mut[1:4], mut[(length(mut) - 3):length(mut)],
    mu_freq[1:4], mu_freq[(length(mu_freq) - 3):length(mu_freq)]),
  ncol = 2,
  dimnames = list(c(
    rep("First values", 4),
    rep("Last values", 4)
  ),
  c("Frequentist",  "Bayesian"))
)))
(mu_freq[length(mu_freq)] - mu_freq[1]) / length(mu_freq)
(mut[length(mut)] - mut[1]) / length(mut)


## Summary And Parameter Table
param.chain$sigma <- exp(param.chain$logsig)
tab_quantiles <- as.data.frame(summary(mcmc(param.chain))$quantiles)

stargazer::stargazer(tab_quantiles, summary = F)

mean.post <- apply(param.chain, 2, mean)

# Transform back mu_1
mut <- tab_quantiles$`2.5%`[1] + tab_quantiles$`2.5%`[2] * t_bayes
cat("q.2.5% is ", q2.5_mu1Trans <-
      (mut[length(mut)] - mut[1])  / length(mut))
mut <- tab_quantiles$`25%`[1] + tab_quantiles$`25%`[2] * t_bayes
cat("q.25% is ", q25_mu1Trans <-
      (mut[length(mut)] - mut[1])  / length(mut))
mut <- tab_quantiles$`50%`[1] + tab_quantiles$`50%`[2] * t_bayes
cat("q.50% is ", q50_mu1Trans <-
      (mut[length(mut)] - mut[1])  / length(mut))
mut <- tab_quantiles$`75%`[1] + tab_quantiles$`75%`[2] * t_bayes
cat("q.75% is ", q75_mu1Trans <-
      (mut[length(mut)] - mut[1])  / length(mut))
mut <- tab_quantiles$`97.5%`[1] + tab_quantiles$`97.5%`[2] * t_bayes
cat("q.97.5% is ", q975_mu1Trans <-
      (mut[length(mut)] - mut[1])  / length(mut))


## Comparisons with Frequentist's results (GEV)
par_gibbs_trend["sigma"] <- exp(par_gibbs_trend["logsig"])
names(par_gibbs_trend["logsig"]) <- "sigma"
frame <-
  data.frame(Bayesian = par_gibbs_trend[c("mu0", "mu1", "sigma", "xi")],
             'Frequentist(mle)' = gev_nonstatio$mle)
knitr::kable(frame, align = 'l')


## Credible intervals (quantile-based) see above quantiles ! Densities are quite
# symetric, so it is not a bad idea to use this method.

## HPD intervals
library(HDInterval)
hpd_mu0 <- hdi(param.chain$mu0)
hpd_mu1 <- hdi(param.chain$mu1)
hpd_logsigma <- hdi(param.chain$logsig)
hpd_xi <- hdi(param.chain$xi)
hpd_sigma <- hdi(param.chain$sigma)
hpd95 <- data.frame(
  mu0 = c(hpd_mu0),
  mu1 = c(hpd_mu1),
  logsig = c(hpd_logsigma),
  xi = c(hpd_xi),
  sig = c(hpd_sigma)
)

hpd_mu0.75 <- hdi(param.chain$mu0, credMass = 0.5)
hpd_logsigma.75 <- hdi(param.chain$logsig, credMass = 0.5)
hpd_sigma.75 <- hdi(param.chain$sigma, credMass = 0.5)
hpd_xi.75 <- hdi(param.chain$xi, credMass = 0.5)
hpd_mu1.75 <- hdi(param.chain$mu1, credMass = 0.5)
hpd75 <- data.frame(
  mu0 = c(hpd_mu0.75),
  mu1 = c(hpd_mu1.75),
  logsig = c(hpd_logsigma.75),
  xi = c(hpd_xi.75),
  sig = c(hpd_sigma.75)
)
### Comparisons of all the ci :

## Densities of the parameters with their quantile-based and  HPD 0.95 intervals
color_scheme_set("brightblue")
col.intervals <- c("Quantile" = "red", "HPD" = "green")

'legend.things'  <-
  list(
    scale_color_manual(name = "Intervals", values = col.intervals),
    theme_piss(legend.position = c(0.92, 0.5)),
    theme(legend.background = element_rect(colour = "transparent", size = 0.5))
  )

g1 <- mcmc_dens(param.chain, pars = c("mu0")) +
  geom_vline(aes(xintercept = tab_quantiles['mu0', "2.5%"],
                 col = "Quantile"), linetype = "dashed") +
  geom_vline(aes(xintercept = tab_quantiles['mu0', "97.5%"],
                 col = "Quantile"), linetype = "dashed") +
  geom_vline(aes(xintercept = hpd_mu0[[1]],
                 col = "HPD"), linetype = "dashed") +
  geom_vline(aes(xintercept = hpd_mu0[[2]],
                 col = "HPD"), linetype = "dashed") +
  legend.things
g2 <- mcmc_dens(param.chain, pars = c("logsig")) +
  geom_vline(aes(xintercept = tab_quantiles['logsig', "2.5%"],
                 col = "Quantile"), linetype = "dashed") +
  geom_vline(aes(xintercept = tab_quantiles['logsig', "97.5%"],
                 col = "Quantile"), linetype = "dashed") +
  geom_vline(aes(xintercept = hpd_logsigma[[1]],
                 col = "HPD"), linetype = "dashed") +
  geom_vline(aes(xintercept = hpd_logsigma[[2]],
                 col = "HPD"), linetype = "dashed") +
  legend.things
g3 <- mcmc_dens(param.chain, pars = c("xi")) +
  geom_vline(aes(xintercept = tab_quantiles['xi', '2.5%'],
                 col = "Quantile"), linetype = "dashed") +
  geom_vline(aes(xintercept = tab_quantiles['xi', "97.5%"],
                 col = "Quantile"), linetype = "dashed") +
  geom_vline(aes(xintercept = hpd_xi[[1]],
                 col = "HPD"), linetype = "dashed") +
  geom_vline(aes(xintercept = hpd_xi[[2]],
                 col = "HPD"), linetype = "dashed") +
  legend.things
g4 <- mcmc_dens(param.chain, pars = c("mu1")) +
  geom_vline(aes(xintercept = tab_quantiles['mu1', '2.5%'],
                 col = "Quantile"), linetype = "dashed") +
  geom_vline(aes(xintercept = tab_quantiles['mu1', "97.5%"],
                 col = "Quantile"), linetype = "dashed") +
  geom_vline(aes(xintercept = hpd_mu1[[1]],
                 col = "HPD"), linetype = "dashed") +
  geom_vline(aes(xintercept = hpd_mu1[[2]],
                 col = "HPD"), linetype = "dashed") +
  legend.things

title <-
  "Posterior densities of the parameters and Bayesian intervals"
grid.arrange(
  g1,
  g4,
  g2,
  g3,
  nrow = 2,
  top = grid::textGrob(
    title,
    gp = grid::gpar(
      col = "#33666C",
      fontsize = 25,
      font = 4
    ),
    vjust = 0.4
  )
)


## Compare the frequentists with the Bayesian intervals

gev_freq_mu0_prof75 <-
  ci(
    gev_nonstatio_prof,
    method = "proflik",
    verbose = T,
    type = "parameter",
    which.par = 1,
    nint = 1000,
    alpha = 0.5
  )
gev_freq_mu1_prof75 <-
  ci(
    gev_nonstatio_prof,
    method = "proflik",
    verbose = T,
    type = "parameter",
    which.par = 2,
    nint = 1000,
    alpha = 0.5
  )
gev_freq_sig_prof75 <-
  ci(
    gev_nonstatio_prof,
    method = "proflik",
    verbose = T,
    type = "parameter",
    which.par = 3,
    nint = 1000,
    alpha = 0.5
  )
gev_freq_xi_prof75 <-
  ci(
    gev_nonstatio_prof,
    method = "proflik",
    verbose = T,
    type = "parameter",
    which.par = 4,
    nint = 1000,
    alpha = 0.5
  )

# Function to compute the intervals from porfile likelihood and from bayesian quantiles
'intervals_compareByParam' <-
  function(proflik.ci95 = gev_freq_xi_prof95,
           proflik.ci75 = gev_freq_xi_prof75,
           which = 4) {
    int_freq.norm <- data.frame(
      ll = gev_freq_norm95[which, 1],
      l = gev_freq_norm75[which, 1],
      m = gev_nonstatio$mle[which],
      h = gev_freq_norm75[which, 3],
      hh = gev_freq_norm95[which, 3]
    )
    int_freq.prof <- data.frame(
      ll = proflik.ci95[[1]],
      l = proflik.ci75[[1]],
      m = gev_nonstatio$mle[which],
      h = proflik.ci75[[3]],
      hh = proflik.ci95[[3]]
    )
    # browser()
    # hanle the transformation of sigma
    if (which == 3)
      which <- which + 2   #

    ## The  If condition to handle the rescaling problem of mu_1
    if (which == 2) {
      # int_bayes_quant <- data.frame(ll = q2.5_mu1Trans,
      #                                        l = q25_mu1Trans,
      #                                        m = q50_mu1Trans,
      #                                        h = q75_mu1Trans,
      #                                        hh = q975_mu1Trans)
      int_bayes_quant <- data.frame(
        ll = tab_quantiles$`2.5%`[which],
        l = tab_quantiles$`25%`[which],
        m = tab_quantiles$`50%`[which],
        h = tab_quantiles$`75%`[which],
        hh = tab_quantiles$`97.5%`[which]
      )
      int_bayes_hpd <- data.frame(
        ll = hpd_mu1.trans[[1]],
        l = hpd_mu1_trans.75[[1]],
        m = q50_mu1Trans,
        h = hpd_mu1_trans.75[[2]],
        hh = hpd_mu1.trans[[2]]
      )
    }
    else  {
      int_bayes_quant <- data.frame(
        ll = tab_quantiles$`2.5%`[which],
        l = tab_quantiles$`25%`[which],
        m = tab_quantiles$`50%`[which],
        h = tab_quantiles$`75%`[which],
        hh = tab_quantiles$`97.5%`[which]
      )

      int_bayes_hpd <- data.frame(
        ll = hpd95[1, which],
        l = hpd75[1, which],
        m = tab_quantiles$`50%`[which],
        h = hpd75[2, which],
        hh = hpd95[2, which]
      )
    }

    df <- cbind.data.frame(
      "Frequentist.Normal" = t(int_freq.norm),
      "Frequentist.profiled" =  t(int_freq.prof),
      "Bayesian.Quantile" = t(int_bayes_quant),
      "Bayesian.HPD" = t(int_bayes_hpd)
    )
    if (which == 5) {
      int_boot_res <- data.frame(
        ll = boot.ci.Sig_Xi[1, which - 4],
        l = boot.ci.Sig_Xi[1, which - 3],
        m = boot.ci.Sig_Xi[2, which - 3],
        h = boot.ci.Sig_Xi[3, which - 4],
        hh = boot.ci.Sig_Xi[3, which - 3]
      )
      int_boot_par <- data.frame(
        ll = boot.ci.Sig_Xi.par[1, which - 4],
        l = boot.ci.Sig_Xi.par[1, which - 3],
        m = boot.ci.Sig_Xi.par[2, which - 3],
        h = boot.ci.Sig_Xi.par[3, which - 4],
        hh = boot.ci.Sig_Xi.par[3, which - 3]
      )
    }
    else if (which == 4) {
      int_boot_res <- data.frame(
        ll = boot.ci.Sig_Xi[1, which],
        l = boot.ci.Sig_Xi[1, which - 1],
        m = boot.ci.Sig_Xi[2, which],
        h = boot.ci.Sig_Xi[3, which],
        hh = boot.ci.Sig_Xi[3, which - 1]
      )
      int_boot_par <- data.frame(
        ll = boot.ci.Sig_Xi.par[1, which],
        l = boot.ci.Sig_Xi.par[1, which - 1],
        m = boot.ci.Sig_Xi.par[2, which],
        h = boot.ci.Sig_Xi.par[3, which],
        hh = boot.ci.Sig_Xi.par[3, which - 1]
      )
    }
    if (which == 4 || which == 5)
      df <- cbind.data.frame(
        df,
        "Bootstrap.residual" = t(int_boot_res),
        "Bootstrap.parametric" = t(int_boot_par)
      )


    mcmc_intervals(df)#, show_density = T) #, rhat = c(1, Rhat[[1]][,2][which]) )
    # Rhat colour by the Rhat (Gelman-Rubin diag) computed above
  }

# mu_0
int1 <- intervals_compareByParam(gev_freq_mu0_prof95,
                                 gev_freq_mu0_prof75, which = 1) +
  labs(x = "mu0") + theme_piss()
# mu_1
int2 <- intervals_compareByParam(gev_freq_mu1_prof95,
                                 gev_freq_mu1_prof75, which = 2) +
  labs(x = "mu1") + theme_piss()
# sigma
int3 <- intervals_compareByParam(gev_freq_sig_prof95,
                                 gev_freq_sig_prof75, which = 3) +
  labs(x = "sigma") + theme_piss()
# Xi
int4 <- intervals_compareByParam() +
  labs(x = "xi") + theme_piss(theme_classic()) +
  scale_x_continuous(breaks = c(-0.3, -0.2, -0.1, 0),
                     labels = c(-0.3, -0.2, -0.1, 0))

## All-in-one Sheet
title <-
  "Confidence intervals comparisons : Frequentists and Bayesians"
grid.arrange(
  int1,
  int2,
  int3,
  int4,
  ncol = 2,
  top = grid::textGrob(
    title,
    gp = grid::gpar(
      col = "#33666C",
      fontsize = 25,
      hjust = 0.5,
      font = 2
    ),
    hjust = .5,
    vjust = 0.4
  )
)


####### Posterior Predictive Distribution  ###########

repl <- PissoortThesis::pred_post_samples()

post.pred <-
  apply(repl, 2, function(x)
    quantile(x, probs = c(0.05, 0.5, 0.95)))


df.postpred <-
  data.frame(
    data = max_years$data,
    q05 = post.pred["5%", ],
    q50 = post.pred["50%", ],
    q95 = post.pred["95%", ],
    year = seq(1901:2016)
  )
ggplot(df.postpred) + geom_point(aes(x = year, y = data)) +
  geom_line(aes(x = year, y = q05)) + geom_line(aes(x = year, y = q50)) +
  geom_line(aes(x = year, y = q95)) +
  ggtitle("Original data with PPD quantiles 5, 50 and 95%") + theme_piss()


## And for predictions ? 10 years here
n_future <- 116
repl2 <-
  PissoortThesis::pred_post_samples(n_future = n_future, seed = 12)

post.pred2 <-
  apply(repl2, 2, function(x)
    quantile(x, probs = c(0.025, 0.5, 0.975)))
hpd_pred <- as.data.frame(t(hdi(repl2)))

df.postpred2 <- data.frame(
  org.data = c(max_years$data,
               repl2[sample(10, 1:nrow(repl2)),
                     117:(116 + n_future)]),
  q025 = post.pred2["2.5%", ],
  q50 = post.pred2["50%", ],
  q975 = post.pred2["97.5%", ],
  year = 1901:(2016 + n_future),
  'data' = c(rep('original', 116), rep('new', n_future)),
  hpd.low = hpd_pred$lower,
  hpd.up = hpd_pred$upper
)

col.interval <-
  c(
    "2.5%-97.5%" = "red",
    "Median" = "blue2",
    "HPD 95%" = "green2",
    "orange",
    "magenta"
  )
col.data <-
  c("original" = "cyan",
    "simulated" = "red",
    "orange",
    "magenta")

g.ppd <- ggplot(df.postpred2) +
  geom_line(aes(x = year, y = q025, col = "2.5%-97.5%"), linetype = "dashed") +
  geom_line(aes(x = year, y = q50, col = "Median")) +
  geom_line(aes(x = year, y = q975, col =  "2.5%-97.5%"), linetype = "dashed") +
  geom_line(aes(x = year, y = hpd.low, col = "HPD 95%"), linetype = "dashed") +
  geom_line(aes(x = year, y = hpd.up , col =  "HPD 95%"), linetype = "dashed") +
  geom_vline(
    xintercept = 2016,
    linetype = "dashed",
    size = 0.4,
    col  = 1
  ) +
  scale_x_continuous(
    breaks = c(1900, 1950, 2000, 2016, 2050, 2100, 2131),
    labels = c(1900, 1950, 2000, 2016, 2050, 2100, 2131)
  ) +
  scale_colour_manual(name = " PP intervals", values = col.interval) +
  geom_point(data = df.postpred2[1:116, ],
             aes(x = year, y = org.data), col = "black") +
  geom_point(data = df.postpred2[117:nrow(df.postpred2), ],
             aes(x = year, y = org.data), col = "orange") +
  scale_fill_discrete(name = "Data") + #, values = col.data) +
  labs(y = expression(Max ~ (T ~ degree * C)),
       x = "Year",
       title = "Posterior Predictive quantiles with observation + 116 years simulations") +
  theme(
    legend.position =  c(0.91, 0.12),
    plot.title = element_text(
      size = 28,
      colour = "#33666C",
      face = "bold",
      hjust = 0.5
    ),
    axis.title = element_text(
      size = 19,
      colour = "#33666C",
      face = "bold"
    ),
    legend.title = element_text(
      size = 19,
      colour = "#33666C",
      face = "bold"
    )
  )

## LEngth of the intervals
length.quantil <- df.postpred2$q975 - df.postpred2$q025
length.hpd <- df.postpred2$hpd.up - df.postpred2$hpd.low
df.length.ci <- data.frame(quantiles = length.quantil,
                           hpd = length.hpd,
                           Year = df.postpred2$year)

g.length <- ggplot(df.length.ci) +
  geom_line(aes(x = Year , y = quantiles), col = "red") +
  geom_line(aes(x = Year , y = hpd), col = "green2") +
  labs(title = "Intervals' lengths", y = "Length") +
  scale_x_continuous(
    breaks = c(1900, 1950, 2000, 2050, 2100, 2131),
    labels = c(1900, 1950, 2000, 2050, 2100, 2131)
  ) +
  geom_vline(
    xintercept = 2016,
    linetype = "dashed",
    size = 0.4,
    col  = 1
  ) +
  theme(
    plot.title = element_text(
      size = 17,
      colour = "#33666C",
      face = "bold",
      hjust = 0.5
    ),
    axis.title = element_text(
      size = 10,
      colour = "#33666C",
      face = "bold"
    )
  )

vp <- grid::viewport(
  width = 0.23,
  height = 0.28,
  x = 0.65,
  y = 0.23
)
g.ppd
print(g.length, vp = vp)



# Densities associated with the PPD, with mean(red) and

gg1 <- PissoortThesis::Pred_Dens_ggPlot(1901, repl2)
gg2 <- PissoortThesis::Pred_Dens_ggPlot(1950, repl2)
gg3 <- PissoortThesis::Pred_Dens_ggPlot(2016, repl2)
gg4 <- PissoortThesis::Pred_Dens_ggPlot(2026, repl2)
grid.arrange(gg1, gg2, gg3, gg4, nrow = 1)


## Provide better visualizatons with geom_joy(). ( include it in the package !!!)
## Definition of parameters is straightfoward : it defines time at which we predict
# An by = is the number of densities we want to draw !
'posterior_pred_ggplot' <-
  function(from = 1,
           until = nrow(max_years$df),
           n_future = 0,
           by = 10,
           x_coord = c(27, 35)) {
    repl2 <-
      PissoortThesis::pred_post_samples(from = from,
                                        until = until,
                                        n_future = n_future)

    repl2_df <- data.frame(repl2)
    colnames(repl2_df) <- seq(from + 1900, length = ncol(repl2))

    ## Compute some quantiles to later draw on the plot
    quantiles_repl2 <- apply(repl2_df, 2,
                             function(x)
                               quantile(x , probs = c(.025, 0.05,
                                                      0.5, 0.95, 0.975)))
    quantiles_repl2 <- as.data.frame(t(quantiles_repl2))
    quantiles_repl2$year <- colnames(repl2_df)

    repl2_df_gg <-
      repl2_df[, seq(1, (until + n_future) - from, by = by)] %>%
      gather(year, value)

    col.quantiles <-
      c("2.5%-97.5%" = "red",
        "Median" = "black",
        "HPD 95%" = "green2")

    last_year <- as.character((until + n_future) - from + 1900)
    titl <-
      paste(
        "Posterior Predictive densities evolution in [ 1901 -",
        last_year,
        "] with linear model on location"
      )
    subtitl <-
      paste(
        "with some quantiles and intervals. The last density is in year ",
        last_year,
        ". After 2016 is extrapolation."
      )

    #browser()
    ## Compute the HPD intervals
    hpd_pred <- as.data.frame(t(hdi(repl2)))
    hpd_pred$year <- colnames(repl2_df)

    g <-
      ggplot(repl2_df_gg, aes(x = value, y = as.numeric(year))) +  # %>%rev() inside aes()
      geom_joy(aes(fill = year)) +
      geom_point(aes(x = `2.5%`, y = as.numeric(year), col = "2.5%-97.5%"),
                 data = quantiles_repl2,
                 size = 0.9) +
      geom_point(aes(x = `50%`, y = as.numeric(year), col = "Median"),
                 data = quantiles_repl2,
                 size = 0.9) +
      geom_point(
        aes(x = `97.5%`, y = as.numeric(year), col = "2.5%-97.5%"),
        data = quantiles_repl2,
        size = 0.9
      ) +
      geom_point(aes(x = lower, y = as.numeric(year) , col = "HPD 95%"),
                 data = hpd_pred,
                 size = 0.9) +
      geom_point(aes(x = upper, y = as.numeric(year) , col = "HPD 95%"),
                 data = hpd_pred,
                 size = 0.9) +
      geom_hline(
        yintercept = 2016,
        linetype = "dashed",
        size = 0.3,
        col  = 1
      ) +
      scale_fill_viridis(
        discrete = T,
        option = "D",
        direction = -1,
        begin = .1,
        end = .9
      ) +
      scale_y_continuous(breaks = c(seq(1901, 2016, by = by),
                                    seq(2016, colnames(repl2_df)[ncol(repl2_df)], by = by)))  +
      coord_cartesian(xlim = x_coord) +
      theme_piss(theme = theme_minimal()) +
      labs(
        x = expression(Max ~ (T ~ degree * C)),
        y = "Year",
        title = titl,
        subtitle = subtitl
      ) +
      scale_colour_manual(name = "Intervals", values = col.quantiles) +
      guides(colour = guide_legend(override.aes = list(size = 4))) +
      theme(
        legend.position = c(.952, .37),
        plot.subtitle = element_text(hjust = 0.5, face = "italic"),
        plot.caption = element_text(hjust = 0.1, face = "italic")
      )
    g
  }

# Plot for the already observed data :
posterior_pred_ggplot(by = 8)

# Plot for future data (extrapolation) :
posterior_pred_ggplot(
  from = nrow(max_years$df),
  x_coord = c(30, 38),
  n_future = nrow(max_years$df),
  by = 8
)

## All
posterior_pred_ggplot(
  from = 1,
  x_coord = c(27, 38),
  n_future = nrow(max_years$df),
  by = 12
)



#####  Return Levels

# Stationary
gibb1$out.chain[, "sigma"] <- exp(gibb1$out.chain[, "logsig"])
rl.post_gg(gibb1$out.chain[, c("mu", "sigma", "xi")], method = "gev")
rl.pred_gg(
  gibb1$out.chain[, c("mu", "sigma", "xi")],
  method = "gev",
  qlim = c(30, 40),
  period = c(1, 5, 15)
)

#  Nonstationary (linear trend)

# library(evdbayes)
# gibbs.trend$out.chain[,"sigma"] <- exp(gibbs.trend$out.chain[,"logsig"])
# rl.data <-gibbs.trend$out.chain[, c("mu0", "mu1", "sigma", "xi")]
# rl.pred(rl.data, qlim = c(30, 40))
#
# "rl_bayes_trend" <- function(data_year, params, t = 10, m = 10 ){
#   y_m <- -(1 / log(1 - 1/m))
#   t <- seq(max(data_year), max(data_year) + t, 1)
#   rl_m <- (params[1] + params[2] * (t-max(max_years$df$Year))) +
#     (params[3] / params[4]) *  (y_m^params[4] - 1)
#   g <- ggplot(data.frame(r.lvels = rl_m, years = t)) +
#     geom_point(aes(x = years, y = r.lvels))
#   g
#   return(rl_m)
# }
# par_gibbs_trend[3] <- exp(par_gibbs_trend["logsig"])
# rl_bayes_trend(max_years$data,
#                unname(par_gibbs_trend[c("mu0", "mu1", "sigma", "xi")]))

m <- 10
t = 218
start <- 1980
y_m <- -(1 / log(1 - 1 / m))
t <- seq(start, start + t, 1)
rl_m <- (tab_quantiles$`50%`[1] + tab_quantiles$`50%`[2] *
           (t - min(max_years$df$Year))) +
  (tab_quantiles$`50%`[5] / tab_quantiles$`50%`[4]) *
  (y_m ^ tab_quantiles$`50%`[4] - 1)
g <- ggplot(data.frame(r.lvels = rl_m, years = t)) +
  geom_point(aes(x = years, y = r.lvels))
g
rl_m

## Comparing with the frequentist intervals : Retrieve the plot from 2nonstationary.R
gg_rlAll_data + coord_cartesian(ylim = c(30, 40)) +
  geom_point(
    aes(x = years, y = r.levels),
    data = data.frame(years = t, r.levels = rl_m),
    col = 2,
    size = 0.3
  )




#### Predictive accuracy criterion

## For the Gumbel and the stationary model  computed in
#previous section with Gibbs sampler (gib.mcmc_gumbel  and  gibbs_statio)

# Function for Gumbel model to create mcmc.lists
'mc.listDiag2' <- function (list, subset = c("mu", "logsig")) {
  mcmc.list(mcmc(list[[1]][, subset]),
            mcmc(list[[2]][, subset]),
            mcmc(list[[3]][, subset]),
            mcmc(list[[4]][, subset]))
}

ic_vals.gumb <- gib.mcmc_gumbel$dic.vals
# DIC Values : compute for each chains and then average it.
dic.gum.1 <- mc.listDiag2(gib.mcmc_gumbel$out.ind)[[1]] %>%
  dic_2p(vals = ic_vals.gumb[[1]])
dic.gum.2 <- mc.listDiag2(gib.mcmc_gumbel$out.ind)[[2]] %>%
  dic_2p(vals = ic_vals.gumb[[2]])
dic.gum.3 <- mc.listDiag2(gib.mcmc_gumbel$out.ind)[[3]] %>%
  dic_2p(vals = ic_vals.gumb[[3]])
dic.gum.4 <- mc.listDiag2(gib.mcmc_gumbel$out.ind)[[4]] %>%
  dic_2p(vals = ic_vals.gumb[[4]])
cat(dic.gum.1, dic.gum.2, dic.gum.3, dic.gum.4)
# Variability
sd(c(dic.gum.1, dic.gum.2, dic.gum.3, dic.gum.4))

dic.mean.gum <- mean(dic.gum.1, dic.gum.2, dic.gum.3, dic.gum.4)
# WAIC Values
waic.gum.1 <- PissoortThesis::waic(ic_vals.gumb[[1]])
waic.gum.2 <- PissoortThesis::waic(ic_vals.gumb[[2]])
waic.gum.3 <- PissoortThesis::waic(ic_vals.gumb[[3]])
waic.gum.4 <- PissoortThesis::waic(ic_vals.gumb[[4]])
cat(waic.gum.1, waic.gum.2, waic.gum.3, waic.gum.4)
# Variability
sd(c(waic.gum.1, waic.gum.2, waic.gum.3, waic.gum.4))

waic.mean.gum <-
  mean(waic.gum.1, waic.gum.2, waic.gum.3, waic.gum.4)



# Function for stationary model to create mcmc.lists
'mc.listDiag3' <-
  function (list, subset = c("mu", "logsig", "xi")) {
    mcmc.list(mcmc(list[[1]][, subset]),
              mcmc(list[[2]][, subset]),
              mcmc(list[[3]][, subset]),
              mcmc(list[[4]][, subset]))
  }

ic_vals.00 <- gibbs_statio$dic.vals
# DIC Values : compute for each chains and then average it.
dic.statio.1 <- mc.listDiag3(gibbs_statio$out.ind)[[1]] %>%
  PissoortThesis::dic_3p(vals = ic_vals.00[[1]])
dic.statio.2 <- mc.listDiag3(gibbs_statio$out.ind)[[2]] %>%
  PissoortThesis::dic_3p(vals = ic_vals.00[[2]])
dic.statio.3 <- mc.listDiag3(gibbs_statio$out.ind)[[3]] %>%
  PissoortThesis::dic_3p(vals = ic_vals.00[[3]])
dic.statio.4 <- mc.listDiag3(gibbs_statio$out.ind)[[4]] %>%
  PissoortThesis::dic_3p(vals = ic_vals.00[[4]])
cat(dic.statio.1, dic.statio.2, dic.statio.3, dic.statio.4)

dic.mean.statio <-
  mean(dic.statio.1, dic.statio.2, dic.statio.3, dic.statio.4)
# WAIC Values
waic.statio.1 <- PissoortThesis::waic(ic_vals.00[[1]])
waic.statio.2 <- PissoortThesis::waic(ic_vals.00[[2]])
waic.statio.3 <- PissoortThesis::waic(ic_vals.00[[3]])
waic.statio.4 <- PissoortThesis::waic(ic_vals.00[[4]])
cat(waic.statio.1, waic.statio.2, waic.statio.3, waic.statio.4)

waic.mean.statio <-
  mean(waic.statio.1, waic.statio.2, waic.statio.3, waic.statio.4)



## For the nonstationary model computed in this section
ic_vals.trend <- gibbs.trend$dic.vals

# DIC Values : compute for each chains and then average it.
dic.trend.1 <- mc.listDiag4(gibbs.trend$out.ind)[[1]] %>%
  PissoortThesis::dic_4p(vals = ic_vals.trend[[1]])
dic.trend.2 <- mc.listDiag4(gibbs.trend$out.ind)[[2]] %>%
  PissoortThesis::dic_4p(vals = ic_vals.trend[[2]])
dic.trend.3 <- mc.listDiag4(gibbs.trend$out.ind)[[3]] %>%
  PissoortThesis::dic_4p(vals = ic_vals.trend[[3]])
dic.trend.4 <- mc.listDiag4(gibbs.trend$out.ind)[[4]] %>%
  PissoortThesis::dic_4p(vals = ic_vals.trend[[4]])
cat(dic.trend.1, dic.trend.2, dic.trend.3, dic.trend.4)

dic.mean.trend <-
  mean(dic.trend.1, dic.trend.2, dic.trend.3, dic.trend.4)
# WAIC Values
waic.trend.1 <- PissoortThesis::waic(ic_vals.trend[[1]])
waic.trend.2 <- PissoortThesis::waic(ic_vals.trend[[2]])
waic.trend.3 <- PissoortThesis::waic(ic_vals.trend[[3]])
waic.trend.4 <- PissoortThesis::waic(ic_vals.trend[[4]])
cat(waic.trend.1, waic.trend.2, waic.trend.3, waic.trend.4)

waic.mean.trend <-
  mean(waic.trend.1, waic.trend.2, waic.trend.3, waic.trend.4)

## Cross-validation
waic(ic_vals.trend[[1]])




###############################################################################
## Make the time component in squared ? ie mu(t) = mu_0 + mu_1 * t + mu_2 * t^2
##############################################################################


fn2 <- function(par, data)
  - log_post2(par[1], par[2], par[3],
              par[4], par[5], data)
param2 <- c(
  mu0 = mean(max_years$df$Max),
  mu1 = 0,
  mu2 = 0,
  logsig = log(sd(max_years$df$Max)),
  xi =  -0.1
)
opt2 <- optim(param2,
              fn2,
              data = max_years$data,
              method = "BFGS",
              hessian = T)
opt2

# Starting Values
set.seed(100)
start2 <- list()
k <- 1
while (k < 5) {
  # starting value is randomly selected from a distribution
  # that is overdispersed relative to the target
  sv <- as.numeric(rmvnorm(1, opt2$par, solve(opt2$hessian)))
  svlp <-
    log_post2(sv[1], sv[2], sv[3], sv[4], sv[5], max_years$data)
  print(svlp)
  if (is.finite(svlp)) {
    start2[[k]] <- sv
    k <- k + 1
  }
}
# k chains with k different starting values
set.seed(101)
gibbs.trend2 <-
  gibbs.trend2.own(start2,
                   propsd = c(.5, 1.4, 3.5, .2, .15),
                   iter = 1000)
colMeans(do.call(rbind, gibbs.trend2$mean_acc.rates))

param.chain2 <- gibbs.trend2$out.chain[, 1:5]


### Plot of the chains
chains.plotOwn(gibbs.trend2$out.chain)
grid.arrange(
  ggplot(gibbs.trend2$out.chain) + geom_line(aes(x = iter, y = mu1)) +
    theme_piss(16, 14) + labs(ylab = "mu1", xlab = "iter"),
  ggplot(gibbs.trend2$out.chain) + geom_line(aes(x = iter, y = mu2)) +
    theme_piss(16, 14) + labs(ylab = "mu2", xlab = "iter")
)


## TracePlots
chain.mix <- cbind.data.frame(gibbs.trend2$out.chain,
                              iter.chain = rep(1:500, 4))
mixchains.Own(chain.mix)
ggplot(chain.mix, aes(
  x = iter.chain,
  y = mu1,
  col = as.factor(chain.nbr)
)) +
  geom_line() + theme_piss(18, 16, theme_classic()) +
  scale_colour_brewer(name = "chain nr", palette = "Set1") +
  guides(colour = guide_legend(override.aes = list(size = 1.2)))
ggplot(chain.mix, aes(
  x = iter.chain,
  y = mu2,
  col = as.factor(chain.nbr)
)) +
  geom_line() + theme_piss(18, 16, theme_classic()) +
  scale_colour_brewer(name = "chain nr", palette = "Set1") +
  guides(colour = guide_legend(override.aes = list(size = 1.2)))


## Predictive accuracy criterion

ic_vals.trend2 <- gibbs.trend2$dic.vals

# DIC Values. "1:5" takes the 5 parameters of the model, see function
dic.trend2.1 <- mc.listDiag5(gibbs.trend2$out.ind)[[1]] %>%
  PissoortThesis::dic_5p(vals = ic_vals.trend2[[1]])
dic.trend2.2 <- mc.listDiag5(gibbs.trend2$out.ind)[[2]] %>%
  PissoortThesis::dic_5p(vals = ic_vals.trend2[[2]])
dic.trend2.3 <- mc.listDiag5(gibbs.trend2$out.ind)[[3]] %>%
  PissoortThesis::dic_5p(vals = ic_vals.trend2[[3]])
dic.trend2.4 <- mc.listDiag5(gibbs.trend2$out.ind)[[4]] %>%
  PissoortThesis::dic_5p(vals = ic_vals.trend2[[4]])
cat(dic.trend2.1, dic.trend2.2, dic.trend2.3, dic.trend2.4)

dic.mean.trend2 <- mean(dic.trend2.1, dic.trend2.2, dic.trend2.3,
                        dic.trend2.4)

# WAIC Values
waic.trend2.1 <- PissoortThesis::waic(ic_vals.trend2[[1]])
waic.trend2.2 <- PissoortThesis::waic(ic_vals.trend2[[2]])
waic.trend2.3 <- PissoortThesis::waic(ic_vals.trend2[[3]])
waic.trend2.4 <- PissoortThesis::waic(ic_vals.trend2[[4]])
cat(waic.trend2.1, waic.trend2.2, waic.trend2.3, waic.trend2.4)
## Interestingly here, all the criterion are lower than for simple trend
# and this models should be preferred... different result from frequentist !!

waic.mean.trend2 <-
  mean(waic.trend2.1, waic.trend2.2, waic.trend2.3,
       waic.trend2.4)




######## Model with inear trend + varying scale parameter (exp link)  ####
########   mu(t) = mu0 + mu1 * t,     logsig(t) = sig0 + sig1 * tt     #########
##########################################################################


fn3 <- function(par, data)
  - log_post3(par[1], par[2], par[3],
              par[4], par[5], data)
param3 <- c(
  mu0 = mean(max_years$df$Max),
  mu1 = 0,
  sig0 = log(sd(max_years$df$Max)),
  sig1 = 0,
  xi =  -0.1
)
opt3 <- optim(param3,
              fn3,
              data = max_years$data,
              method = "BFGS",
              hessian = T)
opt3

# Starting Values
set.seed(100)
start3 <- list()
k <- 1
while (k < 5) {
  # starting value is randomly selected from a distribution
  # that is overdispersed relative to the target
  sv <- as.numeric(rmvnorm(1, opt3$par, solve(opt3$hessian)))
  svlp <-
    log_post1(sv[1], sv[2], sv[3], sv[4], sv[5], max_years$data)
  print(svlp)
  if (is.finite(svlp)) {
    start3[[k]] <- sv
    k <- k + 1
  }
}
# k chains with k different starting values
set.seed(101)
gibbs.trend.sig3 <- PissoortThesis::gibbs.trend.sig3own(start3,
                                                        propsd = c(.45, 1.3, .2, .5, .1),
                                                        iter = 1e3)
colMeans(do.call(rbind, gibbs.trend.sig3$mean_acc.rates))

param.chain3 <- gibbs.trend.sig3$out.chain[, 1:5]


## TracePlots
chain.mix <- cbind.data.frame(gibbs.trend2$out.chain,
                              iter.chain = rep(1:500, 4))
mixchains.Own(chain.mix)
ggplot(chain.mix, aes(
  x = iter.chain,
  y = mu1,
  col = as.factor(chain.nbr)
)) +
  geom_line() + theme_piss(18, 16, theme_classic()) +
  scale_colour_brewer(name = "chain nr", palette = "Set1") +
  guides(colour = guide_legend(override.aes = list(size = 1.2)))




#### Predictive accuracy criterion

ic_vals.trend.sc <- gibbs.trend.sig3$dic.vals

# DIC Values. "1:5" takes the 5 parameters of the model, see function

dic.sig3.1 <- mc.listDiag4(gibbs.trend.sig3$out.ind, 1:5)[[1]] %>%
  PissoortThesis::dic_5p(vals = ic_vals.trend.sc[[1]], sig = T)
dic.sig3.2 <- mc.listDiag4(gibbs.trend.sig3$out.ind, 1:5)[[2]] %>%
  PissoortThesis::dic_5p(vals = ic_vals.trend.sc[[2]], sig = T)
dic.sig3.3 <- mc.listDiag4(gibbs.trend.sig3$out.ind, 1:5)[[3]] %>%
  PissoortThesis::dic_5p(vals = ic_vals.trend.sc[[3]], sig = T)
dic.sig3.4 <- mc.listDiag4(gibbs.trend.sig3$out.ind, 1:5)[[4]] %>%
  PissoortThesis::dic_5p(vals = ic_vals.trend.sc[[4]], sig = T)
cat(dic.sig3.1, dic.sig3.2, dic.sig3.3, dic.sig3.4)
#Variability
sd(c(dic.sig3.1, dic.sig3.2, dic.sig3.3, dic.sig3.4))

dic.mean.sig3 <- mean(dic.sig3.1, dic.sig3.2,
                      dic.sig3.3, dic.sig3.4)
# WAIC Values
waic.sig3.1 <- PissoortThesis::waic(ic_vals.trend.sc[[1]])
waic.sig3.2 <- PissoortThesis::waic(ic_vals.trend.sc[[2]])
waic.sig3.3 <- PissoortThesis::waic(ic_vals.trend.sc[[3]])
waic.sig3.4 <- PissoortThesis::waic(ic_vals.trend.sc[[4]])
cat(waic.sig3.1, waic.sig3.2, waic.sig3.3, waic.sig3.4)

waic.mean.sig3 <- mean(waic.sig3.1, waic.sig3.2,
                       waic.sig3.3, waic.sig3.4)
# Comparing These values with the ones obtained with simple linear trend, all are
# again for the complex model .....



###############################################################################
## Make the time component cubic in location ?
## mu(t) = mu_0 + mu_1 * t + mu_2 * t^2 + mu_3 * tt^3
##############################################################################


fn33 <- function(par, data)
  - log_post_mu3(par[1], par[2], par[3],
                 par[4], par[5], par[6], data)
param33 <-
  c(
    mu0 = mean(max_years$df$Max),
    mu1 = 0,
    mu2 = 0,
    mu3 = 0,
    logsig = log(sd(max_years$df$Max)),
    xi =  -0.1
  )
opt33 <- optim(
  param33,
  fn33,
  data = max_years$data,
  method = "BFGS",
  hessian = T
)
opt33

# Starting Values
set.seed(100)
start33 <- list()
k <- 1
while (k < 5) {
  # starting value is randomly selected from a distribution
  # that is overdispersed relative to the target
  sv <- as.numeric(rmvnorm(1, opt33$par, solve(opt33$hessian)))
  svlp <-
    log_post_mu3(sv[1], sv[2], sv[3], sv[4], sv[5], sv[6], max_years$data)
  print(svlp)
  if (is.finite(svlp)) {
    start33[[k]] <- sv
    k <- k + 1
  }
}

# k chains with k different starting values
set.seed(101)
gibbs.trend33 <- PissoortThesis::gibbs.trend3.own(start33,
                                                  propsd = c(.5, 1.4, 3.5, 7, .2, .15),
                                                  iter = 1000)
colMeans(do.call(rbind, gibbs.trend33$mean_acc.rates))

param.chain33 <- gibbs.trend33$out.chain[, 1:5]

## TracePlots
chain.mix <- cbind.data.frame(gibbs.trend33$out.chain,
                              iter.chain = rep(1:500, 4))
mixchains.Own(chain.mix)

## Predictive accuracy criterion

ic_vals.trend33 <- gibbs.trend33$dic.vals

# DIC Values. "1:5" takes the 5 parameters of the model, see function
dic.trend33.1 <- mc.listDiag4(gibbs.trend33$out.ind)[[1]] %>%
  dic_6p(vals = ic_vals.trend33[[1]])
dic.trend33.2 <- mc.listDiag4(gibbs.trend33$out.ind)[[2]] %>%
  dic_6p(vals = ic_vals.trend33[[2]])
dic.trend33.3 <- mc.listDiag4(gibbs.trend33$out.ind)[[3]] %>%
  dic_6p(vals = ic_vals.trend33[[3]])
dic.trend33.4 <- mc.listDiag4(gibbs.trend33$out.ind)[[4]] %>%
  dic_6p(vals = ic_vals.trend33[[4]])
cat(dic.trend33.1, dic.trend33.2, dic.trend33.3, dic.trend33.4)
#Variability
sd(c(dic.trend33.1, dic.trend33.2, dic.trend33.3,
     dic.trend33.4))

dic.mean.trend33 <-
  mean(dic.trend33.1, dic.trend33.2, dic.trend33.3,
       dic.trend33.4)

# WAIC Values
waic.trend33.1 <- PissoortThesis::waic(ic_vals.trend33[[1]])
waic.trend33.2 <- PissoortThesis::waic(ic_vals.trend33[[2]])
waic.trend33.3 <- PissoortThesis::waic(ic_vals.trend33[[3]])
waic.trend33.4 <- PissoortThesis::waic(ic_vals.trend33[[4]])
cat(waic.trend33.1,
    waic.trend33.2,
    waic.trend33.3,
    waic.trend33.4)
## Interestingly here, all the criterion are lower than for simple trend
# and this models should be preferred... different result from frequentist !!

waic.mean.trend33 <-
  mean(waic.trend33.1,
       waic.trend33.2,
       waic.trend33.3,
       waic.trend33.4)
# Variability
sd(c(
  waic.trend33.1,
  waic.trend33.2,
  waic.trend33.3,
  waic.trend33.4
))


##################### Comparisons ("Model Selection")  #######################
#############################################################################

library(loo)

cat(
  " The following represent the values of the DIC for the models, by ascending complexity",
  dic.mean.gum,
  dic.mean.statio,
  dic.mean.trend,
  dic.mean.trend2,
  dic.mean.sig3,
  dic.mean.trend33
)
cat(
  " The following represent the values of the WAIC for the same models,",
  waic.mean.gum,
  waic.mean.statio,
  waic.mean.trend,
  waic.mean.trend2,
  waic.mean.sig3,
  waic.mean.trend33
)


comp.waic.statio <- waic(ic_vals[[1]])

comp.waic.trend <-
  waic(rbind(
    ic_vals.trend[[1]],
    ic_vals.trend[[2]],
    ic_vals.trend[[3]],
    ic_vals.trend[[4]]
  ))

comp.waic.trend2 <-
  waic(
    rbind(
      ic_vals.trend2[[1]],
      ic_vals.trend2[[2]],
      ic_vals.trend2[[3]],
      ic_vals.trend2[[4]],
      ic_vals.trend2[[5]]
    )
  )
loo(
  rbind(
    ic_vals.trend2[[1]],
    ic_vals.trend2[[2]],
    ic_vals.trend2[[3]],
    ic_vals.trend2[[4]],
    ic_vals.trend2[[5]]
  )
)

comp.waic.sig3 <-
  waic(
    rbind(
      ic_vals.trend.sc[[1]],
      ic_vals.trend.sc[[2]],
      ic_vals.trend.sc[[3]],
      ic_vals.trend.sc[[4]],
      ic_vals.trend.sc[[5]]
    )
  )

cat(
  comp.waic.statio$waic,
  comp.waic.trend$waic,
  comp.waic.trend2$waic,
  comp.waic.sig3$waic
)
loo::compare(comp.waic.statio,
             comp.waic.trend,
             comp.waic.trend2,
             comp.waic.sig3)



library(bbefkr)  # Compute the marginal likelihood using Chib's (1995) method
# To be able to compute the Bayes factor.
logdensity_admkr()
library(BayesFactor)


#save.image("/home/proto4426/Documents/Thesis/Extreme/R resources/IRM/data1_bayes.Rdata")
proto4426/PissoortThesis documentation built on May 26, 2019, 10:31 a.m.