library(dplyr)
library(ggplot2)
library(tidyr)
library(inlaBGEV)
library(sf)
# In this script we estimate GEV parameters locally at all weather stations with
# long enough time series. We then visualise the estimates to see if a spatially
# varying model is needed for parameter estimation. Additionally we compare the
# parameter estimates with explanatory variables to look for trends.
# Finally, we standardise the observations using the standard deviation of
# heavy precipitation at each location. We then estimate parameters and look
# for trends once more.
# function for bootstrapping ========================================================
bootstrap_gev_fit = function(x, n, q = c(.025, .25, .5, .75, .975), ...) {
pars = matrix(NA, nrow = n, ncol = length(x$estimate))
for (b in 1:n) {
y = sample(x$data, length(x$data), replace = TRUE)
pars[b, ] = gev_fit(y, ...)$estimate
}
stats = data_stats(t(pars), q)
stats$par = names(x$estimate)
stats
}
# Estimate parameters for different time periods with PWM ============================
min_years = 4 # Minimum number of years needed before we estimate the GEV parameters
α = .5; β = .8 # Probabilities used in the location and spread parameters
hour_vec = c(1, 3, 6) # Which aggregation lengths are we examining?
# All the explanatory variables we are interested in
covariate_names = c("x", "y", "height", "dist_sea", "precipitation")
params = list()
for (i in seq_along(hour_vec)) {
# Prepare the data
data = dplyr::filter(observations, n_hours == hour_vec[i], n_years >= min_years)
standardisation_stats = get_mean_and_sd_stats(data, covariate_names)
data = standardise(data, standardisation_stats)
# Estimate parameters
ids = unique(data$id)
fits = list()
set.seed(1)
for (j in seq_along(ids)) {
obs = dplyr::filter(data, id == ids[j])$value
fits[[j]] = gev_fit(obs, method = "pwm", α = α, β = β)
}
# Estimate confidence intervals using bootstrap
bootstraps = list()
set.seed(1)
for (j in seq_along(fits)) {
bootstraps[[j]] = bootstrap_gev_fit(fits[[j]], n = 1000, method = "pwm", α = α, β = β)
bootstraps[[j]]$estimator = fits[[j]]$estimate
bootstraps[[j]]$id = ids[j]
}
bootstraps = do.call(rbind, bootstraps)
# Add explanatory variables to the estimated variables
explanatory_variables = data %>%
dplyr::select(-value, -year) %>%
dplyr::distinct(id, .keep_all = TRUE)
bootstraps = dplyr::left_join(bootstraps, explanatory_variables, by = "id")
params[[i]] = bootstraps
message("Done with ", hour_vec[i], " hours")
}
params = do.call(rbind, params)
# Plot parameter estimates for all stations ====================
plot = params %>%
dplyr::filter(!is.na(mean)) %>%
# Remove one stations for purely aesthetic reasons. The station with id SN49085
# has four observations, and for n_hours = 6, one of them is really large.
# This causes a very high estimate and a really wide confidence interval for the spread,
# making it harder to see the patterns in the parameter estimates from other stations.
# You are welcome to uncomment the line below if you wish
dplyr::filter(id != "SN49085") %>%
dplyr::mutate(
par = factor(
par, levels = c("q", "s", "ξ"),
labels = c("$\\hat{q}_\\alpha$", "$\\hat{s}_\\beta$", "$\\hat{\\xi}$")),
n_hours = factor(n_hours, levels = hour_vec,
labels = paste(hour_vec, "hours"))) %>%
ggplot(aes(x = as.numeric(factor(id)))) +
geom_point(aes(y = estimator), size = .2) +
geom_linerange(aes(ymin = `2.5%`, ymax = `97.5%`), alpha = .3) +
facet_grid(par ~ n_hours, scales = "free_y") +
labs(x = "Location nr.", y = "Value") +
theme_bw() +
theme(text = element_text(size = 20),
strip.text.y = element_text(angle = 0))
if (interactive()) print(plot)
tikz_plot(file.path(here::here(), "results", "pwm_gev_parameters.pdf"),
plot, width = 10, height = 7)
# Plot parameter estimates with covariates ==============================
q_plot1 = params %>%
dplyr::filter(par %in% c("q"), !is.na(mean)) %>%
dplyr::mutate(n_hours = factor(n_hours, levels = hour_vec,
labels = paste(hour_vec, "hours"))) %>%
tidyr::pivot_longer(all_of(covariate_names)) %>%
dplyr::mutate(name = factor(
name, levels = covariate_names,
labels = c("Easting", "Northing", "Altitude",
"Distance\nto sea", "Annual\nprecipitation"))) %>%
ggplot() +
geom_point(aes(y = estimator, x = value), size = .2) +
geom_smooth(aes(y = estimator, x = value), formula = y ~ x, method = "lm",
col = "black", size = .5) +
facet_grid(n_hours ~ name, scales = "free_y") +
labs(y = "$\\hat{q}_\\alpha$", x = "Standardised value") +
theme_bw() +
theme(text = element_text(size = 20),
strip.text.y = element_text(angle = 0),
axis.title.y = element_text(angle = 0, vjust = .5))
s_plot1 = params %>%
dplyr::filter(par %in% c("s"), !is.na(mean)) %>%
dplyr::mutate(n_hours = factor(n_hours, levels = hour_vec,
labels = paste(hour_vec, "hours"))) %>%
tidyr::pivot_longer(all_of(covariate_names)) %>%
dplyr::mutate(name = factor(
name, levels = covariate_names,
labels = c("Easting", "Northing", "Altitude",
"Distance\nto sea", "Annual\nprecipitation"))) %>%
ggplot() +
geom_point(aes(y = estimator, x = value), size = .2) +
geom_smooth(aes(y = estimator, x = value), formula = y ~ x, method = "lm",
col = "black", size = .5) +
labs(y = "$\\hat{s}_\\beta$", x = "Standardised value") +
facet_grid(n_hours ~ name, scales = "free_y") +
theme_bw() +
theme(text = element_text(size = 20),
strip.text.y = element_text(angle = 0),
axis.title.y = element_text(angle = 0, vjust = .5))
tikz_plot(file.path(here::here(), "results", "pwm_gev_parameters_with_covariates.pdf"),
list(q_plot1, s_plot1), width = 15, height = 11)
# Standardise the response using the standard deviation of heavy precipitation,
# then do everything once more
params2 = list()
for (i in seq_along(hour_vec)) {
# Prepare the data
data = dplyr::filter(observations, n_hours == hour_vec[i], n_years >= min_years)
standardisation_stats = get_mean_and_sd_stats(data, covariate_names)
data = standardise(data, standardisation_stats)
sd_vals = dplyr::filter(estimated_sd, n_hours == hour_vec[i], n_years >= min_years)
data = dplyr::left_join(data, sd_vals, by = c("id", "n_hours", "n_years"))
data$value = data$value / data$sd
# Estimate parameters
ids = unique(data$id)
fits = list()
set.seed(1)
for (j in seq_along(ids)) {
obs = dplyr::filter(data, id == ids[j])$value
fits[[j]] = gev_fit(obs, method = "pwm", α = α, β = β)
}
# Estimate confidence intervals using bootstrap
bootstraps = list()
set.seed(1)
for (j in seq_along(fits)) {
bootstraps[[j]] = bootstrap_gev_fit(fits[[j]], n = 1000, method = "pwm", α = α, β = β)
bootstraps[[j]]$estimator = fits[[j]]$estimate
bootstraps[[j]]$id = ids[j]
}
bootstraps = do.call(rbind, bootstraps)
# Add explanatory variables to the estimated variables
explanatory_variables = data %>%
dplyr::select(-value, -year, -sd) %>%
dplyr::distinct(id, .keep_all = TRUE)
bootstraps = dplyr::left_join(bootstraps, explanatory_variables, by = "id")
params2[[i]] = bootstraps
message("Done with ", hour_vec[i], " hours")
}
params2 = do.call(rbind, params2)
# Plot parameter estimates for all stations ====================
plot2 = params2 %>%
dplyr::filter(!is.na(mean)) %>%
# Remove one stations for purely aesthetic reasons. The station with id SN49085
# has four observations, and for n_hours = 6, one of them is really large.
# This causes a very high estimate and a really wide confidence interval for the spread,
# making it harder to see the patterns in the parameter estimates from other stations.
# You are welcome to uncomment the line below if you wish
#dplyr::filter(id != "SN49085") %>%
dplyr::mutate(
par = factor(
par, levels = c("q", "s", "ξ"),
labels = c("$\\hat{q}_\\alpha$", "$\\hat{s}_\\beta$", "$\\hat{\\xi}$")),
n_hours = factor(n_hours, levels = hour_vec,
labels = paste(hour_vec, "hours"))) %>%
ggplot(aes(x = as.numeric(factor(id)))) +
geom_point(aes(y = estimator), size = .2) +
geom_linerange(aes(ymin = `2.5%`, ymax = `97.5%`), alpha = .3) +
facet_grid(par ~ n_hours, scales = "free_y") +
labs(x = "Location nr.", y = "Value") +
theme_bw() +
theme(text = element_text(size = 20),
strip.text.y = element_text(angle = 0))
if (interactive()) print(plot2)
tikz_plot(file.path(here::here(), "results", "pwm_gev_parameters_twostep.pdf"),
plot2, width = 10, height = 7)
# Plot parameter estimates with covariates ==============================
q_plot2 = params2 %>%
dplyr::filter(par %in% c("q"), !is.na(mean)) %>%
dplyr::mutate(n_hours = factor(n_hours, levels = hour_vec,
labels = paste(hour_vec, "hours"))) %>%
tidyr::pivot_longer(all_of(covariate_names)) %>%
dplyr::mutate(name = factor(
name, levels = covariate_names,
labels = c("Easting", "Northing", "Altitude",
"Distance\nto sea", "Annual\nprecipitation"))) %>%
ggplot() +
geom_point(aes(y = estimator, x = value), size = .2) +
geom_smooth(aes(y = estimator, x = value), formula = y ~ x, method = "lm",
col = "black", size = .5) +
facet_grid(n_hours ~ name, scales = "free_y") +
labs(y = "$\\hat{q}_\\alpha$", x = "Standardised value") +
theme_bw() +
theme(text = element_text(size = 20),
strip.text.y = element_text(angle = 0),
axis.title.y = element_text(angle = 0, vjust = .5))
s_plot2 = params2 %>%
dplyr::filter(par %in% c("s"), !is.na(mean)) %>%
dplyr::mutate(n_hours = factor(n_hours, levels = hour_vec,
labels = paste(hour_vec, "hours"))) %>%
tidyr::pivot_longer(all_of(covariate_names)) %>%
dplyr::mutate(name = factor(
name, levels = covariate_names,
labels = c("Easting", "Northing", "Altitude",
"Distance\nto sea", "Annual\nprecipitation"))) %>%
ggplot() +
geom_point(aes(y = estimator, x = value), size = .2) +
geom_smooth(aes(y = estimator, x = value), formula = y ~ x, method = "lm",
col = "black", size = .5) +
labs(y = "$\\hat{s}_\\beta$", x = "Standardised value") +
facet_grid(n_hours ~ name, scales = "free_y") +
theme_bw() +
theme(text = element_text(size = 20),
strip.text.y = element_text(angle = 0),
axis.title.y = element_text(angle = 0, vjust = .5))
tikz_plot(
file.path(here::here(), "results", "pwm_gev_parameters_twostep_with_covariates.pdf"),
list(q_plot2, s_plot2), width = 15, height = 11)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.