article_weighted_scores.R
In scoringRules: Scoring Rules for Parametric and Simulated Distribution Forecasts

## ----preliminaries, echo=FALSE, results='hide'----------------------
# Using knitr for manuscript
library(knitr)
render_sweave()
opts_chunk$set(engine='R', tidy=FALSE)

# JSS code formatting
options(prompt = "R> ", continue = "+  ", width = 70, useFancyQuotes = FALSE)

# Formatting
options(scipen = 1, digits = 3)
Sys.setenv(LANG = 'en')

# RNG initialization
set.seed(42)

# Required packages
library(scoringRules)
if (requireNamespace("crch", quietly = TRUE)) {
  use_crch <- TRUE
  library(crch)
  } else {
  use_crch <- FALSE
}
if (requireNamespace("ggplot2", quietly = TRUE)) {
  use_ggplot <- TRUE
  library(ggplot2)
  } else {
  use_ggplot <- FALSE
}

## ----Chaining-func-example, echo=FALSE, dev='pdf', fig.width=2.5, fig.height=2.5, fig.align="center"----
if (use_ggplot) {
  a1 <- a2 <- 2.5
  b1 <- b2 <- 7.5
  
  x <- pmin(pmax(rnorm(10, 5, 1.5), 0.5), 9.5)
  y <- pmin(pmax(rnorm(10, 5, 1.5), 0.5), 9.5)
  w_ind <- (x < a1) | (x > b1) | (y < a2) | (y > b2)
  v_x <- pmin(pmax(x, a1), b1)
  v_y <- pmin(pmax(y, a2), b2)
  
  ggplot() + geom_rect(aes(xmin = a1, xmax = b1, ymin = a2, ymax = b2), fill = "lightblue") + 
    scale_x_continuous(expand = c(0, 0), limits = c(0, 10), breaks = c(a1, b1), 
                       labels = c(expression(a[1]), expression(b[1])), name = expression(z[1])) + 
    scale_y_continuous(expand = c(0, 0), limits = c(0, 10), breaks = c(a2, b2), 
                       labels = c(expression(a[2]), expression(b[2])), name = expression(z[2])) +
    theme_bw() + theme(panel.grid = element_blank()) +
    geom_segment(aes(x = x[w_ind], y = y[w_ind], xend = v_x[w_ind], yend = v_y[w_ind]),
                 lineend = "round", linejoin = "round", linewidth = 1,
                 arrow = arrow(length = unit(0.1, "cm")), col = "red") +
    geom_point(aes(x = x, y = y), size = 2) +
    geom_point(aes(x = v_x, y = v_y), size = 2, shape = 4)
}

## ----Univ-weighted-score-example-1----------------------------------
obs <- rnorm(5)
sample_m <- matrix(rnorm(5e4), nrow = 5)
score_df <- data.frame(crps = crps_sample(obs, sample_m),
                       owcrps = owcrps_sample(obs, sample_m),
                       twcrps = twcrps_sample(obs, sample_m))
print(score_df)

## ----Univ-weighted-score-example-2----------------------------------
t <- 0
score_df <- data.frame(crps = crps_sample(obs, sample_m),
                       owcrps = owcrps_sample(obs, sample_m, a = t),
                       twcrps = twcrps_sample(obs, sample_m, a = t))
print(score_df)

## ----Weighted-score-illustration, echo=FALSE, dev='pdf', fig.width=10.4, fig.height=4.1, fig.align="center", out.width = "\\linewidth"----

y <- rnorm(1000)
dat_norm <- matrix(rnorm(1000*100), nrow = 1000)
dat_logis <- matrix(rlogis(1000*100), nrow = 1000)
x <- seq(-3, 3, 0.1)
ow_scores_norm <- sapply(x, function(t) mean(owcrps_sample(y, dat_norm, a = t)))
tw_scores_norm <- sapply(x, function(t) mean(twcrps_sample(y, dat_norm, a = t)))
ow_scores_logis <- sapply(x, function(t) mean(owcrps_sample(y, dat_logis, a = t)))
tw_scores_logis <- sapply(x, function(t) mean(twcrps_sample(y, dat_logis, a = t)))

# Initialize plot
par(mai = c(0.9, 3.5, 0.3, 3.4), cex = 1.1)
plot(x, tw_scores_norm, type = "l", col = "purple", lwd = 2,
     xlim = c(-3, 3), ylim = c(0, 1),
     xlab = "Threshold a", ylab = "Score value",
     xaxt = "n", yaxt = "n")
lines(x, ow_scores_norm, col = "darkorange", lwd = 2)
lines(x, tw_scores_logis, col = "purple", lty = "dashed", lwd = 2)
lines(x, ow_scores_logis, col = "darkorange", lty = "dashed", lwd = 2)
axis(1, at = c(-3, 0, 3))
axis(2, at = c(0, 0.5, 1))

# Add legend
legend("top", bty = "n", legend = c("twCRPS", "owCRPS", "Normal", "Logistic"), 
  col = c("purple", "darkorange", "grey", "grey"),
  lty = c(1, 1, 1, 2), lwd = c(2, 2), ncol = 2)



## ----Multiv-weighted-score-example-1--------------------------------
d <- length(obs)
twes_sample(obs, sample_m, a = t)
twes_sample(obs, sample_m, a = rep(t, d))

## ----Custom-weight-function-example-1-------------------------------
mu <- 0; sigma <- 1
weight_func <- function(x) pnorm(x, mean = mu, sd = sigma)
owcrps_sample(obs, sample_m, weight_func = weight_func)

## ----Custom-weight-function-example-2-------------------------------
mu <- rnorm(d, 0, 0.5); sigma <- runif(d, 0.5, 1.5)
weight_func <- function(x) prod(pnorm(x, mean = mu, sd = sigma))
owes_sample(obs, sample_m, weight_func = weight_func)

## ----Custom-weight-function-example-3-------------------------------
weight_func <- get_weight_func(name = "norm_cdf", mu = 0, sigma = 1)
owcrps_sample(obs, sample_m, weight_func = weight_func)

## ----Custom-weight-function-example-4-------------------------------
weight_func <- get_weight_func(name = "norm_cdf", mu = mu, sigma = sigma)
owes_sample(obs, sample_m, weight_func = weight_func)

## ----Custom-weight-function-example-5-------------------------------
chain_func <- function(x) (x - mu)*pnorm(x, mu, sigma) + 
  (sigma^2)*dnorm(x, mu, sigma)
mu <- 0; sigma <- 1
twcrps_sample(obs, sample_m, chain_func = chain_func)

mu <- rnorm(d, 0, 0.5); sigma <- runif(d, 0.5, 1.5)
twes_sample(obs, sample_m, chain_func = chain_func)

## ----Custom-weight-function-example-6-------------------------------
chain_func <- get_weight_func("norm_cdf", mu = 0, sigma = 1, 
                              weight = FALSE)
twcrps_sample(obs, sample_m, chain_func = chain_func)

chain_func <- get_weight_func("norm_cdf", mu = mu, sigma = sigma, 
                              weight = FALSE)
twes_sample(obs, sample_m, chain_func = chain_func)

## ----Prepare-post-processing-example, echo=FALSE--------------------
if (use_crch) {
  # load data
  data("RainIbk", package = "crch")
  RainIbk <- sqrt(RainIbk)
  ensfc <- RainIbk[, grep('^rainfc', names(RainIbk))]
  RainIbk$ensmean <- apply(ensfc, 1, mean)
  RainIbk$enssd <- apply(ensfc, 1, sd)
  RainIbk <- subset(RainIbk, enssd > 0)  
  
  # split into training and test data
  data_train <- subset(RainIbk, as.Date(rownames(RainIbk)) <= "2004-11-30")
  data_eval <- subset(RainIbk, as.Date(rownames(RainIbk)) >= "2005-01-01")  
  
  # fit post-processing models
  CRCHgauss <- crch(rain ~ ensmean | log(enssd), data = data_train,
                    left = 0, dist = "gaussian")
  gauss_mu <- predict(CRCHgauss, data_eval, type = "location")
  gauss_sc <- predict(CRCHgauss, data_eval, type = "scale")
  
  CRCHlogis <- crch(rain ~ ensmean | log(enssd), data = data_train, 
                    left = 0, dist = "logistic")
  logis_mu <- predict(CRCHlogis, data_eval, type = "location")
  logis_sc <- predict(CRCHlogis, data_eval, type = "scale")
  
  CRCHstud <- crch(rain ~ ensmean | log(enssd), data = data_train, 
                   left = 0, dist = "student")
  stud_mu <- predict(CRCHstud, data_eval, type = "location")
  stud_sc <- predict(CRCHstud, data_eval, type = "scale")
  stud_df <- CRCHstud$df
  
  # get raw ensemble forecast
  ens_fc <- data_eval[, grep('^rainfc', names(RainIbk))] 
  obs <- data_eval$rain
}

## ----Computing-scores-Gauss-----------------------------------------
if (use_crch){
  gauss_crps <- crps_cnorm(y = obs, location = gauss_mu, scale = gauss_sc, 
                           lower = 0, upper = Inf)
}

## ----Computing-scores-logis-stud, echo=FALSE------------------------
if (use_crch){
  ens_crps <- crps_sample(obs, dat = as.matrix(ens_fc))
  logis_crps <- crps_clogis(obs, location = logis_mu, scale = logis_sc, 
                            lower = 0, upper = Inf)
  stud_crps <- crps_ct(obs, df = stud_df, location = stud_mu, scale = stud_sc, 
                       lower = 0, upper = Inf)
}

## ----Calculating-average-scores-------------------------------------
if (use_crch){
  scores <- data.frame(Logistic = logis_crps, Gaussian = gauss_crps,
                       Students_t = stud_crps, Ensemble = ens_crps)
  sapply(scores, mean)
}

## ----Post-processing-sample-----------------------------------------
if (use_crch){
  ens_size <- 1000
  n <- length(obs)
  gauss_sample <- replicate(ens_size, rnorm(n, gauss_mu, gauss_sc))
  gauss_sample[gauss_sample < 0] <- 0
}

## ----Post-processing-twcrps-Gauss-----------------------------------
if (use_crch){
  t <- sqrt(30)
  gauss_twcrps <- twcrps_sample(y = obs, dat = gauss_sample, a = t)
}

## ----Post-processing-twcrps-logis-stud, echo=FALSE------------------
if (use_crch){
  ens_twcrps <- twcrps_sample(obs, dat = as.matrix(ens_fc), a = t)
  logis_sample <- replicate(ens_size, rlogis(n, logis_mu, logis_sc))
  logis_sample[logis_sample < 0] <- 0
  logis_twcrps <- twcrps_sample(obs, dat = logis_sample, a = t)
  stud_sample <- replicate(ens_size, rt(n, df = stud_df)*stud_sc + stud_mu)
  stud_sample[stud_sample < 0] <- 0
  stud_twcrps <- twcrps_sample(obs, dat = stud_sample, a = t)
}

## ----Calculating-average-weighted-scores----------------------------
if (use_crch){
  scores <- data.frame(Logistic = logis_twcrps, Gaussian = gauss_twcrps,
                       Students_t = stud_twcrps, Ensemble = ens_twcrps)
  sapply(scores, mean)
}

## ----Post-processing-twcrps-custom-Gauss----------------------------
if (use_crch){
  sigma <- 1
  weight_func <- get_weight_func("norm_cdf", mu = t, sigma = sigma)
  chain_func <- get_weight_func("norm_cdf", mu = t, sigma = sigma,
                                weight = FALSE)
  gauss_twcrps <- twcrps_sample(obs, gauss_sample, chain_func = chain_func)
}

## ----Post-processing-twcrps-custom-logis-stud, echo=FALSE-----------
if (use_crch){
  ens_twcrps <- twcrps_sample(obs, dat = as.matrix(ens_fc), chain_func = chain_func)
  logis_twcrps <- twcrps_sample(obs, dat = logis_sample, chain_func = chain_func)
  stud_twcrps <- twcrps_sample(obs, dat = stud_sample, chain_func = chain_func)
}

## ----Calculating-average-weighted-scores-custom---------------------
if (use_crch){
  scores <- data.frame(CRCHlogis = logis_twcrps, CRCHgauss = gauss_twcrps,
                       CRCHstud = stud_twcrps, Ensemble = ens_twcrps)
  sapply(scores, mean)
}

## ----Data-MCMC-example, echo=FALSE----------------------------------
data("gdp", package = "scoringRules")
data_train <- subset(gdp, vint == "2014Q1")
data_eval <- subset(gdp, vint == "2015Q1" & grepl("2014", dt))

h <- 4
m <- 5000 # use modest number of MCMC iterations for computational simplicity
fc_params <- ar_ms(data_train$val, forecast_periods = h, n_rep = m)

mu <- t(fc_params$fcMeans)
Sd <- t(fc_params$fcSds)

X <- matrix(rnorm(h * m, mean = mu, sd = Sd), nrow = h, ncol = m)

obs <- data_eval$val
names(obs) <- data_eval$dt

## ----Univ-scores-MCMC-example---------------------------------------
scores_crps <- crps_sample(obs, X)
scores_logs <- logs_sample(obs, X)
print(cbind(scores_crps, scores_logs))

## ----Weighted-univ-scores-MCMC-example------------------------------
t <- 0
scores_owcrps <- owcrps_sample(obs, X, b = t)
scores_twcrps <- twcrps_sample(obs, X, b = t)
scores_cols <- clogs_sample(obs, X, b = t, cens = FALSE)
scores_cels <- clogs_sample(obs, X, b = t)
print(cbind(scores_owcrps, scores_twcrps, scores_cols, scores_cels))

## ----Custom-weight-MCMC-example-------------------------------------
a <- 0
b <- 9 
weight_func <- function(x) as.numeric((x < a) | (x > b))
chain_func <- function(x) (x < a)*(x - a) + (x > b)*(x - b) + a

## ----Custom-weighted-univ-scores-MCMC-example-----------------------
scores_owcrps <- owcrps_sample(obs, X, weight_func = weight_func)
scores_twcrps <- twcrps_sample(obs, X, chain_func = chain_func)
print(cbind(scores_owcrps, scores_twcrps))

## ----Weighted-multiv-scores-MCMC-example----------------------------
d <- 2
scores_twes <- twes_sample(obs[1:d], X[1:d, ], b = 0)
scores_twvs <- twvs_sample(obs[1:d], X[1:d, ], b = 0)
scores_twmmds <- twmmds_sample(obs[1:d], X[1:d, ], b = 0)
print(cbind(scores_twes, scores_twvs, scores_twmmds))