```r
.tutorial <- FALSE
.solutions <- FALSE
begin_sol <- function(sol = .solutions) {
  if (!sol) {
    "<!--\n"
  } else {
    "\n<hr />**Solution:**\n"
  }
}
end_sol <- function(sol = .solutions) {
  if (!sol) {
    "-->\n"
  } else {
    "\n<hr />\n"
  }
}

library(StatCompLab)
if (.tutorial) {
  library(learnr)
  require(gradethis)
  learnr::tutorial_options(
    exercise.timelimit = 120,
    exercise.checker = grade_learnr,
    exercise.startover = TRUE,
    exercise.diagnostics = TRUE,
    exercise.completion = TRUE
    # exercise.error.check.code = exercise.error.check.code.
  )
}

library(ggplot2)
knitr::opts_chunk$set(
  echo = FALSE,
  # dev = "png",
  # dev.args = list(type = "cairo-png"),
  fig.width = 6
)
suppressPackageStartupMessages(library(tidyverse))
theme_set(theme_bw())
set.seed(1234L)
```
In this lab session you will explore leave-one-out cross-validation, Monte Carlo estimation of randomisation-test p-values, and Pareto smoothed importance sampling.

Open one of your github lab repository clone projects. You can work in a `.R` file, but working with code chunks in a `.Rmd` file is recommended. Make sure you update the `StatCompLab` package to version 21.9.0 or higher.

`r begin_sol(!.solutions)`
The accompanying Tutorial09Solutions
tutorial/vignette documents contain the solutions explicitly, to make it
easier to review the material after the workshops.
`r end_sol(!.solutions)`
Revisit Tutorial 6 and load the 3D printer filament data:
data("filament1", package = "StatCompLab")
In Tutorial 6, the two models A and B were estimated with
```r
fit_A <- filament1_estimate(filament1, "A")
fit_B <- filament1_estimate(filament1, "B")
```
Predictions and scores were computed with
```r
pred_A <- filament1_predict(fit_A, newdata = filament1)
pred_B <- filament1_predict(fit_B, newdata = filament1)
score_A <- cbind(pred_A, filament1) %>%
  mutate(
    se = proper_score("se", Actual_Weight, mean = mean),
    ds = proper_score("ds", Actual_Weight, mean = mean, sd = sd),
    interval = proper_score("interval", Actual_Weight,
                            lwr = lwr, upr = upr, alpha = 0.1)
  )
score_B <- cbind(pred_B, filament1) %>%
  mutate(
    se = proper_score("se", Actual_Weight, mean = mean),
    ds = proper_score("ds", Actual_Weight, mean = mean, sd = sd),
    interval = proper_score("interval", Actual_Weight,
                            lwr = lwr, upr = upr, alpha = 0.1)
  )
```
as well as with a 50% estimation/prediction data split.
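For reference, such a 50% split could be constructed along the following lines (a sketch only, not the Tutorial 6 code; the object names `idx_est`, `filament_est`, and `filament_pred` are ours):

```r
# Randomly assign half of the rows to estimation, the rest to prediction
idx_est <- sample(nrow(filament1), size = round(nrow(filament1) / 2))
filament_est <- filament1[idx_est, , drop = FALSE]
filament_pred <- filament1[-idx_est, , drop = FALSE]

# Estimate on one half, then predict and score on the other half, as above
fit_A_split <- filament1_estimate(filament_est, "A")
pred_A_split <- filament1_predict(fit_A_split, newdata = filament_pred)
score_A_split <- cbind(pred_A_split, filament_pred) %>%
  mutate(
    se = proper_score("se", Actual_Weight, mean = mean),
    ds = proper_score("ds", Actual_Weight, mean = mean, sd = sd)
  )
```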
Implement a function `leave1out(data, model)` that performs leave-one-out cross-validation for the selected model. Use it for each of the two models; for each $i=1,\dots,N$, estimate the model parameters using $\{(x_j, y_j), j \neq i\}$, compute the prediction information based on $x_i$ for the prediction model $F_i$, and compute the required scores $S(F_i, y_i)$.
The output should be a `data.frame` with $N$ rows that extends the original data frame with four additional columns, `mean`, `sd`, `se`, and `ds`, containing the leave-one-out prediction means and standard deviations, and the prediction scores for the Squared Error and Dawid-Sebastiani scores.
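As a reminder (up to the exact convention used by `proper_score()` in the earlier tutorials; both scores are negatively oriented, so smaller is better), for a predictive distribution $F$ with mean $\mu_F$ and standard deviation $\sigma_F$, and observed value $y$,
$$
S_\mathrm{SE}(F, y) = (y - \mu_F)^2, \qquad
S_\mathrm{DS}(F, y) = \frac{(y - \mu_F)^2}{\sigma_F^2} + 2 \log(\sigma_F) .
$$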
```r
leave1out <- function(data, model) {
  ???
}
```
```r
leave1out <- function(data, model) {
  data <- data %>% mutate(mean = NA_real_, sd = NA_real_)
  for (i in seq_len(nrow(data))) {
    fit <- ???
    pred <- ???
    data[i, "mean"] <- ???
    data[i, "sd"] <- ???
  }
  data <- data %>%
    mutate(
      se = ???,
      ds = ???
    )
  data
}
```
```r
leave1out <- function(data, model) {
  data <- data %>% mutate(mean = NA_real_, sd = NA_real_)
  for (i in seq_len(nrow(data))) {
    fit <- filament1_estimate(???)
    pred <- filament1_predict(???)
    data[i, "mean"] <- pred$mean
    data[i, "sd"] <- pred$sd
  }
  data <- data %>%
    mutate(
      se = ???,
      ds = ???
    )
  data
}
```
```r
leave1out <- function(data, model) {
  data <- data %>% mutate(mean = NA_real_, sd = NA_real_)
  for (i in seq_len(nrow(data))) {
    fit <- filament1_estimate(data[-i, , drop = FALSE], model)
    pred <- filament1_predict(fit, newdata = data[i, , drop = FALSE])
    data[i, "mean"] <- pred$mean
    data[i, "sd"] <- pred$sd
  }
  data <- data %>%
    mutate(
      se = proper_score("se", Actual_Weight, mean = mean),
      ds = proper_score("ds", Actual_Weight, mean = mean, sd = sd)
    )
  data
}
```
`r begin_sol()`
`r end_sol()`
The following code should work if your code is correct:
```r
score_A <- leave1out(filament1, model = "A")
score_B <- leave1out(filament1, model = "B")
ggplot() +
  geom_point(aes(CAD_Weight, se, colour = "A"), data = score_A) +
  geom_point(aes(CAD_Weight, se, colour = "B"), data = score_B)
ggplot() +
  geom_point(aes(CAD_Weight, ds, colour = "A"), data = score_A) +
  geom_point(aes(CAD_Weight, ds, colour = "B"), data = score_B)
```
We want to investigate whether one model is better at predicting than the other. If they are equivalent, it should not matter, on average, if we randomly swap the A and B prediction scores within each leave-one-out score pair $(S^A_i, S^B_i)$. Use the test statistic $\frac{1}{N}\sum_{i=1}^N (S^A_i - S^B_i)$, and make a Monte Carlo estimate of the p-value for a test of exchangeability between model predictions from A and from B against the alternative hypothesis that B is better than A. First compute the test statistic for the two prediction scores.
Hints: For this particular test statistic, one possible approach is to first compute the pairwise score differences and then generate randomisation samples by sampling random sign changes. Compute the test statistic for each randomly altered set of values and compare with the original test statistic. See the lecture on exchangeability for more information. Start with $J=1000$ iterations when testing your code. Increase to 10000 for more precise results.
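In symbols (the notation here is ours), with pairwise differences $D_i = S^A_i - S^B_i$, observed statistic $T_\mathrm{obs} = \frac{1}{N}\sum_{i=1}^N D_i$, and independent random signs $s^{(j)}_i \in \{-1, +1\}$ for each randomisation $j=1,\dots,J$, the procedure sketched above computes
$$
T^{(j)} = \frac{1}{N} \sum_{i=1}^N s^{(j)}_i D_i, \qquad
\widehat{p} = \frac{1}{J} \sum_{j=1}^J \mathbb{I}\left( T^{(j)} > T_\mathrm{obs} \right) .
$$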
```r
score_diff <- data.frame(se = ???, ds = ???)
statistic0 <- ???
J <- 10000
statistic <- data.frame(se = ???, ds = ???)
for (loop in seq_len(J)) {
  random_sign <- sample(c(-1, 1), ???)
  statistic[loop, ] <- ???
}
p_values <- statistic %>%
  summarise(se = mean(???), ds = mean(???))
# Estimates:
p_values
# Monte Carlo std. errors:
sqrt(p_values * (1 - p_values) / J)
```
```r
score_diff <- data.frame(se = score_A$se - score_B$se,
                         ds = score_A$ds - score_B$ds)
statistic0 <- score_diff %>% summarise(se = mean(se), ds = mean(ds))
J <- 10000
statistic <- data.frame(se = numeric(J), ds = numeric(J))
for (loop in seq_len(J)) {
  random_sign <- sample(c(-1, 1), size = nrow(score_diff), replace = TRUE)
  statistic[loop, ] <- ???
}
p_values <- statistic %>%
  summarise(se = mean(???), ds = mean(???))
# Estimates:
p_values
# Monte Carlo std. errors:
sqrt(p_values * (1 - p_values) / J)
```
```r
score_diff <- data.frame(se = score_A$se - score_B$se,
                         ds = score_A$ds - score_B$ds)
statistic0 <- score_diff %>% summarise(se = mean(se), ds = mean(ds))
J <- 10000
statistic <- data.frame(se = numeric(J), ds = numeric(J))
for (loop in seq_len(J)) {
  random_sign <- sample(c(-1, 1), size = nrow(score_diff), replace = TRUE)
  statistic[loop, ] <- score_diff %>%
    summarise(se = mean(random_sign * se), ds = mean(random_sign * ds))
}
p_values <- statistic %>%
  summarise(se = mean(se > statistic0$se), ds = mean(ds > statistic0$ds))
# Estimates:
p_values
# Monte Carlo std. errors:
sqrt(p_values * (1 - p_values) / J)
```
`r begin_sol()`
We see that for the Squared Error scores, the two model predictions appear exchangeable, but when prediction uncertainty is taken into account in the Dawid-Sebastiani score, B appears to be better than A, on average.
`r end_sol()`
Explore the Pareto smoothed importance sampling (PSIS) method. Use a t-distribution with 10 degrees of freedom as the target, and an importance sampling (proposal) distribution $x \sim \mathcal{N}(0, \sigma^2)$ for different values of $\sigma$. Use $N=100$ or more samples.
Use the `loo::psis()` function to compute Pareto smoothed importance log-weights and diagnostics. What effect do different values of $\sigma$ have on the $k$ parameter estimate? (The $\hat{k}$ value is in the `$diagnostics$pareto_k` field of the `loo::psis()` output.)
Plot the original and smoothed log-weights as a function of $x$, and with `stat_ecdf()`, for different values of $\sigma$, including $\sigma=1$ and $\sigma=2$.
```r
sigma <- 1
x <- ???
log_ratio <- dt(???) - dnorm(???)
result <- loo::psis(???)
```
```r
sigma <- 1
x <- rnorm(100, sd = sigma)
log_ratio <- dt(x, df = ???, log = TRUE) - dnorm(???)
result <- loo::psis(???, r_eff = 1)
result$???
df <- data.frame(x = ???, log_ratio = ???, log_weight = ???)
ggplot(df) +
  geom_point(aes(???, color = "Original")) +
  ???
ggplot(df) +
  stat_ecdf(aes(???, color = "Original")) +
  ???
```
```r
sigma <- 1
x <- rnorm(100, sd = sigma)
log_ratio <- dt(x, df = 10, log = TRUE) - dnorm(x, sd = sigma, log = TRUE)
result <- loo::psis(log_ratio, r_eff = 1)
result$diagnostics$pareto_k
result$diagnostics$n_eff
df <- data.frame(x = x,
                 log_ratio = log_ratio,
                 log_weight = result$log_weights)
ggplot(df) +
  geom_point(aes(x, log_ratio, color = "Original")) +
  geom_point(aes(x, log_weight, color = "Smoothed"))
ggplot(df) +
  stat_ecdf(aes(log_ratio, color = "Original")) +
  stat_ecdf(aes(log_weight, color = "Smoothed"))
```
`r begin_sol()`
`r end_sol()`
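To compare several proposal scales in one go, a small helper like the one below can be used (a hypothetical helper of ours, not part of `StatCompLab`; it just repeats the construction above for each $\sigma$):

```r
# Hypothetical helper: PSIS k-hat estimate for a N(0, sigma^2) proposal
# and a t-distribution target with 10 degrees of freedom
pareto_k_for_sigma <- function(sigma, n = 100) {
  x <- rnorm(n, sd = sigma)
  log_ratio <- dt(x, df = 10, log = TRUE) - dnorm(x, sd = sigma, log = TRUE)
  loo::psis(log_ratio, r_eff = 1)$diagnostics$pareto_k
}

# k-hat estimates for a few values of sigma (results vary with the random seed)
sapply(c(0.5, 1, 2, 4), pareto_k_for_sigma)
```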