## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  message = FALSE,
  comment = "#>"
)
## ----setup--------------------------------------------------------------------
library(recalibratiNN)
## ----echo = F-----------------------------------------------------------------
library(glue)
library(RANN)
library(dplyr)
library(purrr)
library(tidyr)
library(ggplot2)
## -----------------------------------------------------------------------------
set.seed(42) # The Answer to the Ultimate Question of Life, The Universe, and Everything

n <- 10000

x <- cbind(x1 = runif(n, -3, 3),
           x2 = runif(n, -5, 5))

mu_fun <- function(x) {
  abs(x[, 1]^3 - 50 * sin(x[, 2]) + 30)
}

mu <- mu_fun(x)

y <- rnorm(n,
           mean = mu,
           sd = 20 * abs(x[, 2] / (x[, 1] + 10)))

split1 <- 0.6
split2 <- 0.8

x_train <- x[1:(split1 * n), ]
y_train <- y[1:(split1 * n)]

x_cal <- x[(split1 * n + 1):(split2 * n), ]
y_cal <- y[(split1 * n + 1):(split2 * n)]

x_test <- x[(split2 * n + 1):n, ]
y_test <- y[(split2 * n + 1):n]
## ----eval=F-------------------------------------------------------------------
# library(keras) # the model definition below assumes keras is attached (not run in this script)
#
# model_nn <- keras_model_sequential()
#
# model_nn |>
#   layer_dense(input_shape = 2,
#               units = 800,
#               use_bias = TRUE,
#               activation = "relu",
#               kernel_initializer = "random_normal",
#               bias_initializer = "zeros") |>
#   layer_dropout(rate = 0.1) |>
#   layer_dense(units = 800,
#               use_bias = TRUE,
#               activation = "relu",
#               kernel_initializer = "random_normal",
#               bias_initializer = "zeros") |>
#   layer_dropout(rate = 0.1) |>
#   layer_dense(units = 800,
#               use_bias = TRUE,
#               activation = "relu",
#               kernel_initializer = "random_normal",
#               bias_initializer = "zeros") |>
#   layer_batch_normalization() |>
#   layer_dense(units = 1,
#               activation = "linear",
#               kernel_initializer = "zeros",
#               bias_initializer = "zeros")
#
# model_nn |>
#   compile(optimizer = optimizer_adam(),
#           loss = "mse")
#
# model_nn |>
#   fit(x = x_train,
#       y = y_train,
#       validation_data = list(x_cal, y_cal),
#       callbacks = callback_early_stopping(
#         monitor = "val_loss",
#         patience = 20,
#         restore_best_weights = TRUE),
#       batch_size = 128,
#       epochs = 1000)
#
# y_hat_cal <- predict(model_nn, x_cal)
# y_hat_test <- predict(model_nn, x_test)
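#
# # MSE_cal below is the mean squared error on the calibration set, used throughout the
# # script as the variance of the Gaussian predictive distribution. A minimal sketch of
# # how it could be computed from the predictions above (not run here; a stored copy is
# # loaded in the next chunk):
# MSE_cal <- mean((y_cal - as.numeric(y_hat_cal))^2)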
## ----echo = F-----------------------------------------------------------------
# load the stored .rds vectors (calibration MSE and pre-computed predictions)
file_path1 <- system.file("extdata", "mse_cal.rds", package = "recalibratiNN")
MSE_cal <- readRDS(file_path1) |> as.numeric()

file_path2 <- system.file("extdata", "y_hat_cal.rds", package = "recalibratiNN")
y_hat_cal <- readRDS(file_path2) |> as.numeric()

file_path3 <- system.file("extdata", "y_hat_test.rds", package = "recalibratiNN")
y_hat_test <- readRDS(file_path3) |> as.numeric()
## -----------------------------------------------------------------------------
## Global calibrations
pit <- PIT_global(ycal = y_cal,
                  yhat = y_hat_cal,
                  mse  = MSE_cal)
gg_PIT_global(pit)
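# The plot above is a visual check; uniformity of the global PIT values can also be
# tested formally. A minimal sketch using the Kolmogorov-Smirnov test (the same test
# used for the local diagnostics later in this script):
ks.test(pit, "punif")$p.value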
## -----------------------------------------------------------------------------
gg_CD_global(pit,
             ycal = y_cal,     # true responses of the calibration set
             yhat = y_hat_cal, # predictions for the calibration set
             mse  = MSE_cal)   # MSE computed on the calibration set
## -----------------------------------------------------------------------------
pit_local <- PIT_local(xcal = x_cal,
                       ycal = y_cal,
                       yhat = y_hat_cal,
                       mse  = MSE_cal)
gg_PIT_local(pit_local)
## -----------------------------------------------------------------------------
gg_CD_local(pit_local, mse = MSE_cal)
## -----------------------------------------------------------------------------
coverage_model <- tibble(
  x1cal  = x_test[, 1],
  x2cal  = x_test[, 2],
  y_real = y_test,
  y_hat  = y_hat_test) |>
  mutate(lwr = qnorm(0.05, y_hat, sqrt(MSE_cal)),
         upr = qnorm(0.95, y_hat, sqrt(MSE_cal)),
         CI  = ifelse(y_real <= upr & y_real >= lwr,
                      "in", "out"),
         coverage = round(mean(CI == "in") * 100, 1))
coverage_model |>
  arrange(CI) |>
  ggplot() +
  geom_point(aes(x1cal,
                 x2cal,
                 color = CI),
             alpha = 0.9,
             size = 2) +
  labs(x = "x1", y = "x2",
       title = glue("Original coverage: {coverage_model$coverage[1]} %")) +
  scale_color_manual("Confidence Interval",
                     values = c("in" = "aquamarine3",
                                "out" = "steelblue4")) +
  theme_classic()
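# The same Gaussian intervals can be checked at other nominal levels as well. A minimal
# sketch, assuming the same predictive distribution N(y_hat, MSE_cal) used above
# (the 0.9 entry should match the coverage shown in the plot title):
sapply(c(0.5, 0.8, 0.9, 0.95), function(level) {
  a <- (1 - level) / 2
  lwr <- qnorm(a, y_hat_test, sqrt(MSE_cal))
  upr <- qnorm(1 - a, y_hat_test, sqrt(MSE_cal))
  mean(y_test >= lwr & y_test <= upr)
})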
## -----------------------------------------------------------------------------
recalibrated <-
  recalibrate(
    yhat_new     = y_hat_test, # predictions for the test set
    space_cal    = x_cal,      # covariates of the calibration set
    pit_values   = pit,        # global PIT values calculated earlier
    space_new    = x_test,     # covariates of the test set
    mse          = MSE_cal,    # MSE from the calibration set
    type         = "local",    # type of calibration
    p_neighbours = 0.08)       # proportion of calibration points used as nearest neighbours

y_hat_rec <- recalibrated$y_samples_calibrated_wt
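# Besides the weighted samples, the recalibrate() output also carries point predictions
# in recalibrated$y_hat_calibrated (used again at the end of this script). A quick,
# illustrative sanity check of point accuracy before and after recalibration:
c(rmse_original     = sqrt(mean((y_test - y_hat_test)^2)),
  rmse_recalibrated = sqrt(mean((y_test - recalibrated$y_hat_calibrated)^2)))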
## -----------------------------------------------------------------------------
n_clusters   <- 6
n_neighbours <- length(y_hat_test) * 0.08

# calculating centroids
cluster_means_cal <- kmeans(x_test, n_clusters)$centers
cluster_means_cal <- cluster_means_cal[order(cluster_means_cal[, 1]), ]

# finding neighbours
knn_cal <- nn2(x_test,
               cluster_means_cal,
               k = n_neighbours)$nn.idx

# getting the corresponding ys (observed responses and recalibrated samples)
y_real_local <- map(1:nrow(knn_cal), ~ y_test[knn_cal[., ]])
y_hat_local  <- map(1:nrow(knn_cal), ~ y_hat_rec[knn_cal[., ], ])

# calculating local PIT values: for each observation, the share of recalibrated
# samples that fall at or below the observed response
pits <- matrix(NA,
               nrow = n_clusters,
               ncol = n_neighbours)

for (i in 1:n_clusters) {
  pits[i, ] <- map_dbl(1:n_neighbours, ~ {
    mean(y_hat_local[[i]][., ] <= y_real_local[[i]][.])
  })
}
as.data.frame(t(pits)) |>
  pivot_longer(everything()) |>
  group_by(name) |>
  mutate(p_value = ks.test(value, "punif")$p.value,
         name = gsub("V", "part_", name)) |>
  ggplot() +
  geom_density(aes(value,
                   color = name,
                   fill = name),
               alpha = 0.5,
               bounds = c(0, 1)) +
  geom_hline(yintercept = 1,
             linetype = "dashed") +
  scale_color_brewer(palette = "Set2") +
  scale_fill_brewer(palette = "Set2") +
  theme_classic() +
  geom_text(aes(x = 0.5,
                y = 0.5,
                label = glue("p-value: {round(p_value, 3)}")),
            color = "black",
            size = 3) +
  theme(legend.position = "none") +
  labs(title = "After Local Calibration",
       subtitle = "It looks so much better!!",
       x = "PIT-values",
       y = "Density") +
  facet_wrap(~name, scales = "free_y")
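# The same comparison can be made globally: one PIT value per test observation, taken
# from its recalibrated predictive samples, followed by a single uniformity test.
# A minimal sketch:
pit_rec_global <- map_dbl(seq_along(y_test), ~ mean(y_hat_rec[.x, ] <= y_test[.x]))
ks.test(pit_rec_global, "punif")$p.value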
## -----------------------------------------------------------------------------
coverage_rec <- map_dfr(1:nrow(x_test), ~ {
  quantile(y_hat_rec[., ], c(0.05, 0.95))}) |>
  mutate(x1 = x_test[, 1],
         x2 = x_test[, 2],
         ytest = y_test,
         CI = ifelse(ytest <= `95%` & ytest >= `5%`,
                     "in", "out"),
         coverage = round(mean(CI == "in") * 100, 1)) |>
  arrange(CI)
coverage_rec |>
  ggplot() +
  geom_point(aes(x1, x2, color = CI),
             alpha = 0.9,
             size = 2) +
  labs(x = "x1", y = "x2",
       title = glue("Recalibrated coverage: {coverage_rec$coverage[1]} %")) +
  scale_color_manual("Confidence Interval",
                     values = c("in" = "aquamarine3",
                                "out" = "steelblue4")) +
  theme_classic()
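# Side-by-side summary of the empirical 90% coverage before and after recalibration
# (the same values shown in the two plot titles):
tibble(model    = c("original", "recalibrated"),
       coverage = c(coverage_model$coverage[1], coverage_rec$coverage[1]))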
## ----include = F--------------------------------------------------------------
data.frame(
  real  = mu_fun(x_test),
  desc  = y_hat_test,
  recal = recalibrated$y_hat_calibrated) |>
  pivot_longer(-real) |>
  arrange(name) |>
  ggplot() +
  geom_point(aes(x = value,
                 y = real,
                 color = name),
             alpha = 0.7) +
  scale_color_manual("", values = c("#003366", "#80b298"),
                     labels = c("Predicted", "Recalibrated")) +
  geom_abline(color = "red", linetype = "dashed") +
  labs(x = "Estimated Mean", y = "True Mean") +
  theme_bw(base_size = 14) +
  theme(axis.title.y = element_text(colour = "black"),
        axis.title.x = element_text(colour = "black"),
        axis.text = element_text(colour = "black"),
        legend.position = c(0.8, 0.2),
        panel.border = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line = element_line(colour = "black"),
        plot.margin = margin(0, 0, 0, 0.2, "cm"))