sim_code/section_4/reproducibility_example.R

# Repeatedly simulate lower and upper bounds from methods 2 and 3
# on single dataset.

library(R.utils)
library(data.table)
library(tidyverse)
library(progress)
library(ConformalTwoLayer)

# Data frame to store results
pred_ints <- data.table(sim = 1:1000,
                        method_2_lb = NA_real_,
                        method_2_ub = NA_real_,
                        method_3_lb = NA_real_,
                        method_3_ub = NA_real_)

# Generate a single data set
set.seed(20211221)

# Simulate data
k_val <- 100
n_val <- 100
alpha <- 0.1

# Simulate data
xy_data <- sup_generate_data(k = k_val, n = n_val, mu = 0, tau_sq = 1, sigma_sq = 1)
X_new <- 1

# Set up progress bar
pb <- progress_bar$new(format = paste0("sim :current / :total [:bar] :eta"),
                       total = nrow(pred_ints), clear = T, show_after = 0)

# Repeatedly simulate methods 2 and 3
for(i in 1:nrow(pred_ints)) {

  # Update progress bar
  pb$tick()

  ###### Construct method 2 prediction interval #####

  # Construct prediction interval from xy_data
  method2_pred_int <-
    sup_single_subsample(xy_data = xy_data,
                         model_formula = formula(Y ~ X1 - 1),
                         alpha = alpha, n_val = n_val, k_indices = 1:k_val,
                         grid_values = quantile(xy_data$Y,
                                                probs = seq(0, 1, by = 0.02)),
                         new_xy_data = data.frame(X1 = X_new))

  # Store method 2 results
  pred_ints[sim == i, method_2_lb := method2_pred_int$lower_bound]
  pred_ints[sim == i, method_2_ub := method2_pred_int$upper_bound]

  ###### Construct method 3 prediction interval #####

  # Construct prediction interval from xy_data
  n_resamp <- 100

  method3_pred_int <-
    sup_repeated_subsample(xy_data = xy_data,
                           model_formula = formula(Y ~ X1 - 1),
                           alpha = alpha, n_val = n_val, k_indices = 1:k_val,
                           n_resamp = n_resamp,
                           grid_values = quantile(xy_data$Y,
                                                  probs = seq(0, 1, by = 0.02)),
                           new_xy_data = data.frame(X1 = X_new))

  # Store method 3 results
  pred_ints[sim == i, method_3_lb := method3_pred_int$lower_bound]
  pred_ints[sim == i, method_3_ub := method3_pred_int$upper_bound]

}

# Min and max values for each bound
summary(pred_ints$method_2_lb)
summary(pred_ints$method_2_ub)

summary(pred_ints$method_3_lb)
summary(pred_ints$method_3_ub)

# Save results
fwrite(pred_ints,
       file = "sim_data/section_4/reproducibility_example.csv")
RobinMDunn/ConformalTwoLayer documentation built on March 22, 2022, 6:38 p.m.