Nothing
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## ----installation, eval=FALSE-------------------------------------------------
# if (!requireNamespace("devtools", quietly = TRUE)) {
# install.packages("devtools")
# }
# devtools::install_github("Chukyhenry/PosiR")
## ----eval=FALSE---------------------------------------------------------------
# install.packages("pbapply")
# install.packages("glmnet")
## -----------------------------------------------------------------------------
if (!requireNamespace("dplyr", quietly = TRUE)) {
stop("Please install the 'dplyr' package to run this vignette.")
}
library(PosiR)
library(dplyr)
library(stats)
## ----message=FALSE------------------------------------------------------------
set.seed(123) # for reproducibility
X <- matrix(rnorm(200 * 2), ncol = 2, dimnames = list(NULL, c("Age", "Income")))
y <- 0.5 * X[, "Age"] + rnorm(200) # the true intercept is 0, the true coefficient for Age is 0.5, and the true coefficient for Income is 0.
## -----------------------------------------------------------------------------
Q <- list(
Age_only = 1, # Model with Intercept + Age
Age_Income = c(1, 2) # Model with Intercept + Age + Income
)
# Indices refer to columns of X: 1="Age", 2="Income"
## ----warning=FALSE, message=FALSE---------------------------------------------
result <- simultaneous_ci(X, y, Q, B = 1000, cores = 2, verbose = FALSE)
## -----------------------------------------------------------------------------
# Show the calculated critical value
print(paste("Calculated K_alpha:", round(result$K_alpha, 3)))
# Display the intervals data frame
print(result$intervals)
## -----------------------------------------------------------------------------
plot(result, las.labels = 2, col.ci = "darkblue", main = "Simultaneous Confidence Intervals")
## -----------------------------------------------------------------------------
data(mtcars)
X_mtcars <- as.matrix(mtcars[, c("hp", "wt", "qsec")])
y_mtcars <- mtcars$mpg
## -----------------------------------------------------------------------------
Q_mtcars <- list(
HP_only = 1, # Model: mpg ~ Intercept + hp
WT_only = 2, # Model: mpg ~ Intercept + wt
QSEC_only = 3, # Model: mpg ~ Intercept + qsec
Full_Model = 1:3 # Model: mpg ~ Intercept + hp + wt + qsec
)
# Indices: 1="hp", 2="wt", 3="qsec"
## -----------------------------------------------------------------------------
result_mtcars <- simultaneous_ci(X = X_mtcars, y = y_mtcars, Q_universe = Q_mtcars,
B = 2000, cores = 2, seed = 123, verbose = FALSE)
# Display the critical value
print(paste("Calculated K_alpha:", round(result_mtcars$K_alpha, 3)))
## ----message=FALSE------------------------------------------------------------
z_alpha_2 <- stats::qnorm(0.975)
intervals_comparison <- result_mtcars$intervals %>%
mutate(
naive_lower = estimate - z_alpha_2 * se_nqj,
naive_upper = estimate + z_alpha_2 * se_nqj
) %>%
select(model_id, coefficient_name, estimate, lower, upper, naive_lower, naive_upper) %>%
arrange(model_id, coefficient_name) # Ensure consistent ordering
print(intervals_comparison, digits = 3)
## -----------------------------------------------------------------------------
plot(result_mtcars, las.labels = 2, main = "mtcars Data: Simultaneous CIs")
## -----------------------------------------------------------------------------
set.seed(123)
n_lasso <- 100
p_lasso <- 50
X_lasso <- matrix(rnorm(n_lasso * p_lasso), n_lasso, p_lasso,
dimnames = list(NULL, paste0("X", 1:p_lasso)))
# True coefficients: 1 for first 5, 0 for rest
beta_true <- c(rep(1.0, 5), rep(0, p_lasso - 5))
true_intercept <- 0.5
# Generate response (linear model)
y_lasso <- drop(true_intercept + X_lasso %*% beta_true + rnorm(n_lasso))
## -----------------------------------------------------------------------------
# This chunk only runs if glmnet is installed
if (requireNamespace("glmnet", quietly = TRUE)) {
library(glmnet)
# Code using glmnet
} else {
message("glmnet is not available. Skipping this example.")
}
library(glmnet)
# Use cv.glmnet to find optimal lambda
cv_lasso_fit <- cv.glmnet(X_lasso, y_lasso, alpha = 1) # alpha=1 for Lasso
# Get coefficients at lambda.min, excluding the intercept
lasso_coeffs <- coef(cv_lasso_fit, s = "lambda.min")[-1, 1]
# Identify indices of selected variables (non-zero coefficients)
# Indices are relative to the columns of X_lasso (1 to p_lasso)
select_var_index <- which(lasso_coeffs != 0)
# Get names of selected variables
select_var_names <- names(select_var_index)
cat("Lasso selected variables (indices in X_lasso):", paste(select_var_index, collapse = ", "), "\n")
cat("Lasso selected variables (names):", paste(select_var_names, collapse = ", "), "\n")
# If no variables selected (unlikely here), handle gracefully for Q definition
if (length(select_var_index) == 0) {
warning("Lasso selected no variables. Defining Q with intercept-only and full model.")
select_var_index1 <- 1 # Just intercept index for design matrix
} else {
# IMPORTANT: Need indices relative to the design matrix used in simultaneous_ci
# which includes intercept as column 1. Add 1 to Lasso indices.
select_var_index1 <- c(1, select_var_index + 1)
}
## -----------------------------------------------------------------------------
# Fallback if glmnet is not installed
if (!requireNamespace("glmnet", quietly = TRUE)) {
warning("glmnet package not found. Using arbitrary selection for vignette.", call. = FALSE)
select_var_index <- c(1, 2, 3, 4, 5, 9, 13, 18, 22, 24, 26, 29, 33, 36, 41)
select_var_names <- paste0("X", select_var_index)
select_var_index1 <- c(1, select_var_index + 1)
}
## -----------------------------------------------------------------------------
# Indices for X in simultaneous_ci context (assuming add_intercept=TRUE)
# Intercept is 1, X1 is 2, ..., Xp is p+1
full_model_indices <- 1:(p_lasso + 1)
Q_lasso <- list(
# Model selected by Lasso (Intercept + selected X's)
LassoSelected = select_var_index1,
# Full model (Intercept + All X's) - optional, but common for context
FullModel = full_model_indices
)
# Check if Lasso selection was empty
if (length(select_var_index1) == 1 && select_var_index1 == 1) {
Q_lasso$LassoSelected <- 1 # Ensure it's just the intercept index
}
print("Models in Q_lasso (indices refer to design matrix with intercept):")
# Print only first few indices for brevity if models are large
print(lapply(Q_lasso, function(idx) if(length(idx)>10) c(idx[1:5], "...", idx[length(idx)]) else idx))
## -----------------------------------------------------------------------------
# Note: We pass the original X matrix (without intercept) to simultaneous_ci
# It will add the intercept based on add_intercept=TRUE
result_lasso <- simultaneous_ci(
X_lasso, y_lasso, Q_lasso,
B = 2000, # Recommend B=2000+ for high-dim
cores = 2, seed = 123, verbose = FALSE
)
## -----------------------------------------------------------------------------
intervals_lasso_comp <- result_lasso$intervals %>%
filter(model_id == "LassoSelected") %>% # Focus on the Lasso model
mutate(
naive_lower = estimate - qnorm(0.975) * se_nqj,
naive_upper = estimate + qnorm(0.975) * se_nqj
) %>%
select(coefficient_name, estimate, lower, upper, naive_lower, naive_upper) %>%
arrange(match(coefficient_name, c("(Intercept)", paste0("X", 1:p_lasso)))) # Order numerically
print(intervals_lasso_comp, digits = 3)
## -----------------------------------------------------------------------------
# Plot only coefficients from the Lasso-selected model
selected_plot <- result_lasso$intervals %>%
filter(model_id == "LassoSelected") %>%
pull(coefficient_name)
plot(result_lasso, subset_pars = selected_plot,
main = "Simultaneous CIs for Lasso-Selected Model", las.labels = 1)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.