knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.path = "man/figures/README-", out.width = "100%", error = FALSE, warning = FALSE, message = FALSE )
library(rwa) library(dplyr) library(ggplot2)
Bootstrap confidence intervals represent a major advancement in Relative Weights Analysis, addressing a long-standing methodological limitation. This vignette provides comprehensive guidance on using bootstrap methods with the rwa package for statistical significance testing of predictor importance.
As noted by Tonidandel et al. (2009):
"The difficulty in determining the statistical significance of relative weights stems from the fact that the exact (or small sample) sampling distribution of relative weights is unknown."
Traditional RWA provides point estimates of relative importance but lacks a framework for statistical inference. Bootstrap methods solve this by empirically estimating the sampling distribution of relative weights.
Bootstrap resampling:
1. Creates multiple samples from your original data
2. Calculates RWA for each bootstrap sample
3. Estimates confidence intervals from the distribution of bootstrap results
4. Enables significance testing by examining whether CIs include zero
# Bootstrap analysis with 1000 samples result_bootstrap <- mtcars %>% rwa(outcome = "mpg", predictors = c("cyl", "disp", "hp", "gear"), bootstrap = TRUE, n_bootstrap = 1000, conf_level = 0.95) # View results with confidence intervals result_bootstrap$result
The bootstrap analysis enhances the standard RWA output with:
# Bootstrap-specific information cat("Bootstrap samples used:", result_bootstrap$bootstrap$n_bootstrap, "\n") # Detailed CI information print(result_bootstrap$bootstrap$ci_results$raw_weights) # Identify significant predictors significant_vars <- result_bootstrap$result %>% filter(Raw.Significant == TRUE) %>% pull(Variables) cat("Significant predictors:", paste(significant_vars, collapse = ", "))
For detailed analysis including focal variable comparisons:
# Comprehensive bootstrap with focal variable comparison result_comprehensive <- mtcars %>% rwa(outcome = "mpg", predictors = c("cyl", "disp", "hp", "gear", "wt"), bootstrap = TRUE, comprehensive = TRUE, focal = "wt", # Compare other variables to weight n_bootstrap = 500) # Fewer samples for speed # Access all bootstrap results names(result_comprehensive$bootstrap$ci_results)
Key parameters for bootstrap analysis:
n_bootstrap: Number of bootstrap samples (default: 1000)conf_level: Confidence level (default: 0.95)focal: Focal variable for comparative analysiscomprehensive: Enable additional bootstrap tests# Example with different parameters custom_bootstrap <- mtcars %>% rwa(outcome = "mpg", predictors = c("cyl", "disp"), bootstrap = TRUE, n_bootstrap = 2000, # More samples for precision conf_level = 0.99) # 99% confidence intervals custom_bootstrap$result
Rescaled weight confidence intervals should be interpreted with caution due to compositional data constraints. They are not recommended for formal statistical inference.
# Rescaled CIs (use with caution) result_rescaled_ci <- mtcars %>% rwa(outcome = "mpg", predictors = c("cyl", "disp", "hp"), bootstrap = TRUE, include_rescaled_ci = TRUE, n_bootstrap = 500) # Note the warning message about interpretation result_rescaled_ci$result
Rescaled weights are compositional data (they sum to 100%), which creates dependencies between variables. This violates assumptions needed for independent confidence intervals.
Recommendation: Focus on raw weight confidence intervals for statistical inference.
# Analyze diamond price drivers diamonds_subset <- diamonds %>% select(price, carat, depth, table, x, y, z) %>% sample_n(1000) # Sample for faster computation diamond_rwa <- diamonds_subset %>% rwa(outcome = "price", predictors = c("carat", "depth", "table", "x", "y", "z"), bootstrap = TRUE, applysigns = TRUE, n_bootstrap = 500) print(diamond_rwa$result)
# Focus on significant predictors (results are already sorted by importance) significant_drivers <- diamond_rwa$result %>% filter(Raw.Significant == TRUE) %>% select(Variables, Rescaled.RelWeight, Sign.Rescaled.RelWeight) cat("Significant diamond price drivers (sorted by importance):\n") print(significant_drivers) cat("\nModel R-squared:", round(diamond_rwa$rsquare, 3))
# Check your sample size n_obs <- mtcars %>% select(mpg, cyl, disp, hp, gear) %>% na.omit() %>% nrow() cat("Sample size:", n_obs) cat("\nRecommended bootstrap samples:", min(2000, n_obs * 10)) # Rule of thumb: At least 1000 bootstrap samples, more for smaller datasets
# Examine CI characteristics ci_data <- result_bootstrap$bootstrap$ci_results$raw_weights print(head(ci_data)) # Assess precision ci_analysis <- ci_data %>% mutate( significant = ci_lower > 0 | ci_upper < 0, ci_width = ci_upper - ci_lower, precision = case_when( ci_width < 0.05 ~ "High precision", ci_width < 0.15 ~ "Medium precision", TRUE ~ "Low precision" ) ) print(ci_analysis)
The package automatically selects the best available bootstrap CI method:
# Check which methods were used ci_methods <- result_bootstrap$bootstrap$ci_results$raw_weights %>% count(ci_method) print(ci_methods)
# For large datasets or many predictors, consider: # 1. Reduce bootstrap samples for initial exploration quick_result <- mtcars %>% rwa(outcome = "mpg", predictors = c("cyl", "disp"), bootstrap = TRUE, n_bootstrap = 500) # Faster # 2. Use comprehensive analysis only when needed # comprehensive = TRUE adds computational overhead # 3. Consider parallel processing for very large analyses # (not currently implemented but could be future enhancement)
# Bootstrap objects can be large - access specific components str(result_bootstrap$bootstrap, max.level = 1) # For memory efficiency, extract only needed results ci_summary <- result_bootstrap$bootstrap$ci_results$raw_weights %>% select(variable, ci_lower, ci_upper, ci_method) print(ci_summary)
# 1. Check for perfect multicollinearity cor_check <- mtcars %>% select(cyl, disp, hp, gear) %>% cor() # Look for correlations = 1.0 (excluding diagonal) perfect_cor <- which(abs(cor_check) == 1 & cor_check != diag(diag(cor_check)), arr.ind = TRUE) if(length(perfect_cor) > 0) { cat("Perfect multicollinearity detected - remove redundant variables") } else { cat("No perfect multicollinearity detected") } # 2. Ensure adequate sample size min_sample_size <- 5 * length(c("cyl", "disp", "hp", "gear")) # 5 obs per predictor actual_sample_size <- nrow(na.omit(mtcars[c("mpg", "cyl", "disp", "hp", "gear")])) cat("\nMinimum recommended sample size:", min_sample_size) cat("\nActual sample size:", actual_sample_size)
When reporting bootstrap RWA results, include:
# Generate a summary report report_data <- result_bootstrap$result %>% filter(Raw.Significant == TRUE) %>% arrange(desc(Rescaled.RelWeight)) %>% select(Variables, Rescaled.RelWeight, Raw.RelWeight.CI.Lower, Raw.RelWeight.CI.Upper) cat("Relative Weights Analysis Results\n") cat("=================================\n") cat("Sample size:", result_bootstrap$n, "\n") cat("Bootstrap samples:", result_bootstrap$bootstrap$n_bootstrap, "\n") cat("Model R-squared:", round(result_bootstrap$rsquare, 3), "\n\n") cat("Significant Predictors:\n") print(report_data)
Bootstrap Methods in RWA:
General Bootstrap Theory:
Compositional Data Analysis:
Bootstrap confidence intervals provide a robust solution for statistical inference in Relative Weights Analysis. By following the guidelines in this vignette, researchers can:
The bootstrap functionality in the rwa package represents a significant advancement in making RWA a complete tool for both exploratory analysis and confirmatory research.
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.