# introduction-to-rwa.R
# In rwa: Perform a Relative Weights Analysis

## ----include = FALSE----------------------------------------------------------
# Vignette-wide knitr chunk defaults, supplied as a single named list.
do.call(
  knitr::opts_chunk$set,
  list(
    # Rendering of source and printed output
    collapse = TRUE,
    comment = "#>",
    # Figure location and display width
    fig.path = "man/figures/README-",
    out.width = "100%",
    # Condition handling (see the knitr chunk-option reference for semantics)
    error = FALSE,
    warning = FALSE,
    message = FALSE
  )
)

## ----setup--------------------------------------------------------------------
library(rwa)
library(dplyr)
library(ggplot2)

## ----theoretical-demonstration------------------------------------------------
# Create controlled scenario to demonstrate RWA's theoretical properties
set.seed(123)
n <- 200
predictor_names <- c("x1", "x2", "x3", "x4")

# Generate predictors with known correlation structure.
# For x = a * x1 + b * e, with x1 and e independent standard normals, the
# population correlation with x1 is a / sqrt(a^2 + b^2).
x1 <- rnorm(n)
x2 <- 0.7 * x1 + 0.3 * rnorm(n)  # r ≈ 0.92 with x1
x3 <- 0.5 * x1 + 0.8 * rnorm(n)  # r ≈ 0.53 with x1
x4 <- rnorm(n)                    # Independent

# True population model with known coefficients
y <- 0.6 * x1 + 0.4 * x2 + 0.3 * x3 + 0.2 * x4 + rnorm(n, sd = 0.5)

theory_data <- data.frame(y = y, x1 = x1, x2 = x2, x3 = x3, x4 = x4)

# Compare traditional regression vs RWA
lm_theory <- lm(y ~ x1 + x2 + x3 + x4, data = theory_data)
rwa_theory <- rwa(theory_data, "y", predictor_names)

# Show how multicollinearity affects traditional coefficients
cat("True population contributions (designed into simulation):\n")
true_contributions <- c(0.6, 0.4, 0.3, 0.2)
names(true_contributions) <- predictor_names
print(true_contributions)

cat("\nStandardized regression coefficients (distorted by multicollinearity):\n")
# lm() reports raw slopes; rescale each by sd(x) / sd(y) so the values are
# genuinely standardized, as the label above says.
raw_betas <- coef(lm_theory)[predictor_names]
predictor_sds <- vapply(theory_data[predictor_names], sd, numeric(1))
std_betas <- raw_betas * predictor_sds / sd(theory_data$y)
print(round(std_betas, 3))

cat("\nRWA weights (better reflect true importance despite correlations):\n")
# The predictor labels live in the `Variables` column of the result table.
# Naming the weights matters because rwa() sorts the table by importance by
# default, so the comparison below has to look weights up by name.
rwa_weights_theory <- rwa_theory$result$Raw.RelWeight
names(rwa_weights_theory) <- as.character(rwa_theory$result$Variables)
print(round(rwa_weights_theory, 3))

# Calculate correlation between methods and true values
cor_with_true <- data.frame(
  Method = c("Std_Betas", "RWA_Weights"),
  Correlation_with_True = c(
    cor(abs(std_betas), true_contributions),
    # Re-align the (sorted) RWA weights to the x1..x4 order before correlating
    cor(rwa_weights_theory[names(true_contributions)], true_contributions)
  )
)
print("Correlation with true population values:")
print(cor_with_true)

## ----basic-example------------------------------------------------------------
# Fit a basic RWA: four mtcars predictors explaining mpg
result_basic <- rwa(
  mtcars,
  outcome = "mpg",
  predictors = c("cyl", "disp", "hp", "gear")
)

# Show the table of relative weights
result_basic$result

## ----output-explanation-------------------------------------------------------
# Names of the predictors that entered the analysis
result_basic$predictors

# R-squared of the model
result_basic$rsquare

# How many complete observations were used
result_basic$n

# Correlation structures (for advanced users)
str(result_basic$RXX)  # predictors against each other
str(result_basic$RXY)  # predictors against the outcome

## ----interpret-results--------------------------------------------------------
# By default the table is ordered from most to least important
result_basic$result

# The raw weights add up to the model R-squared
with(result_basic$result, sum(Raw.RelWeight))
result_basic$rsquare

# The rescaled weights add up to 100%
with(result_basic$result, sum(Rescaled.RelWeight))

## ----sorting-example----------------------------------------------------------
# With the default settings the table is ordered by importance (descending)
result_sorted <- rwa(
  mtcars,
  outcome = "mpg",
  predictors = c("cyl", "disp", "hp", "gear")
)

result_sorted$result

# sort = FALSE keeps the predictors in the order they were supplied
result_unsorted <- rwa(
  mtcars,
  outcome = "mpg",
  predictors = c("cyl", "disp", "hp", "gear"),
  sort = FALSE
)

result_unsorted$result

## ----signs-example------------------------------------------------------------
# Same model as before, this time requesting signed weights
result_signs <- rwa(
  mtcars,
  outcome = "mpg",
  predictors = c("cyl", "disp", "hp", "gear"),
  applysigns = TRUE
)

result_signs$result

## ----visualization, fig.width=8, fig.height=5---------------------------------
# Fit the RWA that feeds the plot (five predictors of mpg)
rwa_result <- rwa(
  mtcars,
  outcome = "mpg",
  predictors = c("cyl", "disp", "hp", "gear", "wt")
)

# Draw the plot from the fitted result
plot_rwa(rwa_result)

# The table behind the plot, including the rescaled relative weights
rwa_result$result

## ----eval=FALSE---------------------------------------------------------------
# vignette("bootstrap-confidence-intervals", package = "rwa")

## ----bootstrap-example--------------------------------------------------------
# Basic bootstrap analysis
# Bootstrap resampling is random: fix the seed so the confidence intervals,
# and therefore the rows kept by the significance filter below, are the same
# on every run (the simulation chunks in this file seed the RNG as well).
set.seed(123)
bootstrap_result <- mtcars %>%
  rwa(outcome = "mpg",
      predictors = c("cyl", "disp", "hp"),
      bootstrap = TRUE,
      n_bootstrap = 500)  # Reduced for speed

# View significant predictors
bootstrap_result$result %>%
  filter(Raw.Significant == TRUE) %>%
  select(
    Variables,
    Rescaled.RelWeight,
    Raw.RelWeight.CI.Lower,
    Raw.RelWeight.CI.Upper
  )

## ----diamonds-example---------------------------------------------------------
# Analyze diamond price drivers
# The 1000-row subsample is drawn at random: fix the seed so the reported
# weights do not change from one run to the next.
set.seed(123)
diamonds_subset <- diamonds %>%
  select(price, carat, depth, table, x, y, z) %>%
  sample_n(1000)  # Sample for faster computation

diamond_rwa <- diamonds_subset %>%
  rwa(outcome = "price",
      predictors = c("carat", "depth", "table", "x", "y", "z"),
      applysigns = TRUE)

diamond_rwa$result

## ----eval=FALSE---------------------------------------------------------------
# vignette("bootstrap-confidence-intervals", package = "rwa")

## ----regression-comparison----------------------------------------------------
# Traditional regression
lm_model <- lm(mpg ~ cyl + disp + hp + gear, data = mtcars)
lm_summary <- summary(lm_model)

# Display regression summary
print(lm_summary)

# RWA results
rwa_model <- mtcars %>%
  rwa(outcome = "mpg", predictors = c("cyl", "disp", "hp", "gear"))

# Compare importance rankings.
# Take the labels from the result table itself: rwa() sorts that table by
# importance by default, whereas rwa_model$predictors keeps the input order,
# so pairing the two would attach weights to the wrong variables.
comparison <- data.frame(
  Variable = rwa_model$result$Variables,
  RWA_Rescaled = rwa_model$result$Rescaled.RelWeight,
  RWA_Rank = rank(-rwa_model$result$Rescaled.RelWeight)
)

print(comparison)

## ----efficiency-demonstration-------------------------------------------------
# Illustrate how the workload grows with the number of predictors
predictors <- c("cyl", "disp", "hp", "gear")
n_predictors <- length(predictors)

cat("Number of predictors:", n_predictors, "\n")
cat("Dominance analysis would require", 2^n_predictors, "subset models\n")
cat("RWA solves this in a single matrix operation\n")

# Wall-clock time of a single RWA fit (illustration only, not a benchmark)
start_time <- Sys.time()
rwa_speed_test <- rwa(mtcars, outcome = "mpg", predictors = predictors)
end_time <- Sys.time()
elapsed_secs <- as.numeric(difftime(end_time, start_time, units = "secs"))
cat("RWA computation time:", round(elapsed_secs, 4), "seconds\n")

## ----redundancy-demonstration-------------------------------------------------
# Illustrate what happens when two predictors carry nearly the same signal
set.seed(456)
n_draws <- 100
x1_orig <- rnorm(n_draws)
x1_dup <- x1_orig + rnorm(n_draws, sd = 0.05)  # Nearly identical (r ≈ 0.99)
y_simple <- 0.8 * x1_orig + rnorm(n_draws, sd = 0.5)

redundant_data <- data.frame(
  y = y_simple,
  x1_original = x1_orig,
  x1_duplicate = x1_dup
)

duplicate_cor <- cor(x1_orig, x1_dup)
cat("Correlation between 'different' predictors:", duplicate_cor, "\n")

# RWA divides the shared variance between the two near-duplicates
redundant_rwa <- rwa(
  redundant_data,
  outcome = "y",
  predictors = c("x1_original", "x1_duplicate")
)
print("RWA with redundant predictors:")
print(redundant_rwa$result)

cat("\nEach variable appears less important individually,")
cat("\nbut together they account for most variance.\n")
combined_weight <- sum(redundant_rwa$result$Raw.RelWeight)
cat("Combined contribution:", combined_weight, "\n")

## ----sample-size--------------------------------------------------------------
# Count the rows that are complete on the outcome and all predictors
analysis_vars <- select(mtcars, mpg, cyl, disp, hp, gear)
n_obs <- nrow(na.omit(analysis_vars))

cat("Sample size:", n_obs)
cat("\nRule of thumb: At least 5-10 observations per predictor")

## ----eval=FALSE---------------------------------------------------------------
# vignette("bootstrap-confidence-intervals", package = "rwa")

## ----multicollinearity-check--------------------------------------------------
# Check correlation matrix
cor_matrix <- mtcars %>%
  select(cyl, disp, hp, gear) %>%
  cor()

# Look for high correlations (|r| > 0.9) between distinct predictors.
# upper.tri() excludes the diagonal by position and visits each pair once.
# Comparing against 1 instead would rely on exact floating-point equality and
# would also hide perfectly collinear pairs (r = 1), the worst case of all.
high_cor <- which(
  abs(cor_matrix) > 0.9 & upper.tri(cor_matrix),
  arr.ind = TRUE
)
if (nrow(high_cor) > 0) {
  cat("High correlations detected between variables")
}

## ----missing-data-------------------------------------------------------------
# Count the missing values in each analysis column
analysis_columns <- select(mtcars, mpg, cyl, disp, hp, gear)
missing_summary <- summarise_all(analysis_columns, ~sum(is.na(.)))

print(missing_summary)

## ----eval=FALSE---------------------------------------------------------------
# vignette("bootstrap-confidence-intervals", package = "rwa")