Savvy Parity Regression Model Estimation with 'savvyPR

library(savvyPR)
library(ggplot2)

Introduction

This vignette demonstrates how to use the savvyPR package for parity regression model estimation and cross-validation. The package handles multicollinearity by applying risk parity constraints, supporting both Budget-based and Target-based parameterizations.

Installation

# Install the development version from GitHub
# devtools::install_github("Ziwei-ChenChen/savvyPR)
library(savvyPR)
library(MASS)
library(glmnet)

Parity Regression Model Estimation

The savvyPR function is used to fit a parity regression model. Here we demonstrate its usage with synthetic, highly correlated data.

library(MASS)
library(glmnet)

# Function to create a correlation matrix for X
create_corr_matrix <- function(rho, p) {
  corr_matrix <- diag(1, p)
  for (i in 2:p) {
    for (j in 1:(i-1)) {
      corr_matrix[i, j] <- rho^(abs(i - j))
      corr_matrix[j, i] <- corr_matrix[i, j] # symmetric matrix
    }
  }
  return(corr_matrix)
}

# Function to generate beta values with both positive and negative signs
generate_beta <- function(p) {
  half_p <- ceiling(p / 2)
  beta <- rep(c(1, -1), length.out = p) * rep(1:half_p, each = 2)[1:p]
  return(beta)
}

set.seed(123)
n <- 1500  
p <- 15  
rho <- -0.5  

corr_matrix <- create_corr_matrix(rho, p)
x <- mvrnorm(n = n, mu = rep(0, p), Sigma = corr_matrix)
beta <- generate_beta(p + 1)
sigma_vec <- abs(rnorm(n = n, mean = 15, sd = sqrt(1)))
y <- rnorm(n, mean = as.vector(cbind(1,x)%*%beta), sd = sigma_vec)

# 1. Run OLS estimation with intercept
result_ols <- lm(y ~ x)
coef_ols <- coef(result_ols)

# 2. Run Ridge Regression (RR) estimation
result_RR <- glmnet(x, y, alpha = 0, lambda = 1)
coef_RR <- coef(result_RR)

# 3. Run PR estimation (Budget Method)
result_pr_budget <- savvyPR(x, y, method = "budget", val = 0.05, intercept = TRUE)
print(result_pr_budget)
coef_pr_budget <- coef(result_pr_budget)

# 4. Run PR estimation (Target Method)
result_pr_target <- savvyPR(x, y, method = "target", val = 1, intercept = TRUE)
print(result_pr_target)
coef_pr_target <- coef(result_pr_target)

# Calculate the L2 distance to true beta
ols_L2 <- sqrt(sum((beta - coef_ols)^2))
print(paste("OLS L2:", ols_L2))

RR_L2 <- sqrt(sum((beta - coef_RR)^2))
print(paste("Ridge L2:", RR_L2))

pr_budget_L2 <- sqrt(sum((beta - coef_pr_budget)^2))
print(paste("PR Budget L2:", pr_budget_L2))

pr_target_L2 <- sqrt(sum((beta - coef_pr_target)^2))
print(paste("PR Target L2:", pr_target_L2))

You can use the summary function to get detailed statistics for your models:

summary(result_pr_budget)
summary(result_pr_target)

The package also provides built-in plotting functions to visualize the coefficients and the risk parity optimization distributions.

# Plot the estimated coefficients
plot(result_pr_budget, plot_type = "estimated_coefficients", label = TRUE)
plot(result_pr_target, plot_type = "estimated_coefficients", label = FALSE)

# Plot the risk contributions and weights/target variables
plot(result_pr_budget, plot_type = "risk_contributions", label = TRUE)
plot(result_pr_target, plot_type = "risk_contributions", label = FALSE)

Cross-Validation for Parity Regression Models

The cv.savvyPR function performs cross-validation to select optimal parameters. It handles both the "budget" sequence and the "target" sequence automatically.

# Cross-validation with Ridge
result_rr_cv <- cv.glmnet(x, y, alpha = 0, folds = 5)
fit_rr1 <- glmnet(x, y, alpha = 0, lambda = result_rr_cv$lambda.min)
coef_rr_cv <- coef(fit_rr1)[,1]

# Cross-validation with model type PR1 (Budget Method)
result_pr_cv1 <- cv.savvyPR(x, y, method = "budget", folds = 5, model_type = "PR1", measure_type = "mse")
coef_pr_cv1 <- coef(result_pr_cv1)

# Cross-validation with model type PR2 (Target Method)
result_pr_cv2 <- cv.savvyPR(x, y, method = "target", folds = 5, model_type = "PR2", measure_type = "mse")
coef_pr_cv2 <- coef(result_pr_cv2)

# Cross-validation with model type PR3 (Budget Method)
result_pr_cv3 <- cv.savvyPR(x, y, method = "budget", folds = 5, model_type = "PR3", measure_type = "mse")
coef_pr_cv3 <- coef(result_pr_cv3)

# Calculate the L2 distance 
print(paste("Ridge CV L2:", sqrt(sum((beta - coef_rr_cv)^2))))
print(paste("PR1 CV (Budget) L2:", sqrt(sum((beta - coef_pr_cv1)^2))))
print(paste("PR2 CV (Target) L2:", sqrt(sum((beta - coef_pr_cv2)^2))))
print(paste("PR3 CV (Budget) L2:", sqrt(sum((beta - coef_pr_cv3)^2))))

We can summarize the cross-validation results to see the optimal tuning values chosen by the algorithm.

summary(result_pr_cv1)
summary(result_pr_cv2)
summary(result_pr_cv3)

We can also visualize the cross-validated models:

# Plot coefficients and risk contributions for PR1
plot(result_pr_cv1, plot_type = "estimated_coefficients", label = TRUE)
plot(result_pr_cv1, plot_type = "risk_contributions",label = TRUE)

# Plot coefficients and risk contributions for PR2
plot(result_pr_cv2, plot_type = "estimated_coefficients", label = FALSE)
# Cannot plot risk-contribution for PR2 since the tuning parameter val=0 is fixed.
#plot(result_pr_cv2, plot_type = "risk_contributions", label = FALSE)

We can visualize the cross-validation error curves to see exactly where the optimal minimum was found.

# Plot the cross-validation errors for each model
plot(result_rr_cv)
plot(result_pr_cv1, plot_type = "cv_errors", label = TRUE)
plot(result_pr_cv2, plot_type = "cv_errors")
plot(result_pr_cv3, plot_type = "cv_errors", label = FALSE)

Finally, we can plot the Coefficient Paths to see how the coefficients shrink or change as the tuning parameter varies. Notice that we use xvar = "val" to plot against the unified tuning parameter.

# Plot the coefficient paths for cross-validation models
plot(result_pr_cv1, plot_type = "cv_coefficients", xvar = "val", max_vars_per_plot = 10, label = TRUE)

# Show what happens when max_vars_per_plot exceeds the limit (will trigger a warning and reset to 10)
plot(result_pr_cv2, plot_type = "cv_coefficients", xvar = "norm", max_vars_per_plot = 12, label = FALSE)

# PR3 uses dual-optimization, so we can plot against lambda as well
plot(result_pr_cv3, plot_type = "cv_coefficients", xvar = "norm", max_vars_per_plot = 10, label = TRUE)
plot(result_pr_cv3, plot_type = "cv_coefficients", xvar = "lambda", max_vars_per_plot = 10, label = TRUE)
plot(result_pr_cv3, plot_type = "cv_coefficients", xvar = "dev", max_vars_per_plot = 10, label = TRUE)

Conclusion

This vignette has provided an overview of the main functionalities of the savvyPR package, covering both the Budget-based and Target-based risk parity constraints. For more details, refer to the function documentation.



Try the savvyPR package in your browser

Any scripts or data that you put into this service are public.

savvyPR documentation built on April 7, 2026, 5:08 p.m.