
influridge: Influence of single observations on the choice of the penalty parameter in ridge regression

This vignette describes how to use the R package influridge to identify observations that are influential on the choice of the penalty parameter in ridge regression. See Hellton, Lingjærde and De Bin (2024) for details.

Installation

You can install the R package influridge directly from GitHub, after installing the devtools package. The help files can then be accessed with ?influridge.

# Install and load the devtools package
install.packages("devtools")
library(devtools)

# Install the influridge package directly from GitHub
install_github("khellton/influridge")

# Load the influridge package
library(influridge)

# Check the help file
?influridge
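For reference, the source of the main function influridge() and of the internal cross-validation criterion tuning_cv_svd() is reproduced below.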
influridge <- function(X, y, nw = 40, max.weight = 4, noExpand = 0, noShrink = 0, degreeFreedom = FALSE, control.list = list(factr = 1e-4)) {
  if (noShrink > 5) {
    stop("Number of highlighted shrinkers must be 5 or less")
  }
  if (noExpand > 5) {
    stop("Number of highlighted expanders must be 5 or less")
  }

  n <- dim(X)[1]
  weights <- seq(0, max.weight, length.out = nw) # Weight grid
  lambdaMatrix <- matrix(NA, n, nw) # Optimal lambda per observation and weight
  svd.X <- svd(X) # Compute the SVD of X once and reuse it in all optimizations

  # Optimal lambda for the equally weighted data, used as starting value below
  startLambda <- stats::optim(
    par = 1, tuning_cv_svd, w = rep(1, n), svd.int = svd.X, y.int = y,
    lower = 0, upper = Inf,
    method = "L-BFGS-B", control = control.list
  )$par

  # For each observation j and each grid weight, find the lambda minimizing
  # the weighted leave-one-out cross-validation criterion
  for (i in 1:nw) {
    for (j in 1:n) {
      w <- rep(1, n)
      w[j] <- weights[i] # Weight to give observation j

      lambdaMatrix[j, i] <- stats::optim(
        par = startLambda, tuning_cv_svd, w = w / sum(w), svd.int = svd.X, y.int = y,
        lower = 0, upper = Inf,
        method = "L-BFGS-B", control = control.list
      )$par
    }
  }

  # Default line widths, colors and right-axis labels for the plot
  lwt <- rep(1, n)
  col <- rep("grey", n)
  plotIndex <- rep("", n)

  # Order observations by their optimal lambda at the largest weight
  sortIndexEnd <- sort(lambdaMatrix[, dim(lambdaMatrix)[2]],
    decreasing = FALSE, index.return = TRUE
  )$ix

  # Highlight the noExpand observations with the smallest lambda at the
  # largest weight (expanders, blue)
  if (noExpand > 0) {
    lwt[sortIndexEnd[1:noExpand]] <- 3
    col[sortIndexEnd[1:noExpand]] <- "blue"
    plotIndex[sortIndexEnd[1:noExpand]] <- as.character(sortIndexEnd[1:noExpand])
  }

  # Highlight the noShrink observations with the largest lambda at the
  # largest weight (shrinkers, red)
  if (noShrink > 0) {
    lwt[sortIndexEnd[(n + 1 - noShrink):n]] <- 3
    col[sortIndexEnd[(n + 1 - noShrink):n]] <- "red"
    plotIndex[sortIndexEnd[(n + 1 - noShrink):n]] <- as.character(sortIndexEnd[(n + 1 - noShrink):n])
  }

  if (!degreeFreedom) {
    # Plot curves of the optimal tuning parameter against observation weight
    graphics::matplot(weights, t(lambdaMatrix),
      type = "l",
      ylim = c(min(lambdaMatrix), max(lambdaMatrix)),
      xlim = c(0, max(weights) + 0.1),
      xlab = "Weight of observation",
      ylab = "Tuning parameter",
      lty = 1,
      lwd = lwt,
      xaxs = "i", yaxs = "i",
      cex.lab = 1.7, mgp = c(2.8, 1, 0),
      cex.axis = 1.5, col = col
    )
    graphics::axis(4,
      at = lambdaMatrix[, dim(lambdaMatrix)[2]], labels = plotIndex,
      las = 2, gap.axis = -1, tick = FALSE, cex.axis = 1.5, hadj = 0.6
    )
    graphics::abline(v = 1, lty = 2, col = "gray")
  } else {
    # Convert each lambda to effective degrees of freedom, sum(d^2 / (d^2 + lambda))
    df <- apply(lambdaMatrix, c(1, 2), function(lam) sum(svd.X$d^2 / (svd.X$d^2 + lam)))
    graphics::matplot(weights, t(df),
      type = "l",
      ylim = c(min(df), max(df)),
      xlim = c(0, max(weights) + 0.1),
      xlab = "Weight of observation",
      ylab = "Degrees of freedom",
      lty = 1,
      lwd = lwt,
      xaxs = "i", yaxs = "i",
      cex.lab = 1.7, mgp = c(2.8, 1, 0),
      cex.axis = 1.5, col = col
    )
    graphics::axis(4,
      at = df[, dim(df)[2]], labels = plotIndex,
      las = 2, gap.axis = -1, tick = FALSE, cex.axis = 1.5, hadj = 0.6
    )
    graphics::abline(v = 1, lty = 2, col = "gray")
  }
}
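In brief, nw sets the number of grid points on the weight axis, max.weight the largest weight considered, noExpand and noShrink the number of expanders and shrinkers to highlight (at most 5 each), degreeFreedom switches the vertical axis to effective degrees of freedom, and control.list is passed on to stats::optim.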

# Weighted leave-one-out cross-validation criterion for ridge regression,
# computed through the SVD of the design matrix
tuning_cv_svd <- function(lambda, w, svd.int, y.int) {
  # Ridge hat matrix H(lambda) = U diag(d^2 / (d^2 + lambda)) U'
  H <- svd.int$u %*% diag(svd.int$d^2 / (svd.int$d^2 + lambda)) %*% t(svd.int$u)
  e <- (diag(length(y.int)) - H) %*% y.int # Residuals of the ridge fit
  # Weighted mean of squared leave-one-out residuals e_i / (1 - H_ii)
  return(mean(w * (e / (1 - diag(H)))^2))
}
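The criterion rests on two standard identities. Writing the SVD X = UDV', the ridge hat matrix is H(lambda) = X(X'X + lambda I)^(-1)X' = U diag(d^2 / (d^2 + lambda)) U', and the leave-one-out residual of observation i is e_i / (1 - H_ii), so no model needs to be refitted. The following sketch, not part of the package, checks the hat-matrix identity numerically on random data:

## Sketch (not part of the package): verify the SVD form of the ridge hat matrix
set.seed(1)
X0 <- matrix(rnorm(30), 10, 3)
lambda <- 2
H.naive <- X0 %*% solve(crossprod(X0) + lambda * diag(3)) %*% t(X0)
s <- svd(X0)
H.svd <- s$u %*% diag(s$d^2 / (s$d^2 + lambda)) %*% t(s$u)
max(abs(H.naive - H.svd)) # numerically zero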

Graphical tool for visualizing the tuning parameter

We first demonstrate the basic functionality of the package using simulated data. The resulting figure shows how the optimal tuning parameter changes as the weight of each single observation is varied.

p <- 5
n <- 20
sigma <- 1
beta <- rep(1, p)

## Simulate design matrix, X
set.seed(556)
X <- matrix(rnorm(n * p), n, p)

## Simulate outcome vector, Y
y <- X %*% beta + rnorm(n, 0, sigma)

## Plot curves (no highlighted shrinkers/expanders)
influridge(X, y)
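Each grey curve traces the optimal tuning parameter for one observation as its weight varies; the dashed vertical line at weight 1 marks the original fit in which all observations are weighted equally.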

Large shrinker

We add a large positive residual to the 10th observation to create an influential shrinker, and instruct the function to highlight the single strongest shrinker in red by setting noShrink = 1.

y[10] <- y[10] + 3
influridge(X, y, noShrink = 1, nw = 20)
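As its weight increases, the optimal tuning parameter for observation 10 increases markedly; the corresponding curve is drawn in red and labeled with the observation number on the right-hand axis.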

Degrees of freedom scale

We can visualize the same shrinker on the scale of degrees of freedom by setting the argument degreeFreedom to TRUE. The degrees-of-freedom scale is inversely related to the tuning-parameter scale: a larger tuning parameter corresponds to fewer effective degrees of freedom. For details, see Section 5.2 of Hellton, Lingjærde and De Bin (2024).

## Plot degrees of freedom
influridge(X, y, noShrink = 1, nw = 20, degreeFreedom = TRUE)
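On this scale the function uses the relation df(lambda) = sum_j d_j^2 / (d_j^2 + lambda), where the d_j are the singular values of X; this decreases monotonically from p at lambda = 0 towards 0 as lambda grows. A minimal sketch of the computation, with a hypothetical helper df.ridge that is not part of the package:

## Sketch: effective degrees of freedom as a function of lambda
## (df.ridge is a hypothetical helper, not part of influridge)
df.ridge <- function(lambda, d) sum(d^2 / (d^2 + lambda))
d <- svd(X)$d # Singular values of the simulated design matrix
sapply(c(0, 1, 10, 100), df.ridge, d = d) # Decreases from p = 5 towards 0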

Bodyfat data set

We also illustrate the graphical procedure using the Bodyfat data set (https://lib.stat.cmu.edu/datasets/bodyfat), analyzed in detail in Section 5.1 of Hellton, Lingjærde and De Bin (2024).
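The data are shipped with the mfp package; if it is not already installed, it can be obtained from CRAN first (a standard setup step, not specific to influridge):

if (!requireNamespace("mfp", quietly = TRUE)) install.packages("mfp")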

require(mfp)
data(bodyfat)
X <- bodyfat[, 6:17] # Omit the non-continuous age variable
y <- bodyfat$siri
n <- dim(X)[1]

X <- scale(X, center = FALSE) # Scale data
X <- cbind(rep(1, n), X) # Add intercept to design matrix

influridge(X, y, noShrink = 1, noExpand = 1, degreeFreedom = TRUE, nw = 30)
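The strongest shrinker and expander are drawn in red and blue, respectively, and labeled with their row numbers on the right-hand axis, so the corresponding observations in the data are easy to locate.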

