decorrelate.R
In decorrelate: Decorrelation Projection Scalable to High Dimensional Data

## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = FALSE,
  error = FALSE,
  tidy = TRUE,
  eval = TRUE,
  dev = c("png", "pdf"),
  cache = TRUE
)

## ----run.examples, fig.height=2.5, fig.width=9, fig.cap ="**Figure 1: Intuition for data whitening transformation**. Given the singular value decomposition of $Y/\\sqrt{n}$ after centering columns is $UDV^T$, the steps in the transformation are: **A)** Original data, **B)** Data rotated along principal components, **C)** Data rotated and scaled, **D)** Data rotated, scaled and rotated back to original axes. Green arrows indicate principal axes and lengths indicate eigen-values.", echo=FALSE----
library(decorrelate)
library(mvtnorm)
library(ggplot2)
library(cowplot)
library(colorRamps)
library(latex2exp)
set.seed(2)

Sigma <- matrix(c(1, .9, .9, 2), ncol = 2)

Y <- rmvnorm(200, c(0, 0), Sigma)

dcmp <- eigen(cov(Y))
U <- whitening:::makePosDiag(dcmp$vectors)
lambda <- dcmp$values

lim <- range(Y)

value <- (Y %*% U)[, 1]

plot_data <- function(Y, rotation, rank, lim) {
  dcmp <- eigen(cov(Y))
  U <- whitening:::makePosDiag(dcmp$vectors)
  lambda <- dcmp$values

  if (rotation == 1) {
    X <- Y

    H <- with(eigen(cov(Y)), vectors %*% diag(values))
  } else if (rotation == 2) {
    X <- Y %*% U

    H <- with(eigen(cov(X)), vectors %*% diag(values))
  } else if (rotation == 3) {
    X <- Y %*% U %*% diag(1 / sqrt(lambda))

    H <- matrix(c(1, 0, 0, 1), ncol = 2)
  } else if (rotation == 4) {
    X <- Y %*% U %*% diag(1 / sqrt(lambda)) %*% t(U)

    H <- with(eigen(cov(Y)), vectors)
  }

  df <- data.frame(X, rank = scale(rank))

  H <- whitening:::makePosDiag(H)

  ggplot(df, aes(X1, X2, color = rank)) +
    geom_segment(x = 0, y = -lim[2], xend = 0, yend = lim[2], color = "black") +
    geom_segment(x = -lim[2], y = 0, xend = lim[2], yend = 0, color = "black") +
    geom_point(size = .8) +
    theme_void() +
    scale_color_gradient2(low = "blue2", mid = "grey60", high = "red2") +
    theme(aspect.ratio = 1, legend.position = "none", plot.title = element_text(hjust = 0.5)) +
    xlim(lim) +
    ylim(lim) +
    geom_segment(x = 0, y = 0, xend = H[1, 1], yend = H[2, 1], color = "#2fbf2e", arrow = arrow(length = unit(0.03, "npc")), size = 1) +
    geom_segment(x = 0, y = 0, xend = H[1, 2], yend = H[2, 2], color = "#2fbf2e", arrow = arrow(length = unit(0.03, "npc")), size = 1)
}


fig1 <- plot_data(Y, rotation = 1, rank(value), lim) + ggtitle(TeX("$Y$"))
fig2 <- plot_data(Y, rotation = 2, rank(value), lim) + ggtitle(TeX("$YV$"))
fig3 <- plot_data(Y, rotation = 3, rank(value), lim) + ggtitle(TeX("$YVD^{-1}$"))
fig4 <- plot_data(Y, rotation = 4, rank(value), lim) + ggtitle(TeX("$YVD^{-1}V^T$"))
plot_grid(fig1, fig2, fig3, fig4, ncol = 4, labels = "AUTO")

## ----example.1, fig.height=5, fig.width=5-------------------------------------
library(decorrelate)
library(Rfast)

n <- 500 # number of samples
p <- 200 # number of features

# create correlation matrix
Sigma <- autocorr.mat(p, .9)

# draw data from correlation matrix Sigma
Y <- rmvnorm(n, rep(0, p), sigma = Sigma * 5.1, seed = 1)
rownames(Y) <- paste0("sample_", 1:n)
colnames(Y) <- paste0("gene_", 1:p)

# eclairs decomposition implements GIW-EB method:
# *E*stimate *c*ovariance/correlation with *l*ow *r*ank and *s*hrinkage
ecl <- eclairs(Y)

# decorrelate data using eclairs decomposition
Y_whitened <- decorrelate(Y, ecl)

# the same whitening can be performed with one command where
# the eigen-value shrinkage is performed internally
Y_whitened2 <- whiten(Y)

## ----plots, fig.height=3.2, fig.width=9, cache=TRUE---------------------------
oldpar <- par(mfrow = c(1, 3))

# plot shrinkage of eigen-values
plot(ecl)

# correlation between variables in observed data
image(cor(Y), axes = FALSE, main = "Correlation of observed data")

# decorrelate data using eclairs decomposition
image(cor(Y_whitened), axes = FALSE, main = "Correlation of whitened data")
par(oldpar)

## ----whitening----------------------------------------------------------------
# compute whitening matrix from eclairs decomposition
W <- getWhiteningMatrix(ecl)

# transform observed data using whitening matrix
Z <- tcrossprod(Y, W)

# evalute difference between whitened computed 2 ways
max(abs(Z - Y_whitened))

## ----cov.cor, eval=FALSE------------------------------------------------------
# # compute correlation matrix from eclairs
# getCor(ecl)
# 
# # compute covariance matrix from eclairs
# getCov(ecl)

## ----multivariate-------------------------------------------------------------
# draw from multivariate normal
n <- 1000
mu <- rep(0, ncol(Y))

# using eclairs decomposition
X.draw1 <- rmvnorm_eclairs(n, mu, ecl)

## ----example.2----------------------------------------------------------------
# use low rank decomposition with 50 components
ecl <- eclairs(Y, k = 60)

# decorrelate data using eclairs decomposition
Y_whitened <- decorrelate(Y, ecl)

## ----plots2, fig.height=3.2, fig.width=9, echo=FALSE--------------------------
oldpar <- par(mfrow = c(1, 3))

# plot shrinkage of eigen-values
plot(ecl)

# decorrelate data using eclairs decomposition
image(cor(Y_whitened), axes = FALSE, main = "Correlation of whitened data")
par(oldpar)

## ----kappa--------------------------------------------------------------------
kappa(ecl)

## ----examples3----------------------------------------------------------------
library(clusterGeneration)

# generate covariance matrix, where the diagonals (i.e. variances) vary
Sigma <- genPositiveDefMat(p, rangeVar=c(1, 1e6))$Sigma

Y <- rmvnorm(n, rep(0, p), sigma = Sigma)

# examine variances of the first 5 variables
apply(Y, 2, var)[1:5]

# transform removes covariance between columns
# so variance of transformed features are *approximately* equal
ecl_cov <- eclairs(Y, compute = "covariance")
Z1 <- decorrelate(Y, ecl_cov)

# variance are *approximately* equal
apply(Z1, 2, var)[1:5]

# transform removes **correlation** between columns
# but variables are not scaled
ecl_cor <- eclairs(Y, compute = "correlation")
Z2 <- decorrelate(Y, ecl_cor)

# variances are not standardized
apply(Z2, 2, var)[1:5]

## ----session, echo=FALSE------------------------------------------------------
sessionInfo()

Any scripts or data that you put into this service are public.

decorrelate documentation built on Aug. 8, 2025, 7:55 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

decorrelate
Decorrelation Projection Scalable to High Dimensional Data

inst/doc/decorrelate.R
In decorrelate: Decorrelation Projection Scalable to High Dimensional Data

Try the decorrelate package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

decorrelate Decorrelation Projection Scalable to High Dimensional Data

inst/doc/decorrelate.R In decorrelate: Decorrelation Projection Scalable to High Dimensional Data

Try the decorrelate package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

decorrelate
Decorrelation Projection Scalable to High Dimensional Data

inst/doc/decorrelate.R
In decorrelate: Decorrelation Projection Scalable to High Dimensional Data