Nothing
## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(
echo = TRUE,
warning = FALSE,
message = FALSE,
error = FALSE,
tidy = TRUE,
eval = TRUE,
dev = c("png", "pdf"),
cache = TRUE
)
## ----run.examples, fig.height=2.5, fig.width=9, fig.cap ="**Figure 1: Intuition for data whitening transformation**. Given the singular value decomposition of $Y/\\sqrt{n}$ after centering columns is $UDV^T$, the steps in the transformation are: **A)** Original data, **B)** Data rotated along principal components, **C)** Data rotated and scaled, **D)** Data rotated, scaled and rotated back to original axes. Green arrows indicate principal axes and lengths indicate eigen-values.", echo=FALSE----
library(decorrelate)
library(mvtnorm)
library(ggplot2)
library(cowplot)
library(colorRamps)
library(latex2exp)
set.seed(2)
Sigma <- matrix(c(1, .9, .9, 2), ncol = 2)
Y <- rmvnorm(200, c(0, 0), Sigma)
dcmp <- eigen(cov(Y))
U <- whitening:::makePosDiag(dcmp$vectors)
lambda <- dcmp$values
lim <- range(Y)
value <- (Y %*% U)[, 1]
plot_data <- function(Y, rotation, rank, lim) {
dcmp <- eigen(cov(Y))
U <- whitening:::makePosDiag(dcmp$vectors)
lambda <- dcmp$values
if (rotation == 1) {
X <- Y
H <- with(eigen(cov(Y)), vectors %*% diag(values))
} else if (rotation == 2) {
X <- Y %*% U
H <- with(eigen(cov(X)), vectors %*% diag(values))
} else if (rotation == 3) {
X <- Y %*% U %*% diag(1 / sqrt(lambda))
H <- matrix(c(1, 0, 0, 1), ncol = 2)
} else if (rotation == 4) {
X <- Y %*% U %*% diag(1 / sqrt(lambda)) %*% t(U)
H <- with(eigen(cov(Y)), vectors)
}
df <- data.frame(X, rank = scale(rank))
H <- whitening:::makePosDiag(H)
ggplot(df, aes(X1, X2, color = rank)) +
geom_segment(x = 0, y = -lim[2], xend = 0, yend = lim[2], color = "black") +
geom_segment(x = -lim[2], y = 0, xend = lim[2], yend = 0, color = "black") +
geom_point(size = .8) +
theme_void() +
scale_color_gradient2(low = "blue2", mid = "grey60", high = "red2") +
theme(aspect.ratio = 1, legend.position = "none", plot.title = element_text(hjust = 0.5)) +
xlim(lim) +
ylim(lim) +
geom_segment(x = 0, y = 0, xend = H[1, 1], yend = H[2, 1], color = "#2fbf2e", arrow = arrow(length = unit(0.03, "npc")), size = 1) +
geom_segment(x = 0, y = 0, xend = H[1, 2], yend = H[2, 2], color = "#2fbf2e", arrow = arrow(length = unit(0.03, "npc")), size = 1)
}
fig1 <- plot_data(Y, rotation = 1, rank(value), lim) + ggtitle(TeX("$Y$"))
fig2 <- plot_data(Y, rotation = 2, rank(value), lim) + ggtitle(TeX("$YV$"))
fig3 <- plot_data(Y, rotation = 3, rank(value), lim) + ggtitle(TeX("$YVD^{-1}$"))
fig4 <- plot_data(Y, rotation = 4, rank(value), lim) + ggtitle(TeX("$YVD^{-1}V^T$"))
plot_grid(fig1, fig2, fig3, fig4, ncol = 4, labels = "AUTO")
## ----example.1, fig.height=5, fig.width=5-------------------------------------
library(decorrelate)
library(Rfast)
n <- 500 # number of samples
p <- 200 # number of features
# create correlation matrix
Sigma <- autocorr.mat(p, .9)
# draw data from correlation matrix Sigma
Y <- rmvnorm(n, rep(0, p), sigma = Sigma * 5.1, seed = 1)
rownames(Y) <- paste0("sample_", 1:n)
colnames(Y) <- paste0("gene_", 1:p)
# eclairs decomposition implements GIW-EB method:
# *E*stimate *c*ovariance/correlation with *l*ow *r*ank and *s*hrinkage
ecl <- eclairs(Y)
# decorrelate data using eclairs decomposition
Y_whitened <- decorrelate(Y, ecl)
# the same whitening can be performed with one command where
# the eigen-value shrinkage is performed internally
Y_whitened2 <- whiten(Y)
## ----plots, fig.height=3.2, fig.width=9, cache=TRUE---------------------------
oldpar <- par(mfrow = c(1, 3))
# plot shrinkage of eigen-values
plot(ecl)
# correlation between variables in observed data
image(cor(Y), axes = FALSE, main = "Correlation of observed data")
# decorrelate data using eclairs decomposition
image(cor(Y_whitened), axes = FALSE, main = "Correlation of whitened data")
par(oldpar)
## ----whitening----------------------------------------------------------------
# compute whitening matrix from eclairs decomposition
W <- getWhiteningMatrix(ecl)
# transform observed data using whitening matrix
Z <- tcrossprod(Y, W)
# evalute difference between whitened computed 2 ways
max(abs(Z - Y_whitened))
## ----cov.cor, eval=FALSE------------------------------------------------------
# # compute correlation matrix from eclairs
# getCor(ecl)
#
# # compute covariance matrix from eclairs
# getCov(ecl)
## ----multivariate-------------------------------------------------------------
# draw from multivariate normal
n <- 1000
mu <- rep(0, ncol(Y))
# using eclairs decomposition
X.draw1 <- rmvnorm_eclairs(n, mu, ecl)
## ----example.2----------------------------------------------------------------
# use low rank decomposition with 50 components
ecl <- eclairs(Y, k = 60)
# decorrelate data using eclairs decomposition
Y_whitened <- decorrelate(Y, ecl)
## ----plots2, fig.height=3.2, fig.width=9, echo=FALSE--------------------------
oldpar <- par(mfrow = c(1, 3))
# plot shrinkage of eigen-values
plot(ecl)
# decorrelate data using eclairs decomposition
image(cor(Y_whitened), axes = FALSE, main = "Correlation of whitened data")
par(oldpar)
## ----kappa--------------------------------------------------------------------
kappa(ecl)
## ----examples3----------------------------------------------------------------
library(clusterGeneration)
# generate covariance matrix, where the diagonals (i.e. variances) vary
Sigma <- genPositiveDefMat(p, rangeVar=c(1, 1e6))$Sigma
Y <- rmvnorm(n, rep(0, p), sigma = Sigma)
# examine variances of the first 5 variables
apply(Y, 2, var)[1:5]
# transform removes covariance between columns
# so variance of transformed features are *approximately* equal
ecl_cov <- eclairs(Y, compute = "covariance")
Z1 <- decorrelate(Y, ecl_cov)
# variance are *approximately* equal
apply(Z1, 2, var)[1:5]
# transform removes **correlation** between columns
# but variables are not scaled
ecl_cor <- eclairs(Y, compute = "correlation")
Z2 <- decorrelate(Y, ecl_cor)
# variances are not standardized
apply(Z2, 2, var)[1:5]
## ----session, echo=FALSE------------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.