#' Optimization problem 6.2
#'
#' @details
#' Some of the shrinkers do not have an analytical solution and the non-lineartiy
#' must be computed numerically. This function implements
#' Optimization problem 6.2 as described in (Donoho, Gavish, and Johnstone, 2013)
#'
#' @param lambda sample eigenvalue
#' @param gamma fitted value of variables/observations
#' @param type combination of (pivot+norm)/(statistical loss)
#' @param lambda sample eigenvalue to be shrunk
#'
.shrink.eigen2 <- function(ell, c, s, type, lambda) {
temp <- unlist(strsplit(type,"\\."))
isStatistical <- FALSE; mnorm <- ""; pivot <- -1
if (length(temp) == 2) {
mnorm <- temp[1]
pivot <- as.numeric(temp[2])
} else {
isStatistical <- TRUE
}
.obj <- function(eta) {
A <- matrix(c(ell, 0, 0, 1), nrow=2, byrow = TRUE)
B <- matrix(c(1 + (eta -1)*c^2, (eta -1)*c*s,
(eta -1)*c*s, 1 + (eta -1)*s^2), nrow=2, byrow = TRUE)
if(isStatistical) {
0.5*log(0.5*det(A + B)/sqrt(det(A)*det(B)))
} else {
M <-
if (pivot == 3) solve(A) %*% B - diag(rep(1,2))
else if (pivot == 4) solve(B) %*% A - diag(rep(1,2))
else if (pivot == 5) solve(A) %*% B + solve(B) %*% A - 2*diag(rep(1,2))
else if (pivot == 7) log(sqrt(solve(A)) %*% B %*% sqrt(solve(A)))
if (mnorm == "Frobenius") norm(M, type = "F")
else if (mnorm == "Operator") sqrt(eigen(M%*%t(M))$values[1])
else if (mnorm == "Nuclear") sum(sqrt(diag(t(M) %*% M)))
}
}
fit <- DEoptim(fn = .obj,
lower=1, upper = lambda,
control = list(itermax = 500, trace = 0))
fit$optim$bestmem
}
#' Table 2
#'
#' @details
#' Table 2 as described in (Donoho, Gavish, and Johnstone, 2013)
#'
#' @param lambda sample eigenvalue
#' @param gamma fitted value of variables/observations
#' @param type combination of (pivot+norm)/(statistical loss)
#'
.shrink.eigen <- function(lambda, gamma, type) {
ell <- .ell.lambda(lambda, gamma)
c <- .c.ell(lambda, gamma)
s <- .s.ell(lambda, gamma)
if (type == "Frobenius.1") ell*c^2 + s^2
else if (type == "Frobenius.2") ell/(c^2 + ell*s^2)
else if (type == "Frobenius.3") (ell*c^2 + ell^2*s^2)/(c^2 + ell^2*s^2)
else if (type == "Frobenius.4") (ell^2*c^2 + s^2)/(ell*c^2 + s^2)
else if (type == "Frobenius.5") .shrink.eigen2(ell, c, s, type, lambda)
else if (type == "Frobenius.6") c^2*(ell-1)/(c^2 + ell*s^2)^2
else if (type == "Frobenius.7") .shrink.eigen2(ell, c, s, type, lambda)
else if (type == "Operator.1") ell
else if (type == "Operator.2") ell
else if (type == "Operator.3") .shrink.eigen2(ell, c, s, type, lambda)
else if (type == "Operator.4") .shrink.eigen2(ell, c, s, type, lambda)
else if (type == "Operator.5") .shrink.eigen2(ell, c, s, type, lambda)
else if (type == "Operator.6") (ell-1)/(c^2 + ell*s^2)
else if (type == "Operator.7") .shrink.eigen2(ell, c, s, type, lambda)
else if (type == "Nuclear.1") max(1, 1 + (ell -1)*(1 - 2*s^2))
else if (type == "Nuclear.2") max(1, ell/(c^2 + (2*ell -1)*s^2))
else if (type == "Nuclear.3") max(1, ell/(c^2 + ell^2*s^2))
else if (type == "Nuclear.4") max(1, (ell^2*c^2 + s^2)/ell)
else if (type == "Nuclear.5") .shrink.eigen2(ell, c, s, type, lambda)
else if (type == "Nuclear.6") max(1, (ell-(ell-1)^2*c^2*s^2)/(c^2 + ell*s^2)^2)
else if (type == "Nuclear.7") .shrink.eigen2(ell, c, s, type, lambda)
else if (type == "Stein") ell/(c^2 + ell*s^2)
else if (type == "Entropy") ell*c^2 + s^2
else if (type == "Divergence") sqrt((ell^2*c^2 + ell*s^2)/(c^2 + ell*s^2))
else if (type == "Affinity") .shrink.eigen2(ell, c, s, type, lambda)
else if (type == "Frechet") (sqrt(ell)*c^2 + s^2)^2
else NA
}
#' Equation 4.4
#'
#' @details
#' Equation 4.4 as described in (Donoho, Gavish, and Johnstone, 2013)
#'
#' @param lambda sample eigenvalue
#' @param gamma fitted value of variables/observations
#'
#'
.ell.lambda <- function(lambda, gamma) {
temp <- lambda + 1 - gamma
0.5*(temp + sqrt(temp^2 - 4*lambda))
}
#' Equation 1.2
#'
#' @details
#' Equation 1.2 as described in (Donoho, Gavish, and Johnstone, 2013)
#'
#' @param lambda sample eigenvalue
#' @param gamma fitted value of variables/observations
#'
#'
.c.ell <- function(lambda, gamma) {
ell <- .ell.lambda(lambda, gamma)
sqrt((1 - gamma/(ell - 1)^2)/(1 + gamma/(ell - 1)))
}
#' Equation 6.3
#'
#' @details
#' Equation 6.3 as described in (Donoho, Gavish, and Johnstone, 2013)
#'
#' @param lambda sample eigenvalue
#' @param gamma fitted value of variables/observations
#'
.s.ell <- function(lambda, gamma) {
sqrt(1 - (.c.ell(lambda, gamma))^2)
}
#' Calculate unknown noise estimate
#'
#' @details
#' This method calculates the unknown unbiased estimate of noise for the eigenvalues
#' based on the procedure described in (Kritchman and Nadler, 2009). It involves
#' solving a couple of nonlinear equations.
#'
#' @param lambdas eigenvalues to which the distribution is fitted
#' @param numObs number of data observations
#' @param numOfSpikes number of spikes in the spike covariance model
#'
.sigma.sq.est <- function(lambdas , numObs, numOfSpikes) {
p <- length(lambdas)
sigma.sq <- 1/(p-numOfSpikes) * sum(lambdas[(numOfSpikes+1):p])
while(TRUE) {
tmp.b <- lambdas[1:numOfSpikes] + sigma.sq - sigma.sq*(p-numOfSpikes)/numObs
discriminant <- tmp.b^2 - 4 * lambdas[1:numOfSpikes]*sigma.sq
if (any( discriminant < 0)) break
rho <- 0.5*(tmp.b + sqrt(discriminant))
sigma.sq.new <- 1/(p-numOfSpikes) * (sum(lambdas) - sum(rho))
if (abs(sigma.sq.new - sigma.sq)/sigma.sq < 1e-8) break
sigma.sq <- sigma.sq.new
}
sigma.sq
}
#' Detect spikes in a covariance model
#'
#' @details
#' This method calculates the number of spikes in a spiked covariance model
#' by iteratively comparing a standardized eigenvalue to the qunatile of Tracy-Wisdom
#' distribution for given significance level. The procedure is described in
#' (Kritchman and Nadler, 2009).
#'
#' @param lambdas eigenvalues to which the distribution is fitted
#' @param numObs number of data observations
#' @param alpha significance level for the test
#'
.detectSpikes <- function(lambdas, numObs, alpha = 0.05) {
p <- length(lambdas); max.spikes <- min(numObs, p) - 1; sigma.sq <- 0
s_alpha <- (-3/2 * log(4*sqrt(pi) * alpha/100 ))^(2/3)
for(spikes in 1:max.spikes) {
mu_np <- 1/numObs*(sqrt(numObs-1/2) + sqrt((p - spikes)-1/2))^2
sigma_np <- sqrt(mu_np/numObs) * (1/sqrt(numObs-1/2) + 1 / sqrt((p-spikes)-1/2) )^(1/3)
sigma.sq.temp <- .sigma.sq.est(lambdas,numObs,spikes)
if(lambdas[spikes] <= sigma.sq.temp*(mu_np + s_alpha * sigma_np)) break
sigma.sq <- sigma.sq.temp
}
est.sigma.sq <- if(spikes > 1) sigma.sq else sum(lambdas)/p
list(numOfSpikes = spikes - 1, sigma.sq = est.sigma.sq)
}
#' Fitting median to the data
#'
#' @details
#' This method calculates the median of the eigenvalues given that they follow
#' a Marchenko Pastur distribution. The median is matched to the 50 percentile
#' quantile from the MP distribution. This helps fit scale.factor to the data
#'
#' @param theta parameter to be optimized. In this case it is
#' gamma, (variables/observations)
#' @param lambdas eigenvalues to which the distribution is fitted
#'
.mp.obj <- function(theta, lambdas, gamma) {
scale.factor <- theta
lambda.max <- scale.factor*(1 + sqrt(gamma))^2
lambda.min <- scale.factor*(1 - sqrt(gamma))^2
lambdas <- lambdas[(lambdas <= lambda.max) & (lambdas >= lambda.min)]
if(length(lambdas) < 1) return(.Machine$double.xmax)
m <- qmp(0.5, svr = 1/gamma, var = scale.factor)
abs(m - median(lambdas))
}
#' Fitting an MP distribution to the data
#'
#' @details
#' This method takes in the eigenvalues of the sample covariance matrix and fits
#' a Marchenko Pastur distribution. The eigenvalues are assumed to have unit variance
#'
#' @param lambdas eigenvalues of the sample covariance matrix
#' @param gamma ratio of varibales/observations
#' @param numOfSpikes number of spikes in the spike covariance model
#' @param numObs number of data observations
#' @param method KNTest/median fitting
#'
.getMPfit <- function(lambdas, gamma, numOfSpikes, numObs, method) {
scale.factor <-
if(is.na(numOfSpikes)) {
if (method == "median-fitting") {
counts <- hist(lambdas, plot = FALSE, breaks = "FD")$counts
temp.spikes <- sum(counts[which.max(counts == 0):length(counts)])
lower <- lambdas[temp.spikes + 1]/(1 + sqrt(gamma))^2
upper <- if(mean(lambdas) > lower) mean(lambdas) else lambdas[1]/(1 + sqrt(gamma))^2
fit <- DEoptim(fn = .mp.obj,
lower=lower, upper = upper,
control = list(itermax = 500, trace = 0),
lambdas = lambdas, gamma = gamma)
fit$optim$bestmem
} else {
kn.fit <- .detectSpikes(lambdas, numObs)
kn.fit$sigma.sq
}
}else
lambdas[numOfSpikes + 1]/(1 + sqrt(gamma))^2
lambdas <- lambdas/scale.factor
lambda.max <- (1 + sqrt(gamma))^2
spikes <- length(lambdas[lambdas > lambda.max])
if(!is.na(numOfSpikes) && spikes != numOfSpikes) browser()
list(lambda.max = lambda.max, lambdas = lambdas, gamma = gamma,
scale.factor = scale.factor, numOfSpikes = spikes)
}
#' (Donoho, Gavish, and Johnstone, 2013)
#'
#' @param R xts object of asset returns
#' @param gamma ratio of varibales/observations. If NA it will be set to
#' ratio of varibales/observations
#' @param numOfSpikes number of spikes in the spike covariance model.
#' If missing then it is estimated based on the number of
#' eigenvalues above the cutoff.
#' @param norm Type of matrix norm that must be calculated. Defaults to Frobenius
#' @param pivot takes values from 1...7. Details can be found in the paper
#' @param statistical Stein/Entropy/Divergence/Affinity/Frechet. Default is set to NA.
#' when a valid value is set norm and pivot values are ignored
#' @param fit list with 5 elements, cutoff for the bulk of MP distribution,
#' scaled lambdas, fitted gamma fitted scaling constant and
#' numOfSpikes
#' @param method KNTest/median-fitting. Default is KNTest
#'
#' @details
#' If the number of spikes are missing and the selected method is median-fitting
#' then, firstly the scale factor is estimated. We guess the number of spikes by
#' counting the number of breaks using the Freedman-Diaconis algorithm. The
#' initial number of spikes are guessed by counting the number of elements after
#' the first zero. We use this to lower bound the variance. Variance is computed
#' by fitting the median to the spectrum of eigenvalues. If the method is KNTest
#' then we follow the procedure in (Kritchman and Nadler, 2009)
#'
#'
#' @examples
#' \dontrun{
#' data("rmtdata")
#' model <- estSpikedCovariance(rmtdata, numOfSpikes=15)
#' }
#'
#' @author Rohit Arora
#'
#' @export
#'
#'
estSpikedCovariance <- function(R, gamma = NA,
numOfSpikes = NA,
method = c("KNTest", "median-fitting"),
norm = c("Frobenius", "Operator", "Nuclear"),
pivot = 1, statistical = NA,
fit = NA) {
.data <- if(is.xts(R)) coredata(R) else as.matrix(R)
T <- nrow(.data); M <- ncol(.data)
if (T < M) stop("Does not work when T < M")
if((!is.na(gamma)) && (gamma > 1 || gamma < 0)) stop("Invalid gamma")
if(is.na(gamma)) gamma <- M/T
else if (gamma > 1 || gamma < 0) stop("Invalid gamma")
if((!is.na(numOfSpikes)) && ((numOfSpikes > M) || (numOfSpikes < 0)))
stop("Invalid numOfSpikes")
if ("numOfSpikes" %in% names(fit) && (!is.na(numOfSpikes)) &&
(fit$numOfSpikes != numOfSpikes))
stop("Conflicting numOfSpikes values")
norm <- norm[1]
if (!norm %in% c("Frobenius", "Operator", "Nuclear"))
stop("Invalid norm value")
if (pivot < 0 || pivot > 7) stop("Invalid pivot selected")
if(!is.na(statistical) &&
!statistical %in% c("Stein","Entropy","Divergence","Affinity","Frechet"))
stop("Invalid statistical parameter selected")
method <- method[1]
if (!method %in% c("KNTest", "median-fitting")) stop("Invalid method")
S <- cov(.data)
eigen <- eigen(S, symmetric=T)
lambdas <- eigen$values
if(all(is.na(fit))) fit <- .getMPfit(lambdas, gamma, numOfSpikes, T, method)
lambda.max <- fit$lambda.max; lambdas <- fit$lambdas;
scale.factor <- fit$scale.factor
spiked.lambdas <- lambdas[lambdas > lambda.max]
if (length(spiked.lambdas) == 0) stop("Nothing to shrink. Change parameters")
type <- ifelse(is.na(statistical), paste(norm,".",pivot,sep=""), statistical)
spiked.lambdas <- sapply(spiked.lambdas,
function(lambda)
.shrink.eigen(lambda, gamma, type))
lambdas[lambdas > lambda.max] <- spiked.lambdas
lambdas[lambdas <= lambda.max] <- 1
rescaled.lambdas <- scale.factor * lambdas
C <- eigen$vectors %*% diag(rescaled.lambdas) %*% t(eigen$vectors)
model <- list(cov = C, data = R, orig.lambdas = eigen$values, pivot = pivot,
norm = norm, new.lambdas = rescaled.lambdas,
dist.fit = fit)
class(model) <- "spikedCovariance"
model
}
#' internal function for Eigenvalue plot
#'
#' @details
#' Compares shrunk eigenvalues against sample eigenvalues
#'
#' @param x model of the type spikedCovariance
#' @param norm Type of matrix norm that must be calculated. If missing value from
#' the model will be used
#' @param statistical Stein/Entropy/Divergence/Affinity/Frechet. Default is set to NA.
#' when a valid value is set norm and pivot values are ignored
.plot.spikedCovariance <- function(x, norm = NA, statistical = FALSE) {
data <- x$data; gamma <- x$dist.fit$gamma; numOfSpikes <- x$dist.fit$numOfSpikes
scale.factor <- x$dist.fit$scale.factor
if(is.na(norm)) norm <- x$norm
if (!norm %in% c("Frobenius", "Operator", "Nuclear"))
stop("Invalid norm value")
loss <- c("Stein","Entropy","Divergence","Affinity","Frechet")
numLosses <- if(statistical) length(loss) else 7
E <- matrix(NA, nrow = ncol(data), ncol = numLosses)
for(i in 1:numLosses) {
C <-
if (!statistical) {
estSpikedCovariance(data, numOfSpikes = numOfSpikes,
norm = norm, pivot = i, fit = x$dist.fit)
} else {
estSpikedCovariance(data, numOfSpikes = numOfSpikes,
statistical = loss[i], fit = x$dist.fit)
}
E[,i] <- C$new.lambdas
}
colnames(E) <- if (!statistical) paste("pivot",1:7,sep = ".") else loss
df <- data.frame(sample = x$orig.lambdas, E)
df <- melt(df, id = "sample")
colnames(df) <- c("samplee" ,"typeofloss" , "shrunke")
colors <- c("red" , "green" , "blue" , "cyan" , "hotpink" , "gold3" ,
"black", "purple")
colors <- c(head(colors, numLosses), tail(colors,1))
names(colors) <- c(colnames(E), "y=x")
norm <- if(statistical) "Statistical" else paste(norm," Norm", sep="")
p <- ggplot(data = df, aes_string(x = 'samplee', y = 'shrunke' )) +
geom_point(aes_string(color = 'typeofloss')) +
geom_line(aes_string(color = 'typeofloss')) +
geom_abline(aes_string(color = "y=x"), slope=1, intercept=0) +
scale_color_manual("", values = colors) +
xlab("Sample Eigenvalues") + ylab("Shrunk Eigenvalues") +
scale_x_continuous(breaks=pretty_breaks(n=10)) +
scale_y_continuous(breaks=pretty_breaks(n=10)) +
ggtitle(bquote(list(Discrepancy == .(norm), Spikes == .(numOfSpikes),
sigma^{2} == .(round(scale.factor,6))))) +
theme_bw() +
theme(axis.title=element_text(size=12), legend.key = element_blank())
p
}
#' Eigenvalue plot. Similar to figure 1 in the paper
#'
#' @details
#' Compares shrunk eigenvalues against sample eigenvalues for all norms and losses
#'
#' @importFrom scales pretty_breaks
#' @importFrom gridExtra grid.arrange
#' @importFrom grid textGrob gpar
#'
#' @param R xts object of asset returns
#' @param gamma ratio of varibales/observations. If NA it will be set to
#' ratio of varibales/observations
#' @param numOfSpikes model of the type spikedCovariance
#' @param method KNTest/median-fitting. Default is KNTest
#' @param ... additional arguments unused
#' @author Rohit Arora
#' @examples
#' \dontrun{
#' data("rmtdata")
#' plot(rmtdata, numOfSpikes=10)
#' }
#'
#' @export
#'
plotSpikedCovariance <- function(R, gamma = NA, numOfSpikes = NA,
method = "KNTest", ...) {
dummyModel <- estSpikedCovariance(R, gamma = gamma, numOfSpikes = numOfSpikes,
method = method, norm = "Frobenius", pivot = 1)
p.frob <- .plot.spikedCovariance(dummyModel, norm = "Frobenius")
p.oper <- .plot.spikedCovariance(dummyModel, norm = "Operator")
p.nuc <- .plot.spikedCovariance(dummyModel, norm = "Nuclear")
p.stat <- .plot.spikedCovariance(dummyModel, statistical = TRUE)
grid.arrange(p.frob, p.nuc, p.oper, p.stat, ncol = 2,
top =
textGrob("Optimal Shrinkers for 26 Component Loss Functions",
gp=gpar(fontsize=20, fontface = 'bold')))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.