Nothing
#'Smoothed FPCA via iterative penalized rank one SVDs.
#'
#'Implements the algorithm of Huang, Shen, Buja (2008) for finding smooth right
#'singular vectors of a matrix \code{X} containing (contaminated) evaluations of
#'functional random variables on a regular, equidistant grid. If the number of
#'smooth SVs to extract is not specified, the function hazards a guess for the
#'appropriate number based on the asymptotically optimal truncation threshold
#'under the assumption of a low rank matrix contaminated with i.i.d. Gaussian
#'noise with unknown variance derived in Donoho, Gavish (2013). Please note that
#'Donoho, Gavish (2013) should be regarded as experimental for functional PCA,
#'and will typically not work well if you have more observations than grid
#'points.
#'
#'@param Y data matrix (rows: observations; columns: grid of eval. points)
#'@param ydata a data frame \code{ydata} representing
#' irregularly observed functions. NOT IMPLEMENTED for this method.
#'@param argvals the argument values of the function evaluations in \code{Y},
#' defaults to a equidistant grid from 0 to 1. See Details.
#'@param npc how many smooth SVs to try to extract, if \code{NA} (the default)
#' the hard thresholding rule of Donoho, Gavish (2013) is used (see Details,
#' References).
#'@param center center \code{Y} so that its column-means are 0? Defaults to
#' \code{TRUE}
#'@param maxiter how many iterations of the power algorithm to perform at most
#' (defaults to 15)
#'@param tol convergence tolerance for power algorithm (defaults to 1e-4)
#'@param diffpen difference penalty order controlling the desired smoothness of
#' the right singular vectors, defaults to 3 (i.e., deviations from local
#' quadratic polynomials).
#'@param gridsearch use \code{\link[stats]{optimize}} or a grid search to find
#' GCV-optimal smoothing parameters? defaults to \code{TRUE}.
#'@param alphagrid grid of smoothing parameter values for grid search
#'@param lower.alpha lower limit for for smoothing parameter if
#' \code{!gridsearch}
#'@param upper.alpha upper limit for smoothing parameter if \code{!gridsearch}
#'@param verbose generate graphical summary of progress and diagnostic messages?
#' defaults to \code{FALSE}
#' @param integration ignored, see Details.
#'@details Note that \code{fpca.ssvd} computes smoothed orthonormal eigenvectors
#' of the supplied function evaluations (and associated scores), not (!)
#' evaluations of the smoothed orthonormal eigenfunctions. The smoothed
#' orthonormal eigenvectors are then rescaled by the length of the domain
#' defined by \code{argvals} to have a quadratic integral approximately equal
#' to one (instead of crossproduct equal to one), so they approximate the behavior
#' of smooth eigenfunctions. If \code{argvals} is not equidistant,
#' \code{fpca.ssvd} will simply return the smoothed eigenvectors without rescaling,
#' with a warning.
#'@return an \code{fpca} object like that returned from \code{\link{fpca.sc}},
#' with entries \code{Yhat}, the smoothed trajectories, \code{Y}, the observed
#' data, \code{scores}, the estimated FPC loadings, \code{mu}, the column means
#' of \code{Y} (or a vector of zeroes if \code{!center}), \code{efunctions},
#' the estimated smooth FPCs (note that these are orthonormal vectors, not
#' evaluations of orthonormal functions if \code{argvals} is not equidistant),
#' \code{evalues}, their associated eigenvalues, and \code{npc}, the number of
#' smooth components that were extracted.
#'@seealso \code{\link{fpca.sc}} and \code{\link{fpca.face}} for FPCA based on
#' smoothing a covariance estimate; \code{\link{fpca2s}} for a faster SVD-based
#' approach.
#'@author Fabian Scheipl
#'@references Huang, J. Z., Shen, H., and Buja, A. (2008). Functional principal
#' components analysis via penalized rank one approximation. \emph{Electronic
#' Journal of Statistics}, 2, 678-695
#'
#' Donoho, D.L., and Gavish, M. (2013). The Optimal Hard Threshold for Singular
#' Values is 4/sqrt(3). eprint arXiv:1305.5870. Available from
#' \url{https://arxiv.org/abs/1305.5870}.
#'@examples
#' ## as in Sec. 6.2 of Huang, Shen, Buja (2008):
#' set.seed(2678695)
#' n <- 101
#' m <- 101
#' s1 <- 20
#' s2 <- 10
#' s <- 4
#' t <- seq(-1, 1, l=m)
#' v1 <- t + sin(pi*t)
#' v2 <- cos(3*pi*t)
#' V <- cbind(v1/sqrt(sum(v1^2)), v2/sqrt(sum(v2^2)))
#' U <- matrix(rnorm(n*2), n, 2)
#' D <- diag(c(s1^2, s2^2))
#' eps <- matrix(rnorm(m*n, sd=s), n, m)
#' Y <- U%*%D%*%t(V) + eps
#'
#' smoothSV <- fpca.ssvd(Y, verbose=TRUE)
#'
#' layout(t(matrix(1:4, nr=2)))
#' clrs <- sapply(rainbow(n), function(c)
#' do.call(rgb, as.list(c(col2rgb(c)/255, .1))))
#' matplot(V, type="l", lty=1, col=1:2, xlab="",
#' main="FPCs: true", bty="n")
#' matplot(smoothSV$efunctions, type="l", lty=1, col=1:5, xlab="",
#' main="FPCs: estimate", bty="n")
#' matplot(1:m, t(U%*%D%*%t(V)), type="l", lty=1, col=clrs, xlab="", ylab="",
#' main="true smooth Y", bty="n")
#' matplot(1:m, t(smoothSV$Yhat), xlab="", ylab="",
#' type="l", lty=1,col=clrs, main="estimated smooth Y", bty="n")
#'@export
fpca.ssvd <- function(Y=NULL, ydata = NULL, argvals = NULL, npc = NA, center = TRUE, maxiter = 15,
tol = 1e-4, diffpen = 3, gridsearch = TRUE, alphagrid = 1.5^(-20:40),
lower.alpha = 1e-5, upper.alpha = 1e7, verbose = FALSE, integration = "trapezoidal"){
stopifnot(!is.null(Y))
if(any(is.na(Y))) stop("No missing values in <Y> allowed.")
m <- ncol(Y)
n <- nrow(Y)
if(!is.null(ydata)) {
stop(paste("<ydata> argument for irregular data is not supported,",
"please use fpca.sc instead."))
}
irregular <- FALSE
if(!is.null(argvals)) {
stopifnot(is.numeric(argvals),
length(argvals) == m,
all(!is.na(argvals)))
if(any(diff(argvals)/mean(diff(argvals)) > 1.05 |
diff(argvals)/mean(diff(argvals)) < 0.95)) {
warning(paste("non-equidistant <argvals>-grid detected:",
"fpca.ssvd will return orthonormal eigenvectors of the function evaluations",
"not evaluations of the orthonormal eigenvectors.",
"Use fpca.sc() for the latter instead."))
irregular <- TRUE
}
} else {
argvals <- seq(0, 1, length = m)
}
#GCV criterion from eq. (10), App. C:
gcv <- function(alpha, w, m, lambda){
sqrt(sum( (w * (alpha*lambda)/(1 + alpha*lambda))^2 ))/
(1- 1/m * sum( 1/(1 + alpha*lambda) ))
}
# Recursion for difference operator matrix
makeDiffOp <- function(degree, dim){
if(degree==0){
return(diag(dim))
} else {
return(diff(makeDiffOp(degree-1, dim)))
}
}
if(is.na(npc)){
npc <- getNPC.DonohoGavish(Y)
}
if(!is.numeric(npc)) stop("Invalid <npc>.")
if(npc<1 | npc>min(m,n)) stop("Invalid <npc>.")
if(verbose){
cat("Using ", npc , "smooth components based on Gavish and Donoho (2014).\n")
}
if(!is.numeric(alphagrid)) stop("Invalid <alphagrid>.")
if(any(is.na(alphagrid)) | any(alphagrid<.Machine$double.eps)) stop("Invalid <alphagrid>.")
uhoh <- numeric(0)
if(verbose & interactive()){
par(ask=TRUE)
on.exit(par(ask=FALSE))
}
if(verbose){
cat("Singular values of smooth and non-smooth ('noise') parts:")
}
Omega <- crossprod(makeDiffOp(degree=diffpen, dim=m))
eOmega <- eigen(Omega, symmetric=TRUE)
lambda <- eOmega$values
Gamma <- eOmega$vectors
U <- matrix(NA, nrow=n, ncol=npc)
V <- matrix(NA, nrow=m, ncol=npc)
d <- rep(NA, npc)
Yorig <- Y
if(center){
meanY <- predict(smooth.spline(x=1:m, y=colMeans(Y)), x=1:m)$y
Y <- t(t(Y) - meanY)
} else {
meanY <- rep(0, ncol(Y))
}
Ynow <- Y
for(k in 1:npc){
## 'Power algorithm', Section 3 / Appendix C :
YnowGamma <- Ynow%*%Gamma
vold <- svd(Ynow, nu=0, nv=1)$v[,1]
u <- Ynow %*% vold
iter <- 1; reldiff <- tol+1
while(all(c(reldiff > tol, iter<=maxiter))){
w <- t(YnowGamma)%*%u
if(gridsearch){
gridmin <- which.min(sapply(alphagrid, gcv, w=w, m=m, lambda=lambda))
minalpha <- alphagrid[gridmin]
} else {
minalpha <- optimize(f=gcv, interval=c(lower.alpha, upper.alpha),
w=w, m=m, lambda=lambda)$minimum
}
# v = S(alpha)^-1 Y u = Gamma%*%(I + minalpha*Lambda)^-1 Gamma' Y u)
vnew <- Gamma%*%(w/(1+minalpha*lambda))
vnew <- vnew/sqrt(sum(vnew^2))
reldiff <- sum((vold-vnew)^2)/sum(vold^2)
iter <- iter +1
vold <- vnew
u <- Ynow %*% vold
} # end while(reldiff)
if(reldiff > tol){
warning("Not converged for SV ", k,
"; relative difference was ", format(reldiff),".")
}
U[,k] <- u/sqrt(sum(u^2))
V[,k] <- vnew
d[k] <- sqrt(sum(u^2))
if(verbose){
layout(t(matrix(1:6, ncol=2)))
matlplot <- function(...) matplot(..., type="l", lty=1, col=1, lwd=.1)
matlplot(t(Ynow), ylim=range(Ynow), main=bquote(Ynow[.(k)]), xlab="", ylab="", bty="n")
matlplot(t(U[,k, drop=FALSE]%*%t(V[,k, drop=FALSE]*d[k])),
ylim=range(Ynow), main=bquote((UDV^T)[.(k)]), xlab="", ylab="", bty="n")
matlplot(t(Ynow - U[,k, drop=FALSE]%*%t(V[,k, drop=FALSE]*d[k])),
ylim=range(Ynow), main=bquote(Ynow[.(k)] - (UDV^T)[.(k)]), xlab="", ylab="", bty="n")
matlplot(t(Y), ylim=range(Y), main=bquote(Y), xlab="", ylab="", bty="n")
matlplot(t(U[,1:k, drop=FALSE]%*%(t(V[,1:k, drop=FALSE])*d[1:k])),
ylim=range(Y), main=bquote(Y[.(k)]), xlab="", ylab="", bty="n")
matlplot(t(Ynow - U[,k, drop=FALSE]%*%(t(V[,k, drop=FALSE])*d[k])),
ylim=range(Y), main=bquote(Y - Y[.(k)]), xlab="", ylab="", bty="n")
}
Ynow <- Ynow - U[, k, drop=FALSE] %*% (t(V[, k, drop=FALSE]) * d[k])
noisesv <- svd(Ynow, nu=0, nv=0)$d[1]
if(verbose){
cat("k:",k, "-- smooth:", d[k], "-- 'noise':", noisesv, "-- alpha:", minalpha, "\n")
}
if(noisesv > 1.1 * d[k]){
uhoh <- c(uhoh, k)
}
}# end for(k)
if(length(uhoh)) warning("First SV for remaining un-smooth signal larger than ",
"SV found for smooth signal for component(s) ", paste(uhoh, collapse=","))
if(!irregular){
# scale smooth eigenvectors so they're scaled as realizations of orthonormal
# eigenfunctions i.e. so that colSums(diff(argvals) * V^2) == 1 instead of
# crossprod(V) == diag(npc)
scale <- sqrt(mean(diff(argvals)))
V <- V / scale
d <- d * scale
} else {
scale <- 1
}
scores <- U%*%(d * diag(length(d)))
ret = list(
Yhat = t(meanY + t(scores%*%t(V))),
Y = Yorig,
scores = scores,
mu = meanY,
efunctions = V,
#FIXME: this should be d^2 but the scaling is way off....
evalues = diag(var(scores)),
npc = npc)
class(ret) = "fpca"
return(ret)
}
getNPC.DonohoGavish <- function(X){
# use Gavish and Donoho (2014) for estimating suitable number of sv's to extract:
n <- nrow(X)
m <- ncol(X)
beta <- n/m
if(beta > 1 | beta < 1e-3){
warning("Approximation for \\beta(\\omega) may be invalid.")
}
# ## approx for omega.beta below eq. (25):
# betaplus <- 1 + sqrt(beta)^2
# betaminus <- 1 - sqrt(beta)^2
# marcenkopastur <- function(x){
# abs(integrate(function(t){
# sqrt((betaplus - t) * (t - betaminus))/(2*pi*t)
# }, lower=betaminus, upper=x, subdivisions=1e5,
# stop.on.error = FALSE)$value - 0.5)
# }
# mu.beta <- optimize(marcenkopastur,
# interval=c(betaminus, betaplus))$minimum
# # eq. (10)
# lambda.beta <- sqrt(2*(beta+1) + (8*beta)/(beta + 1 + sqrt(beta^2 + 14*beta +1)))
# omega.beta <- lambda.beta/sqrt(mu.beta)
omega.beta <- .56*beta^3 - 0.95*beta^2 + 1.82*beta + 1.43
y <- svd(X, nu=0, nv=0)$d
rankY <- min(which(cumsum(y[y>0])/sum(y[y>0]) > .995))
y.med <- median(y)
npc <- min(max(1, sum(y > omega.beta * y.med)), rankY)
return(npc)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.