#' fast_estim_ncomp
#'
#' @param X
#' @param ncp.min
#' @param ncp.max
#' @param scale
#' @export
fast_estim_ncomp <- function(X, ncp.min=0,ncp.max=NULL, center=FALSE, scale=FALSE) {
p=ncol(X)
n=nrow(X)
if (is.null(ncp.max)) {
ncp.max <- min(dim(X))
}
ncp.max <- min(nrow(X),ncol(X),ncp.max)
crit <- NULL
X <- scale(X,center, scale)
crit0 <- mean(X^2, na.rm = TRUE)*(n*p)/(p*(n-1))
if (ncp.max == min(dim(X))) {
rr <- corpcor::fast.svd(X)
} else {
rr <- RSpectra::svds(X, ncp.max)
}
chash <- list()
chash[["0"]] <- crit0
compute_gcv <- function(q) {
rec <- tcrossprod(sweep(rr$u[, 1:q, drop = F], 2, rr$d[1:q], FUN = "*"), rr$v[, 1:q, drop = F])
mean(( n*p*(X-rec)/ ((n-1)*(p)- q*(n+p-q-1)))^2, na.rm=T)
}
run_gcv <- function(cand) {
crit <- NULL
for (i in seq_along(cand)) {
q <- cand[i]
message("q: ", q)
if (!is.null(chash[[as.character(q)]])) {
cval <- chash[[as.character(q)]]
} else {
cval <- compute_gcv(q)
chash[[as.character(q)]] <<- cval
}
crit <- c(crit, cval)
if (i > 1 && crit[i] > crit[i-1]) {
break
}
}
cbind(cand[1:length(crit)], crit)
}
if (ncp.max < 10) {
cand <- seq(ncp.min, ncp.max)
} else {
cand <- sort(unique(round(c(seq(ncp.min, ncp.max, by=sqrt(ncp.max)), ncp.max))))
}
if (length(cand) < 10) {
for (i in cand) {
if (i == 0) {
next
}
chash[[as.character(i)]] <- compute_gcv(i)
}
cnames <- names(chash)
delta <- diff(unlist(chash))
firstpos <- which(delta > 0)
opt <- if (length(firstpos) >= 1) {
as.integer(cnames[firstpos[1]])
} else {
as.integer(cnames[length(cnames)])
}
#opt <- as.integer(cnames[which.min(unlist(chash))])
} else {
while (TRUE) {
crit <- run_gcv(cand)
wmin <- which.min(crit[,2])
opt <-crit[wmin,1]
low <- if (wmin == 1) {
opt
} else {
max(ncp.min, crit[wmin-1,1] + 1)
}
high <- if (wmin == nrow(crit)) {
opt
} else {
min(ncp.max, crit[wmin+1,1] - 1)
}
if ((high - low) < 5) {
cand <- seq(low, high, by=1)
crit <- run_gcv(cand)
wmin <- which.min(crit[,2])
opt <- crit[wmin,1]
break
} else if (low == high) {
break
} else {
cand <- sort(unique(c(round(seq(low, high, by=sqrt(high-low))), high)))
}
}
}
ord <- order(as.numeric(names(chash)))
list(ncomp=as.integer(names(chash)[ord]), criterion=as.numeric(chash[ord]), bestcomp=opt)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.