Nothing
#### Originally from orphaned package SLmisc
#### (Version: 1.4.1, 2007-04-12, Maintainer: Matthias Kohl <kohl@sirs-lab.com>)
#### License: GPL (version 2 or later)
####
#### which said
#### "function corresponds to function gap in package SAGx"
## MM: SAGx is now in Bioconductor --- 1.10.1{devel} or 1.11.1{release}
## had gap() *corrected* to re-cluster using FUNcluster --> see ./gap-SAGx.R.~orig~
##
## MM: Package 'lga' -- has gap() and lga and robust lga [-> UBC]
## - it uses boot() nicely [2012-01: ORPHANED because Justin Harrington is amiss]
## MM: renamed arguments, and changed almost everything
clusGap <- function (x, FUNcluster, K.max, B = 100, d.power = 1,
spaceH0 = c("scaledPCA", "original"),
verbose = interactive(), ...)
{
stopifnot(is.function(FUNcluster), length(dim(x)) == 2, K.max >= 2,
(n <- nrow(x)) >= 1, ncol(x) >= 1)
if(B != (B. <- as.integer(B)) || (B <- B.) <= 0)
stop("'B' has to be a positive integer")
cl. <- match.call()
if(is.data.frame(x))
x <- as.matrix(x)
ii <- seq_len(n)
W.k <- function(X, kk) {
clus <- if(kk > 1) FUNcluster(X, kk, ...)$cluster else rep.int(1L, nrow(X))
## ---------- = = -------- kmeans() has 'cluster'; pam() 'clustering'
0.5* sum(vapply(split(ii, clus),
function(I) { xs <- X[I,, drop=FALSE]
sum(dist(xs)^d.power/nrow(xs)) }, 0.))
}
logW <- E.logW <- SE.sim <- numeric(K.max)
if(verbose) cat("Clustering k = 1,2,..., K.max (= ",K.max,"): .. ", sep='')
for(k in 1:K.max)
logW[k] <- log(W.k(x, k))
if(verbose) cat("done\n")
spaceH0 <- match.arg(spaceH0)
## Scale 'x' into hypercube -- later fill with H0-generated data
xs <- scale(x, center=TRUE, scale=FALSE)
m.x <- rep(attr(xs,"scaled:center"), each = n) # for back-trafo later
switch(spaceH0,
"scaledPCA" =
{
## (These & (xs,m.x) above basically do stats:::prcomp.default()
V.sx <- svd(xs, nu=0)$v
xs <- xs %*% V.sx # = transformed(x)
},
"original" = {}, # (do nothing, use 'xs')
## otherwise
stop("invalid 'spaceH0':", spaceH0))
rng.x1 <- apply(xs, 2L, range)
logWks <- matrix(0, B, K.max)
if(verbose) cat("Bootstrapping, b = 1,2,..., B (= ", B,
") [one \".\" per sample]:\n", sep="")
for (b in 1:B) {
## Generate "H0"-data as "parametric bootstrap sample" :
z1 <- apply(rng.x1, 2,
function(M, nn) runif(nn, min=M[1], max=M[2]),
nn=n)
z <- switch(spaceH0,
"scaledPCA" = tcrossprod(z1, V.sx), # back transformed
"original" = z1
) + m.x
for(k in 1:K.max) {
logWks[b,k] <- log(W.k(z, k))
}
if(verbose) cat(".", if(b %% 50 == 0) paste(b,"\n"))
}
if(verbose && (B %% 50 != 0)) cat("",B,"\n")
E.logW <- colMeans(logWks)
SE.sim <- sqrt((1 + 1/B) * apply(logWks, 2, var))
structure(class = "clusGap",
list(Tab = cbind(logW, E.logW, gap = E.logW - logW, SE.sim),
## K.max == nrow(T)
call = cl., spaceH0=spaceH0,
n = n, B = B, FUNcluster=FUNcluster))
}
## lga/R/gap.R --- has for Tibshirani et al (2001):
## ElogWks[k,] <- c(mean(BootOutput), sqrt(var(BootOutput)*(1+1/B)))
## GAP[k] <- ElogWks[k,1] - logWks[k]
## if (k > 1)
## if(GAP[k-1] >= GAP[k]-ElogWks[k,2] & !doall)
## finished <- TRUE
## so they effectively only look for the *first* (local) maximum which ..
## MM: <==> diff(GAP) = GAP[k] - GAP[k-1] <= +SE.sim[k]
## criteria.DandF() -- Dudoit and Fridlyand (2002)
## ---------------- looks at the *global* maximum and then to the left..
## y <- x$data
## crit <- diff(y[which.max(y[,"Gap"]), c("Sks", "Gap")])
## nclust <- min(which(y[,"Gap"] > crit))
## return(ifelse(nclust == nrow(y), NA, nclust))
maxSE <- function(f, SE.f,
method = c("firstSEmax", "Tibs2001SEmax",
"globalSEmax", "firstmax", "globalmax"),
SE.factor = 1)
{
method <- match.arg(method)
stopifnot((K <- length(f)) >= 1, K == length(SE.f), SE.f >= 0, SE.factor >= 0)
fSE <- SE.factor * SE.f
switch(method,
"firstmax" = { ## the first local maximum (== firstSEmax with SE.factor == 0)
decr <- diff(f) <= 0 # length K-1
if(any(decr)) which.max(decr) else K # the first TRUE, or K
},
"globalmax" = {
which.max(f)
},
"Tibs2001SEmax" = { ## The one Tibshirani et al (2001) proposed:
## "the smallest k such that f(k) >= f(k+1) - s_{k+1}"
g.s <- f - fSE
if(any(mp <- f[-K] >= g.s[-1])) which.max(mp) else K
},
"firstSEmax" = { ## M.Maechler(2012): rather ..
## look at the first *local* maximum and then to the left ..:
decr <- diff(f) <= 0 # length K-1
nc <- if(any(decr)) which.max(decr) else K # the first TRUE, or K
if(any(mp <- f[seq_len(nc - 1)] >= f[nc] - fSE[nc]))
which(mp)[1]
else nc
},
"globalSEmax" = { ## Dudoit and Fridlyand (2002) *thought* Tibshirani proposed..
## in 'lga', see criteria.DandF():
## looks at the *global* maximum and then to the left..
nc <- which.max(f)
if(any(mp <- f[seq_len(nc - 1)] >= f[nc] - fSE[nc]))
which(mp)[1]
else nc
})
}
print.clusGap <- function(x, method="firstSEmax", SE.factor = 1, ...)
{
method <- match.arg(method, choices = eval(formals(maxSE)$method))
stopifnot((K <- nrow(T <- x$Tab)) >= 1, SE.factor >= 0)
cat("Clustering Gap statistic [\"clusGap\"] from call:\n", deparse1(x$call),
sprintf("\nB=%d simulated reference sets, k = 1..%d; spaceH0=\"%s\"\n",
x$B, K, x$spaceH0), sep="")
nc <- maxSE(f = T[,"gap"], SE.f = T[,"SE.sim"],
method=method, SE.factor=SE.factor)
cat(sprintf(" --> Number of clusters (method '%s'%s): %d\n",
method, if(grepl("SE", method))
sprintf(", SE.factor=%g",SE.factor) else "", nc))
print(T, ...)
invisible(x)
}
plot.clusGap <- function(x, type="b", xlab = "k", ylab = expression(Gap[k]),
main = NULL,
do.arrows = TRUE,
arrowArgs = list(col="red3", length=1/16, angle=90, code=3),
...)
{
stopifnot(is.matrix(Tab <- x$Tab), is.numeric(Tab))
K <- nrow(Tab)
k <- seq_len(K) # == 1,2,... k
if(is.null(main))
main <- paste(strwrap(deparse1(x$call), width = 60, exdent = 7),
collapse="\n")
gap <- Tab[, "gap"]
plot(k, gap, type=type, xlab=xlab, ylab=ylab, main=main, ...)
if(do.arrows)
do.call(arrows,
c(list(k, gap+ Tab[, "SE.sim"], k, gap- Tab[, "SE.sim"]), arrowArgs))
invisible()
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.