R/gap.r

Defines functions gap

Documented in gap

# Tibshirani, Walther and Hastie (2000) #
# Check added 02JUL04;correction to uniform sampling 15MAY06    #
# se added 10SEP06, bug correction 14NOV06
# bug correction 22MAY07
gap <- function(data=swiss,class=g,B=500, cluster.func = myclus){
# data = swiss; class = cl$cluster; B = 100
library(stats)
class.tab <- table(class)
nclus <- length(class.tab)
if (min(class.tab)==1) stop("Singleton clusters not allowed")
if(!(length(class)==nrow(data))) stop("Length of class vector differs from nrow of data") 
data <- as.matrix(data) 
data <- scale(data, center = TRUE, scale = FALSE)
temp1 <- log(sum(by(data,factor(class),intern <- function(x)sum(dist(x)/ncol(x))/2)))
veigen <- svd(data)$v
x1 <- crossprod(t(data),veigen) # project data to pc-dimensions #
z1 <- matrix(data = NA, nrow = nrow(x1), ncol = ncol(x1)) 
tots <- vector(length = B) 
for (k in 1:B){
   for (j in 1:ncol(x1)){
         min.x <- min(x1[,j]);max.x <- max(x1[,j])
         z1[,j] <- runif(nrow(x1), min = min.x, max = max.x) # x1[sample(1:nrow(x1),nrow(x1),replace=TRUE),j]
      }
z <- crossprod(t(z1),t(veigen))
new.clus <- cluster.func(data = z, k = nclus)
new.class <- new.clus$cluster  
tots[k] <- log(sum(by(z, factor(new.class),intern <- function(x) sum(dist(x)/ncol(x))/2)))
}
out <- c(mean(tots)-temp1, sqrt(1+1/B)*sd(tots));names(out) <- c("Gap statistic", "one SE of simulation")
return(out)
}

Try the SAGx package in your browser

Any scripts or data that you put into this service are public.

SAGx documentation built on Nov. 8, 2020, 8:18 p.m.