Nothing
clujaccard <- function(c1,c2,zerobyzero=NA){
# print(c1)
# print(c2)
if (sum(c1)+sum(c2)-sum(c1 & c2)==0) out <- zerobyzero
else
out <- sum(c1 & c2)/(sum(c1)+sum(c2)-sum(c1 & c2))
out
}
noisemclustCBI <- function(data, G=NULL, k=NULL, modelNames=NULL,
nnk=0, hcmodel = NULL,
Vinv = NULL, summary.out=FALSE, ...){
# require(mclust)
# require(prabclus)
if (!is.null(k)) G <- k
data <- as.matrix(data)
# print(str(data))
if (nnk > 0) {
noise <- as.logical(1 - NNclean(data, nnk)$z)
# print(noise)
if (!is.null(hcmodel))
hcPairs <- hc(modelName = hcmodel, data = data)
if (is.null(Vinv) & is.null(hcmodel))
c1 <- mclustBIC(data, G, modelNames, initialization=list(noise=noise),...)
if (!is.null(Vinv) & is.null(hcmodel))
c1 <- mclustBIC(data, G, modelNames, initialization=list(noise=noise),
Vinv=Vinv,...)
if (is.null(Vinv) & !is.null(hcmodel))
c1 <- mclustBIC(data, G, modelNames,
initialization=list(hcPairs=hcPairs, noise=noise),...)
if (!is.null(Vinv) & !is.null(hcmodel))
c1 <- mclustBIC(data, G, modelNames,
initialization=list(hcPairs=hcPairs, noise=noise),
Vinv=Vinv,...)
}
else {
if (!is.null(hcmodel)) {
hcPairs <- hc(modelName = hcmodel, data = data)
c1 <- mclustBIC(data, G, modelNames,
initialization=list(hcPairs=hcPairs),
Vinv=Vinv,...)
}
else
c1 <- mclustBIC(data, G, modelNames,
Vinv=Vinv,...)
noise <- rep(0, nrow(data))
}
# print(c1)
sc1 <- summary(c1,data)
# print(str(sc1))
sc1c <- sc1$classification
cl <- list()
if (sc1$G==0)
sc1c <- rep(0,nrow(data))
nc <- nccl <- max(sc1c)
if (sum(sc1c==0)>0){
nc <- nccl+1
sc1c[sc1c==0] <- nc
}
for (i in 1:nc)
cl[[i]] <- sc1c == i
if (summary.out)
out <- list(result = c1, nc = nc, nccl = nccl, clusterlist = cl,
partition = sc1c, nnk = nnk, initnoise = as.logical(noise),
mclustsummary=sc1,
clustermethod = "mclustBIC")
else
out <- list(result = c1, nc = nc, nccl = nccl, clusterlist = cl,
partition = sc1c, nnk = nnk, initnoise = as.logical(noise),
clustermethod = "mclustBIC")
out
}
distnoisemclustCBI <- function(dmatrix, G=NULL, k=NULL, modelNames=NULL,
nnk=0, hcmodel = NULL,
Vinv = NULL, mdsmethod="classical",
mdsdim=4, summary.out=FALSE,
points.out=FALSE, ...){
dmatrix <- as.matrix(dmatrix)
if (!is.null(k)) G <- k
n <- ncol(dmatrix)
# require(MASS)
# require(prabclus)
# require(mclust)
if (mdsmethod != "classical") {
mindm <- min(dmatrix[dmatrix > 0])/10
dmatrix[dmatrix<mindm] <- mindm
}
data <- switch(mdsmethod, classical = cmdscale(dmatrix, k = mdsdim),
kruskal = isoMDS(dmatrix, k = mdsdim)$points, sammon =
sammon(dmatrix, k = mdsdim)$points)
data <- as.matrix(data)
if (nnk > 0) {
noise <- as.logical(1 - NNclean(data, nnk)$z)
# print(noise)
if (!is.null(hcmodel))
hcPairs <- hc(modelName = hcmodel, data = data)
if (is.null(Vinv) & is.null(hcmodel))
c1 <- mclustBIC(data, G, modelNames, initialization=list(noise=noise),...)
if (!is.null(Vinv) & is.null(hcmodel))
c1 <- mclustBIC(data, G, modelNames, initialization=list(noise=noise),
Vinv=Vinv,...)
if (is.null(Vinv) & !is.null(hcmodel))
c1 <- mclustBIC(data, G, modelNames,
initialization=list(hcPairs=hcPairs, noise=noise),...)
if (!is.null(Vinv) & !is.null(hcmodel))
c1 <- mclustBIC(data, G, modelNames,
initialization=list(hcPairs=hcPairs, noise=noise),
Vinv=Vinv,...)
}
else {
if (!is.null(hcmodel)) {
hcPairs <- hc(modelName = hcmodel, data = data)
c1 <- mclustBIC(data, G, modelNames,
initialization=list(hcPairs=hcPairs),
Vinv=Vinv,...)
}
else
c1 <- mclustBIC(data, G, modelNames,
Vinv=Vinv,...)
noise <- rep(0, nrow(data))
}
# print(c1)
sc1 <- summary(c1,data)
# print(str(sc1))
sc1c <- sc1$classification
cl <- list()
if (sc1$G==0)
sc1c <- rep(0,nrow(data))
nc <- nccl <- max(sc1c)
if (sum(sc1c==0)>0){
nc <- nccl+1
sc1c[sc1c==0] <- nc
}
for (i in 1:nc)
cl[[i]] <- sc1c == i
out <- list(result = c1, nc = nc, nccl = nccl, clusterlist = cl,
partition = sc1c, nnk = nnk, initnoise = as.logical(noise),
clustermethod = "mclustBIC")
if (summary.out)
out$mclustsummary <- sc1
if (points.out)
out$points <- data
out
}
mergenormCBI <- function(data, G=NULL, k=NULL, modelNames=NULL, nnk=0,
hcmodel = NULL,
Vinv = NULL, mergemethod="bhat",
cutoff=0.1,...){
# require(mclust)
# require(prabclus)
if (!is.null(k)) G <- k
if (nnk > 0) {
noise <- as.logical(1 - NNclean(data, nnk)$z)
# print(noise)
if (!is.null(hcmodel))
hcPairs <- hc(modelName = hcmodel, data = data)
if (is.null(Vinv) & is.null(hcmodel)){
c1 <- mclustBIC(data, G, modelNames, initialization=list(noise=noise),...)
# print("mclust done")
}
if (!is.null(Vinv) & is.null(hcmodel))
c1 <- mclustBIC(data, G, modelNames, initialization=list(noise=noise),
Vinv=Vinv,...)
if (is.null(Vinv) & !is.null(hcmodel))
c1 <- mclustBIC(data, G, modelNames,
initialization=list(hcPairs=hcPairs, noise=noise),...)
if (!is.null(Vinv) & !is.null(hcmodel))
c1 <- mclustBIC(data, G, modelNames,
initialization=list(hcPairs=hcPairs, noise=noise),
Vinv=Vinv,...)
}
else {
if (!is.null(hcmodel)) {
hcPairs <- hc(modelName = hcmodel, data = data)
c1 <- mclustBIC(data, G, modelNames,
initialization=list(hcPairs=hcPairs),
Vinv=Vinv,...)
}
else
c1 <- mclustBIC(data, G, modelNames,
Vinv=Vinv,...)
noise <- rep(0, nrow(data))
}
sc1 <- summary(c1,data)
# print("sum")
# print(max(sc1$classification))
jsc1 <- mergenormals(data,sc1,method=mergemethod,cutoff=cutoff,...)
# print(jsc1$clusternumbers)
# print("merge")
renumcl <- jsc1$clustering
# cln <- 1
# for (i in 1:max(jsc1$clustering)){
# if(sum(jsc1$clustering==i)>0){
# renumcl[jsc1$clustering==i] <- cln
# cln <- cln+1
# }
# }
cl <- list()
nc <- nccl <- max(jsc1$clustering)
if (sum(jsc1$clustering==0)>0){
nc <- nc+1
renumcl[jsc1$clustering==0] <- nc
}
for (i in 1:nc)
cl[[i]] <- renumcl == i
# if (nc>nccl)
# cl[[nc]] <- renumcl == 0
out <- list(result = jsc1, nc = nc, nccl = nccl, clusterlist = cl,
partition = renumcl, nnk = nnk, initnoise = as.logical(noise),
clustermethod = "mclust/mergenormals")
out
}
hclustCBI <- function(data,k,cut="number",method,scaling=TRUE,noisecut=0,...){
if(!identical(scaling,FALSE))
sdata <- scale(data,center=TRUE,scale=scaling)
else
sdata <- data
n <- nrow(data)
noise <- FALSE
c1 <- hclust(dist(sdata),method=method)
if (cut=="number")
partition <- cutree(c1,k=k)
else
partition <- cutree(c1,h=k)
cl <- list()
nc <- max(partition)
clsizes <- numeric(0)
for (i in 1:nc) clsizes[i] <- sum(partition==i)
ncn <- sum(clsizes>noisecut)
if (ncn<nc){
noise <- TRUE
newcln <- (1:nc)[clsizes>noisecut]
nc <- ncn+1
newpart <- rep(nc,n)
for (i in 1:ncn)
newpart[partition==newcln[i]] <- i
partition <- newpart
}
# print(nc)
# print(sc1)
for (i in 1:nc)
cl[[i]] <- partition==i
out <- list(result=c1,noise=noise,
nc=nc,clusterlist=cl,partition=partition,
clustermethod="hclust/cutree")
out
}
hclusttreeCBI <- function(data,minlevel=2,method,scaling=TRUE,...){
if(!identical(scaling,FALSE))
sdata <- scale(data,center=TRUE,scale=scaling)
else
sdata <- data
c1 <- hclust(dist(sdata),method=method)
n <- nrow(data)
clist <- list()
for (i in 1:n){
clist[[i]] <- rep(FALSE,n)
clist[[i]][i] <- TRUE
}
clcount <- n
for (j in 1:(n-2)){
clcount <- clcount+1
if (c1$merge[j,1]<0) clist1 <- clist[[-c1$merge[j,1]]]
else clist1 <- clist[[n+c1$merge[j,1]]]
if (c1$merge[j,2]<0) clist2 <- clist[[-c1$merge[j,2]]]
else clist2 <- clist[[n+c1$merge[j,2]]]
clist[[clcount]] <- clist2 | clist1
}
clusterlist <- list()
if (minlevel==1){
clusterlist <- clist
nc <- clcount
}
else{
for (j in (n+minlevel-1):clcount)
clusterlist[[j-minlevel-n+2]] <- clist[[j]]
nc <- clcount-minlevel-n+2
}
# print(nc)
# print(sc1)
out <- list(result=c1,nc=nc,clusterlist=clusterlist,partition=cutree(c1,2),
clustermethod="hclust, full tree")
out
}
# for clara & pam, data called x
claraCBI <- function(data,k,usepam=TRUE,diss=inherits(data,"dist"),...){
if (usepam)
c1 <- pam(data,k=k,diss=diss,...)
else
c1 <- clara(data,k=k,...)
partition <- c1$clustering
cl <- list()
nc <- k
# print(nc)
# print(sc1)
for (i in 1:nc)
cl[[i]] <- partition==i
out <- list(result=c1,nc=nc,clusterlist=cl,partition=partition,
clustermethod="clara/pam")
out
}
speccCBI <- function(data,k,...){
# require(kernlab)
data <- as.matrix(data)
options(show.error.messages = FALSE)
c1 <- try(specc(data,centers=k,...))
options(show.error.messages = TRUE)
if (inherits(c1,"try-error")){
partition <- rep(1,nrow(data))
cat("Function specc returned an error, probably a one-point cluster.\n All observations were classified to cluster 1.\n")
}
else
partition <- c1@.Data
cl <- list()
nc <- k
# print(nc)
# print(sc1)
for (i in 1:nc)
cl[[i]] <- partition==i
out <- list(result=c1,nc=nc,clusterlist=cl,partition=partition,
clustermethod="spectral")
out
}
tclustCBI <- function(data,k,trim=0.05,...){
if (requireNamespace("tclust", quietly = TRUE)) {
data <- as.matrix(data)
c1 <- tclust::tclust(data,k=k,alpha=trim,...)
sc1c <- c1$cluster
cl <- list()
nc <- nccl <- max(sc1c)
if (sum(sc1c==0)>0){
nc <- nccl+1
sc1c[sc1c==0] <- nc
}
for (i in 1:nc)
cl[[i]] <- sc1c == i
out <- list(result=c1,nc=nc,nccl=nccl,clusterlist=cl,partition=sc1c,
clustermethod="tclust")
out
}
else
warning("tclust could not be loaded")
}
# trimkmeansCBI <- function(data,k,scaling=TRUE,trim=0.1,...){
# c1 <- trimkmeans(data,k=k,scaling=scaling,trim=trim,...)
# partition <- c1$classification
# cl <- list()
# nc <- k+1
# nccl <- k
# # print(nc)
# # print(sc1)
# for (i in 1:nc)
# cl[[i]] <- partition==i
# out <- list(result=c1,nc=nc,clusterlist=cl,partition=partition,
# nccl=nccl,clustermethod="trimkmeans")
# out
# }
kmeansCBI <- function(data,krange,k=NULL,scaling=FALSE,runs=1,criterion="ch",...){
if (!is.null(k)) krange <- k
if(!identical(scaling,FALSE))
sdata <- scale(data,center=TRUE,scale=scaling)
else
sdata <- data
c1 <- kmeansruns(sdata,krange,runs=runs,criterion=criterion,...)
partition <- c1$cluster
cl <- list()
nc <- c1$bestk
# print(nc)
# print(sc1)
for (i in 1:nc)
cl[[i]] <- partition==i
out <- list(result=c1,nc=nc,clusterlist=cl,partition=partition,
clustermethod="kmeans")
out
}
pamkCBI <- function (data, krange = 2:10,k=NULL,
criterion="asw", usepam=TRUE,
scaling = FALSE, diss = inherits(data,"dist"), ...)
{
# require(cluster)
if (!is.null(k)) krange <- k
c1 <- pamk(data, krange = krange, criterion=criterion, usepam=usepam,
scaling = scaling, diss = diss,
...)
partition <- c1$pamobject$clustering
cl <- list()
nc <- c1$nc
# print(nc)
for (i in 1:nc) cl[[i]] <- partition == i
out <- list(result = c1, nc = nc, clusterlist = cl, partition = partition,
clustermethod = "pam/estimated k",criterion=criterion)
out
}
# for dbscan, data called x
dbscanCBI <- function(data,eps,MinPts,diss=inherits(data,"dist"),...){
if (diss)
c1 <- dbscan(data,eps,MinPts,method="dist",...)
else
c1 <- dbscan(data,eps,MinPts,...)
# plot(c1, data)
partition <- c1$cluster
cl <- list()
nccl <- max(partition)
partition[partition==0] <- nccl+1
nc <- max(partition)
# print(nc)
# print(sc1)
for (i in 1:nc)
cl[[i]] <- partition==i
out <- list(result=c1,nc=nc,nccl=nccl,clusterlist=cl,partition=partition,
clustermethod="dbscan")
out
}
mahalCBI <- function(data,clustercut=0.5,...){
c1 <- fixmahal(data,...)
partition <- rep(1,nrow(data))
cl <- fpclusters(c1)
nc <- length(cl)
if (nc>0){
for (i in 1:nc)
cl[[i]] <- as.integer(cl[[i]]>clustercut)
}
else{
nc <- 1
cl[[1]] <- as.integer(c1$g[[1]]>clustercut)
}
out <- list(result=c1,nc=nc,clusterlist=cl,partition=partition,
clustermethod="fixmahal")
out
}
pdfclustCBI <- function(data,...){
if (requireNamespace("pdfCluster", quietly = TRUE)) {
c1 <- pdfCluster::pdfCluster(data,...)
sc1c <- c1@clusters
cl <- list()
nc <- nccl <- max(sc1c)
for (i in 1:nc)
cl[[i]] <- sc1c == i
out <- list(result=c1,nc=nc,nccl=nccl,clusterlist=cl,partition=sc1c,
clustermethod="pdfCluster")
out
}
else
warning("pdfCluster could not be loaded")
}
# distr could also be mvt, msn, mst
# emskewCBI <- function(data,k,distr="mst",repeats=100,...){
# if (requireNamespace("EMMIXskew", quietly = TRUE)) {
# if (!exists("distr")) distr="mvn"
# attempt <- 1
# repeat{
# # c1 <- EMMIXskew::EmSkew(data,g=k,distr=distr,...)
# c1 <- EmSkew(data,g=k,distr=distr,...)
# if (!is.null(c1)){
# if (max(c1$clust)==k)
# break
# }
# attempt <- attempt+1
# if(attempt>repeats){
# cat("EmSkew failed after ",repeats," repetitions.")
# break
# }
# if(debug) print("Repeat EmSkew exection")
# }
# partition <- c1$clust
# nc <- max(partition)
# cl <- list()
# for (i in 1:nc) cl[[i]] <- partition == i
# out <- list(result = c1, nc = nc, clusterlist = cl, partition = partition,
# clustermethod = paste("emskew_",distr,sep=""))
# out
# }
# else
# warning("EMMIXskew could not be loaded")
# }
clusterboot <- function(data,B=100,
distances=(inherits(data,"dist")),
bootmethod="boot",
bscompare=TRUE, multipleboot=FALSE,
jittertuning=0.05, noisetuning=c(0.05,4),
subtuning=floor(nrow(data)/2),
clustermethod,noisemethod=FALSE,
count=TRUE,
showplots=FALSE,dissolution=0.5,
recover=0.75,seed=NULL,datatomatrix=TRUE,...){
sumlogic <- function(x,y,relation="eq")
switch (relation,
eq = sum(x==y, na.rm=TRUE),
s = sum(x<y, na.rm=TRUE),
l = sum(x>y, na.rm=TRUE),
se = sum(x<=y, na.rm=TRUE),
le = sum(x>=y, na.rm=TRUE))
if (!is.null(seed)) set.seed(seed)
invisible(distances)
if (datatomatrix)
data <- as.matrix(data)
if (distances & showplots & datatomatrix) dpoints <- cmdscale(data)
n <- nrow(data)
p <- ncol(data)
if (datatomatrix & !distances){
cod <- cov(data)
md <- colMeans(data)
}
lb <- length(bootmethod)
if (distances)
c1 <- clustermethod(as.dist(data),...)
else
c1 <- clustermethod(data,...)
# print(noisemethod)
# print(str(c1))
if (noisemethod){
if (c1$nccl==0)
stop("No clusters, only noise estimated!")
}
else
c1$nccl <- c1$nc
bootresult <- jitterresult <- noiseresult <-
bojitresult <- subsetresult <- matrix(0,nrow=c1$nc,ncol=B)
if (("jitter" %in% bootmethod) | ("bojit" %in% bootmethod)){
if (!datatomatrix | distances) stop("datatomatrix=FALSE and distances require boot or subset as bootmethod.")
jsd <- numeric(0)
ecd <- eigen(cod, symmetric=TRUE)
ecd$values[ecd$values<0] <- 0
ecd$values[is.na(ecd$values)] <- 0
rotdata <- data %*% solve(t(ecd$vectors))
for (i in 1:p){
sx <- sort(rotdata[,i])
dx <- sx[2:n]-sx[1:(n-1)]
dx <- dx[dx>0]
jsd[i] <- quantile(dx,jittertuning)
}
}
if ("noise" %in% bootmethod){
if (!datatomatrix | distances) stop("datatomatrix=FALSE and distances require boot or subset as bootmethod.")
ecd <- eigen(cod, symmetric=TRUE)
ecd$values[ecd$values<0] <- 0
}
if (showplots & datatomatrix){
if (distances)
plot(dpoints,pch=sapply(c1$partition,toString),col=c1$partition)
else
plot(data,pch=sapply(c1$partition,toString),col=c1$partition)
}
# matrixlist <- list()
for (l in 1:lb){
for (i in 1:B){
if (count) cat(bootmethod[l],i,"\n")
if (bootmethod[l]=="boot"){
bsamp <- sample(n,n,replace=TRUE)
if (!multipleboot) bsamp <- unique(bsamp)
if (distances)
mdata <- data[bsamp,bsamp]
else
mdata <- data[bsamp,]
}
if (bootmethod[l]=="subset"){
bsamp <- sample(n,subtuning,replace=FALSE)
if (distances)
mdata <- data[bsamp,bsamp]
else
mdata <- data[bsamp,]
}
if (bootmethod[l]=="jitter"){
jnoise <- matrix(0,ncol=p,nrow=n)
for (j in 1:p)
jnoise[,j] <- rnorm(n,sd=jsd[j])
jnoise <- jnoise %*% t(ecd$vectors)
mdata <- data+jnoise
bsamp <- 1:n
}
if (bootmethod[l]=="bojit"){
bsamp <- sample(n,n,replace=TRUE)
jnoise <- matrix(0,ncol=p,nrow=n)
for (j in 1:p)
jnoise[,j] <- rnorm(n,sd=jsd[j])
jnoise <- jnoise %*% t(ecd$vectors)
mdata <- data[bsamp,]+jnoise
}
if (bootmethod[l]=="noise"){
noiseind <- as.logical(rbinom(n,1,noisetuning[1]))
nn <- sum(noiseind)
jnoise <- matrix(0,ncol=p,nrow=nn)
for (j in 1:p)
jnoise[,j] <- runif(nn,min=-noisetuning[2]*sqrt(ecd$values[j]),
max=noisetuning[2]*sqrt(ecd$values[j]))
jnoise <- t(t(jnoise %*% t(ecd$vectors))+md)
mdata <- data
mdata[noiseind,] <- jnoise
bsamp <- (1:n)[!noiseind]
}
# print(mdata)
if ("diss" %in% names(formals(clustermethod)) & distances)
bc1 <- clustermethod(mdata,diss=TRUE,...)
else
bc1 <- clustermethod(mdata,...)
# print("clustermethod done")
if (showplots & datatomatrix){
if (distances)
plot(dpoints[bsamp,],pch=sapply(bc1$partition,toString),
col=bc1$partition)
else
plot(mdata,pch=sapply(bc1$partition,toString),col=bc1$partition)
}
if (noisemethod){
effnc1 <- c1$nccl
effnb1 <- bc1$nccl
}
else{
effnc1 <- c1$nc
effnb1 <- bc1$nc
}
for (j in 1:effnc1){
maxgamma <- 0
if (effnb1>0){
for (k in 1:effnb1){
if (multipleboot){
if (bscompare) ncases <- 1:n
else{
ncases <- 1
# print(bsamp)
# print(str(bsamp))
m <- 2 # for (m in 2:n)
if (m<=n){
if (!(bsamp[m] %in% bsamp[1:(m-1)]))
ncases <- c(ncases,m)
m <- m+1
}
}
}
else ncases <- 1:length(bsamp)
# print(ncases)
# if (j==c1$nc){
# print(c1$clusterlist[[j]][bsamp][ncases])
# print(bc1$clusterlist[[k]][ncases])
# }
# print(bc1$nc)
# print(bc1$nccl)
# print(bc1$partition)
# print(str(bc1$clusterlist))
cg <- switch(bootmethod[l],
boot=clujaccard(c1$clusterlist[[j]][bsamp][ncases],
bc1$clusterlist[[k]][ncases],zerobyzero=0),
bojit=clujaccard(c1$clusterlist[[j]][bsamp][ncases],
bc1$clusterlist[[k]][ncases],zerobyzero=0),
subset=clujaccard(c1$clusterlist[[j]][bsamp],
bc1$clusterlist[[k]],zerobyzero=0),
jitter=clujaccard(c1$clusterlist[[j]],bc1$clusterlist[[k]],zerobyzero=0),
noise=clujaccard(c1$clusterlist[[j]][!noiseind],
bc1$clusterlist[[k]][!noiseind],zerobyzero=0))
if (cg>maxgamma) maxgamma <- cg
}
}
if (bootmethod[l]=="boot") bootresult[j,i] <- maxgamma
if (bootmethod[l]=="subset") subsetresult[j,i] <- maxgamma
if (bootmethod[l]=="bojit") bojitresult[j,i] <- maxgamma
if (bootmethod[l]=="jitter") jitterresult[j,i] <- maxgamma
if (bootmethod[l]=="noise") noiseresult[j,i] <- maxgamma
}
if (noisemethod){
if (c1$nc>c1$nccl){
j <- c1$nc
if (bc1$nc>bc1$nccl)
maxgamma <- switch(bootmethod[l],
boot=clujaccard(c1$clusterlist[[c1$nc]][bsamp][ncases],
bc1$clusterlist[[bc1$nc]][ncases],zerobyzero=0),
bojit=clujaccard(c1$clusterlist[[c1$nc]][bsamp][ncases],
bc1$clusterlist[[bc1$nc]][ncases],zerobyzero=0),
subset=clujaccard(c1$clusterlist[[c1$nc]][bsamp],
bc1$clusterlist[[bc1$nc]],zerobyzero=0),
jitter=clujaccard(c1$clusterlist[[c1$nc]],bc1$clusterlist[[bc1$nc]],zerobyzero=0),
noise=clujaccard(c1$clusterlist[[c1$nc]][!noiseind],
bc1$clusterlist[[bc1$nc]][!noiseind],zerobyzero=0))
else
maxgamma <- 0
if (bootmethod[l]=="boot") bootresult[j,i] <- maxgamma
if (bootmethod[l]=="subset") subsetresult[j,i] <- maxgamma
if (bootmethod[l]=="bojit") bojitresult[j,i] <- maxgamma
if (bootmethod[l]=="jitter") jitterresult[j,i] <- maxgamma
if (bootmethod[l]=="noise") noiseresult[j,i] <- maxgamma
}
}
}
}
if (!("boot" %in% bootmethod))
bootresult <- bootmean <- bootbrd <- bootrecover <- NULL
else{
bootmean=apply(bootresult,1,mean, na.rm=TRUE)
bootbrd=apply(bootresult,1,sumlogic,y=dissolution,relation="se")
bootrecover=apply(bootresult,1,sumlogic,y=recover,relation="l")
}
if (!("jitter" %in% bootmethod))
jitterresult <- jittermean <- jitterbrd <- jitterrecover <- NULL
else{
jittermean=apply(jitterresult,1,mean, na.rm=TRUE)
jitterbrd=apply(jitterresult,1,sumlogic,y=dissolution,relation="se")
jitterrecover=apply(jitterresult,1,sumlogic,y=recover,relation="l")
}
if (!("subset" %in% bootmethod)) subsetresult <- subsetmean <-
subsetbrd <- subsetrecover <- NULL
else{
subsetmean=apply(subsetresult,1,mean, na.rm=TRUE)
subsetbrd=apply(subsetresult,1,sumlogic,y=dissolution,relation="se")
subsetrecover=apply(subsetresult,1,sumlogic,y=recover,relation="l")
}
if (!("noise" %in% bootmethod)) noiseresult <- noisemean <-
noisebrd <- noiserecover <- NULL
else{
noisemean=apply(noiseresult,1,mean, na.rm=TRUE)
noisebrd=apply(noiseresult,1,sumlogic,y=dissolution,relation="se")
noiserecover=apply(noiseresult,1,sumlogic,y=recover,relation="l")
}
if (!("bojit" %in% bootmethod)) bojitresult <- bojitmean <-
bojitbrd <- bojitrecover <- NULL
else{
bojitmean=apply(bojitresult,1,mean, na.rm=TRUE)
bojitbrd=apply(bojitresult,1,sumlogic,y=dissolution,relation="se")
bojitrecover=apply(bojitresult,1,sumlogic,y=recover,
relation="l")
}
if (showplots & datatomatrix){
if (distances)
plot(dpoints,pch=sapply(c1$partition,toString),col=c1$partition)
else
plot(data,pch=sapply(c1$partition,toString),col=c1$partition)
}
out <- list(result=c1,partition=c1$partition,
nc=c1$nc, nccl=c1$nccl,
clustermethod=c1$clustermethod,
B=B,
noisemethod=noisemethod,
bootmethod=bootmethod,
multipleboot=multipleboot,
dissolution=dissolution,
recover=recover,
bootresult=bootresult,
bootmean=bootmean,
bootbrd=bootbrd,
bootrecover=bootrecover,
jitterresult=jitterresult,
jittermean=jittermean,
jitterbrd=jitterbrd,
jitterrecover=jitterrecover,
subsetresult=subsetresult,
subsetmean=subsetmean,
subsetbrd=subsetbrd,
subsetrecover=subsetrecover,
bojitresult=bojitresult,
bojitmean=bojitmean,
bojitbrd=bojitbrd,
bojitrecover=bojitrecover,
noiseresult=noiseresult,
noisemean=noisemean,
noisebrd=noisebrd,
noiserecover=noiserecover)
class(out) <- "clboot"
out
}
print.clboot <- function(x, statistics=c("mean","dissolution","recovery"),...){
cat("* Cluster stability assessment *\n")
cat("Cluster method: ",x$clustermethod,"\n")
cat("Full clustering results are given as parameter result\n")
cat("of the clusterboot object, which also provides further statistics\n")
cat("of the resampling results.\n")
cat("Number of resampling runs: ",x$B,"\n\n")
cat("Number of clusters found in data: ",x$nc,"\n\n")
if (x$noisemethod & x$nc>x$nccl)
cat("The last cluster corresponds to noise/outliers\n\n")
for (i in 1:length(x$bootmethod)){
if (x$bootmethod[i]=="boot"){
cat(" Clusterwise Jaccard bootstrap ")
if (!x$multipleboot) cat("(omitting multiple points) ")
if ("mean" %in% statistics){
cat("mean:\n")
print(x$bootmean)
}
if ("dissolution" %in% statistics){
cat("dissolved:\n")
print(x$bootbrd)
}
if ("recovery" %in% statistics){
cat("recovered:\n")
print(x$bootrecover)
}
}
if (x$bootmethod[i]=="subset"){
cat(" Clusterwise Jaccard subsetting ")
if ("mean" %in% statistics){
cat("mean:\n")
print(x$subsetmean)
}
if ("dissolution" %in% statistics){
cat("dissolved:\n")
print(x$subsetbrd)
}
if ("recovery" %in% statistics){
cat("recovered:\n")
print(x$subsetrecover)
}
}
if (x$bootmethod[i]=="jitter"){
cat(" Clusterwise Jaccard jittering ")
if ("mean" %in% statistics){
cat("mean:\n")
print(x$jittermean)
}
if ("dissolution" %in% statistics){
cat("dissolved:\n")
print(x$jitterbrd)
}
if ("recovery" %in% statistics){
cat("recovered:\n")
print(x$jitterrecover)
}
}
if (x$bootmethod[i]=="noise"){
cat(" Clusterwise Jaccard replacement by noise ")
if ("mean" %in% statistics){
cat("mean:\n")
print(x$noisemean)
}
if ("dissolution" %in% statistics){
cat("dissolved:\n")
print(x$noisebrd)
}
if ("recovery" %in% statistics){
cat("recovered:\n")
print(x$noiserecover)
}
}
if (x$bootmethod[i]=="bojit"){
cat(" Clusterwise Jaccard bootstrap/jittering ")
if ("mean" %in% statistics){
cat("mean:\n")
print(x$bojitmean)
}
if ("dissolution" %in% statistics){
cat("dissolved:\n")
print(x$bojitbrd)
}
if ("recovery" %in% statistics){
cat("recovered:\n")
print(x$bojitrecover)
}
}
}
invisible(x)
}
plot.clboot <- function(x,xlim=c(0,1),breaks=seq(0,1,by=0.05),...){
par(mfrow=c(x$nc,length(x$bootmethod)))
for (j in 1:x$nc)
for (i in 1:length(x$bootmethod)){
if (x$bootmethod[i]=="boot")
hist(x$bootresult[j,],xlim=xlim,breaks=breaks,
xlab="Jaccard similarity",
main=paste("Bootstrap, cluster ",j))
if (x$bootmethod[i]=="subset")
hist(x$subsetresult[j,],xlim=xlim,breaks=breaks,
xlab="Jaccard similarity",
main=paste("Subsetting, cluster ",j))
if (x$bootmethod[i]=="jitter")
hist(x$jitterresult[j,],xlim=xlim,breaks=breaks,
xlab="Jaccard similarity",
main=paste("Jittering, cluster ",j))
if (x$bootmethod[i]=="noise")
hist(x$noiseresult[j,],xlim=xlim,breaks=breaks,
xlab="Jaccard similarity",
main=paste("Replacement by noise, cluster ",j))
if (x$bootmethod[i]=="bojit")
hist(x$bojitresult[j,],xlim=xlim,breaks=breaks,
xlab="Jaccard similarity",
main=paste("Bootstrap/jittering, cluster ",j))
}
par(mfrow=c(1,1))
invisible()
}
# disttrimkmeansCBI <- function(dmatrix,k,scaling=TRUE,trim=0.1,
# mdsmethod="classical",
# mdsdim=4,...){
# dmatrix <- as.matrix(dmatrix)
# n <- ncol(dmatrix)
# # require(MASS)
# if (mdsmethod != "classical") {
# mindm <- min(dmatrix[dmatrix > 0])/10
# dmatrix[dmatrix<mindm] <- mindm
# }
# data <- switch(mdsmethod, classical = cmdscale(dmatrix, k = mdsdim),
# kruskal = isoMDS(dmatrix, k = mdsdim)$points, sammon =
# sammon(dmatrix, k = mdsdim)$points)
# c1 <- trimkmeans(data,k=k,scaling=scaling,trim=trim,...)
# partition <- c1$classification
# cl <- list()
# nccl <- k
# nc <- k+1
# # print(nc)
# # print(sc1)
# for (i in 1:nc)
# cl[[i]] <- partition==i
# out <- list(result=c1,nc=nc,nccl=nccl,clusterlist=cl,partition=partition,
# clustermethod="trimkmeans plus MDS")
# out
# }
disthclustCBI <- function(dmatrix,k,cut="number",method,noisecut=0,...){
n <- nrow(as.matrix(dmatrix))
c1 <- hclust(as.dist(dmatrix),method=method)
noise <- FALSE
if (cut=="number")
partition <- cutree(c1,k=k)
else
partition <- cutree(c1,h=k)
nc <- max(partition)
clsizes <- numeric(0)
for (i in 1:nc) clsizes[i] <- sum(partition==i)
ncn <- sum(clsizes>noisecut)
if (ncn<nc){
noise <- TRUE
newcln <- (1:nc)[clsizes>noisecut]
nc <- ncn+1
newpart <- rep(nc,n)
for (i in 1:ncn)
newpart[partition==newcln[i]] <- i
partition <- newpart
}
cl <- list()
# print(nc)
# print(sc1)
for (i in 1:nc)
cl[[i]] <- partition==i
out <- list(result=c1,noise=noise,nc=nc,nccl=ncn,
clusterlist=cl,partition=partition,
clustermethod="hclust")
out
}
disthclusttreeCBI <- function(dmatrix,minlevel=2,method,...){
n <- nrow(as.matrix(dmatrix))
c1 <- hclust(as.dist(dmatrix),method=method)
clist <- list()
for (i in 1:n){
clist[[i]] <- rep(FALSE,n)
clist[[i]][i] <- TRUE
}
clcount <- n
for (j in 1:(n-2)){
clcount <- clcount+1
if (c1$merge[j,1]<0) clist1 <- clist[[-c1$merge[j,1]]]
else clist1 <- clist[[n+c1$merge[j,1]]]
if (c1$merge[j,2]<0) clist2 <- clist[[-c1$merge[j,2]]]
else clist2 <- clist[[n+c1$merge[j,2]]]
clist[[clcount]] <- clist2 | clist1
}
clusterlist <- list()
if (minlevel==1){
clusterlist <- clist
nc <- clcount
}
else{
for (j in (n+minlevel-1):clcount)
clusterlist[[j-minlevel-n+2]] <- clist[[j]]
nc <- clcount-minlevel-n+2
}
# print(nc)
# print(sc1)
out <- list(result=c1,nc=nc,clusterlist=clusterlist,partition=cutree(c1,2),
clustermethod="hclust, full tree")
out
}
# New with "fn" method
classifdist <- function (cdist, clustering, method = "averagedist", centroids = NULL, nnk = 1) {
k <- max(clustering)
n <- nrow(cdist)
cdist <- as.matrix(cdist)
topredict <- clustering < 0
if (method == "averagedist") {
prmatrix <- matrix(0, ncol = k, nrow = sum(topredict))
# print("classifdist")
# print(topredict)
# print(clustering)
# print(prmatrix)
for (j in 1:k) {
# print(j)
# print(cdist[topredict, clustering == j,drop=FALSE])
if (sum(clustering==j)>0)
prmatrix[, j] <- rowMeans(as.matrix(cdist[topredict, clustering == j,drop=FALSE]))
else
prmatrix[, j] <- rep(Inf,sum(topredict))
}
clpred <- apply(prmatrix, 1, which.min)
clustering[topredict] <- clpred
}
# print(method)
# print(str(centroids))
# print(str(topredict))
# print(str(cdist))
# print(str(clustering))
# if (method == "MSS") {
# RowMSS <- function(x) {
# rowSums((x - rowMeans(x))^2)
# }
# prmatrix <- matrix(0, ncol = k, nrow = sum(topredict))
# for (j in 1:k) {
# prmatrix[, j] <- RowMSS(as.matrix(cdist[topredict, clustering == j]))
# }
# clpred <- apply(prmatrix, 1, which.min)
# clustering[topredict] <- clpred
# }
if (method == "centroid"){
clustering[topredict] <- apply(cdist[topredict, centroids,
drop = FALSE], 1, which.min)
}
if (method == "knn") {
cdist[topredict, topredict] <- max(cdist) + 1
if (nnk == 1) {
bestobs <- apply(cdist[topredict, , drop = FALSE],
1, which.min)
clustering[topredict] <- clustering[bestobs]
}
else {
for (i in (1:n)[topredict]) {
bestobs <- order(cdist[i, ])[1:k]
clasum <- numeric(0)
for (j in 1:k) clasum[j] <- sum(clustering[bestobs] ==
j)
clustering[i] <- which.max(clasum)
}
}
}
if (method == "fn") {
fdist <- matrix(0, nrow=sum(topredict), ncol = k)
for (i in 1:k) {
if (sum(clustering==i)>0)
fdist[,i] <- apply(as.matrix(cdist[topredict, clustering==i, drop=FALSE]), 1, max)
else
fdist[,i] <- rep(Inf,sum(topredict))
}
bestobs1 <- apply(fdist, 1, which.min)
clustering[topredict] <- bestobs1
}
clustering
}
# New with "fn" and "lda" method
classifnp <- function (data, clustering, method = "centroid",
cdist = NULL, centroids = NULL, nnk = 1){
# require(class)
# require(MASS)
data <- as.matrix(data)
k <- max(clustering)
p <- ncol(data)
n <- nrow(data)
topredict <- clustering < 0
##### Average linkage
if (method == "averagedist") {
if (is.null(cdist)) { cdist <- as.matrix(dist(data)) }
else { cdist <- as.matrix(cdist) }
prmatrix <- matrix(0, ncol = k, nrow = sum(topredict))
# print("classifnp")
# print(topredict)
# print(clustering)
for (j in 1:k){
prmatrix[, j] <- rowMeans(as.matrix(cdist[topredict, clustering == j,drop=FALSE]))
}
clpred <- apply(prmatrix, 1, which.min)
clustering[topredict] <- clpred
}
#### Kmeans, PAM, specc, ...
if (method == "centroid") {
if (is.null(centroids)) {
centroids <- matrix(0, ncol = p, nrow = k)
for (j in 1:k) centroids[j, ] <- colMeans(as.matrix(data[clustering == j, ]))
}
# print(centroids)
# print(sum(topredict))
clustering[topredict] <- knn1(centroids, data[topredict,], 1:k)
}
#### Mclust
if (method == "qda") {
qq <- try(qda(data[!topredict, ], grouping = as.factor(clustering[!topredict])), silent = TRUE)
if (identical(attr(qq, "class"), "try-error"))
qq <- lda(data[!topredict, ], grouping = as.factor(clustering[!topredict]))
clustering[topredict] <- as.integer(predict(qq, data[topredict, ])$class)
}
if (method == "lda") {
qq <- lda(data[!topredict, ], grouping = as.factor(clustering[!topredict]))
clustering[topredict] <- as.integer(predict(qq, data[topredict, ])$class)
}
### Single linkage, specClust
if (method == "knn"){
clustering[topredict] <- as.integer(knn(data[!topredict, ],
data[topredict, ],
as.factor(clustering[!topredict]),
k = nnk))
}
#### Complete linkage
if (method == "fn") {
if (is.null(cdist)){ cdist <- as.matrix(dist(data)) }
else { cdist <- as.matrix(cdist) }
fdist <- matrix(0, nrow=sum(topredict), ncol = k)
for (i in 1:k) {
fdist[,i] <- apply(as.matrix(cdist[topredict, clustering==i]), 1, max)
}
bestobs1 <- apply(fdist, 1, which.min)
clustering[topredict] <- bestobs1
}
clustering
}
# Introduces parameter centroidname to indicate where in CBIoutput$result
# the centroids can be found (automatically for kmeansCBI, claraCBI);
# If NULL and data are not distances, mean is computed within classifnp
# largeisgood: Take 1-original value so that large is good (only stabk)
nselectboot <- function (data, B = 50, distances = inherits(data, "dist"),
clustermethod = NULL,
classification = "averagedist", centroidname = NULL,
krange = 2:10, count = FALSE,
nnk = 1, largeisgood=FALSE,...)
{
dista <- distances
data <- as.matrix(data)
if (classification == "averagedist") {
if (dista)
dmat <- data
else dmat <- as.matrix(dist(data))
}
stab <- matrix(0, nrow = B, ncol = max(krange))
n <- nrow(data)
for (k in krange) {
if (count)
cat(k, " clusters\n")
for (i in 1:B) {
if (count)
print(i)
d1 <- sample(n, n, replace = TRUE)
d2 <- sample(n, n, replace = TRUE)
if (dista) {
dmat1 <- data[d1, d1]
dmat2 <- data[d2, d2]
}
else {
dmat1 <- data[d1, ]
dmat2 <- data[d2, ]
}
if (distances){
if ("diss" %in% names(formals(clustermethod))){
clm1 <- clustermethod(as.dist(dmat1), k = k,
diss = TRUE, ...)
clm2 <- clustermethod(as.dist(dmat2), k = k,
diss = TRUE, ...)
}
else{
clm1 <- clustermethod(as.dist(dmat1), k = k, ...)
clm2 <- clustermethod(as.dist(dmat2), k = k, ...)
}
}
else {
clm1 <- clustermethod(dmat1, k = k, ...)
clm2 <- clustermethod(dmat2, k = k, ...)
}
# cl2 <- clm2$partition
centroids1 <- centroids2 <- NULL
cj1 <- cj2 <- rep(-1, n)
cj1[d1] <- clm1$partition
cj2[d2] <- clm2$partition
# centroids <- NULL
if (classification == "centroid") {
if (is.null(centroidname)){
if (identical(clustermethod, kmeansCBI))
centroidname <- "centers"
if (identical(clustermethod, claraCBI))
centroidname <- "medoids"
}
if (!is.null(centroidname)){
centroids1 <- clm1$result[centroidname][[1]]
centroids2 <- clm2$result[centroidname][[1]]
}
}
# print(centroidname)
# if (classification == "centroid" & dista){
# centroids1 <- unlist(centroids1)
# centroids2 <- unlist(centroids2,recursive=FALSE)
# }
# print(str(clm1))
# print(str(centroids1))
if (dista) {
# print("classifdist")
cj1 <- classifdist(data, cj1, method = classification,
centroids = centroids1, nnk = nnk)
# print(cj1)
# print(cj2)
# print(centroids1)
# print(centroids2)
cj2 <- classifdist(data, cj2, method = classification,
centroids = centroids2, nnk = nnk)
}
else {
cj1 <- classifnp(data, cj1, method = classification,
centroids = centroids1, nnk = nnk)
cj2 <- classifnp(data, cj2, method = classification,
centroids = centroids2, nnk = nnk)
}
ctable <- table(cj1, cj2)
nck1 <- rowSums(ctable)
stab[i, k] <- sum(nck1^2 - rowSums(ctable^2))
}
}
stab <- stab/n^2
stabk <- rep(NA, max(krange))
for (k in krange) stabk[k] <- mean(stab[, k])
kopt <- which.min(stabk)
if (largeisgood){
stabk <- 1-stabk
}
out <- list(kopt = kopt, stabk = stabk, stab = stab)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.