R/cquality20.R

Defines functions print.valstat plot.valstat print.clusterbenchstats clusterbenchstats cluster.magazine cgrestandard randomclustersim clustatsum stupidkavenCBI stupidkaven stupidkfnCBI stupidkfn stupidknnCBI stupidknn stupidkcentroidsCBI stupidkcentroids distrsimilarity print.summary.cquality summary.cquality

Documented in cgrestandard clustatsum clusterbenchstats cluster.magazine distrsimilarity plot.valstat print.clusterbenchstats print.summary.cquality print.valstat randomclustersim stupidkaven stupidkavenCBI stupidkcentroids stupidkcentroidsCBI stupidkfn stupidkfnCBI stupidknn stupidknnCBI summary.cquality

# cquality 19 implements Serhat's stupid clustering methods
# cquality 20 allows for bootstrap stability

# Re-defined average.within with reweighting!
# Changed print.valstat output to be in line with documentation, but
# can make documentation clearar

# standardisation is for ave distance, separation, 
# largest within gap, 
# pamcrit, max.diameter, min.separation
# and can be "none", "max" (max overall distance), "ave" (ave overall distance),
# "q90" (90% quantile).
# Unless stan="none", densityindex, parsimony and
# entropy will be standardised by the maximum possible for given number
# clusters (entropy), maxk (parsimony); within and
# between cluster ss will be standardised by overall.ss, mnnd will be
# standardised by maximum distance to nnkth nearest neighbour
# standardisation can also be a number. pearsongamma will be standardised
# +1/2. dindex, denscut are automatically standardised, dhgap by maxd.
# cvnn by sqrt(n)

# sepall: Should every cluster be taken into account for sepindex? Or just best
# sepprob overall?

# nndist: compute average distance to nnkth nearest neighbour within cluster (mnnd)
# nnk will then also govern the computation of coefficient of variation of
# kth within-cluster nn distance; clusters with <nnk+1 points are ignored
# averagegap: wgap is average within cluster gaps, otherwise max
# pamcrit: compute pam criterion pamc (looking for optimal centroids pamcentroids)
# densityindex: density estimate is points close to
# dfactor*within cluster median distance, weighted down linearly

# maxk max number of clusters for parsimony
# cvstan: standardisation for coeffeicient of variation 
cqcluster.stats <- function (d = NULL, clustering, alt.clustering = NULL,
                             noisecluster = FALSE, 
    silhouette = TRUE, G2 = FALSE, G3 = FALSE, wgap = TRUE, sepindex = TRUE, 
    sepprob = 0.1, sepwithnoise = TRUE, compareonly = FALSE, 
    aggregateonly = FALSE,
# new:                             
    averagegap=FALSE, pamcrit=TRUE,
#                             densityindex=TRUE,
    dquantile=0.1,
    nndist=TRUE, nnk=2, standardisation="max", sepall=TRUE, maxk=10,
    cvstan=sqrt(length(clustering)))
# dk=5, doweight=0.25,    
{
  lweight <- function(x,md)
    (x<md)*(-x/md+1)

## preparations    
    if (!is.null(d)) 
        d <- as.dist(d)
    cn <- max(clustering)
    clusteringf <- as.factor(clustering)
    clusteringl <- levels(clusteringf)
    cnn <- length(clusteringl)
    if (cn != cnn) {
        warning("clustering renumbered because maximum != number of clusters")
        for (i in 1:cnn) clustering[clusteringf == clusteringl[i]] <- i
        cn <- cnn
    }
    n <- length(clustering)
    noisen <- 0
    cwn <- cn
    if (noisecluster) {
        noisen <- sum(clustering == cn)
        cwn <- cn - 1
    }
    parsimony <- cn/maxk
# cn: number of clusters including noise; cwn: number of clusters w/o noise
    diameter <- average.distance <- median.distance <- separation <- average.toother <- cluster.size <- within.dist <- between.dist <- numeric(0)
    for (i in 1:cn) cluster.size[i] <- sum(clustering == i)
## standardisation    
    if (is.numeric(standardisation)) stan <- standardisation
    else
      stan <- switch(standardisation,
                   max=max(d),
                   ave=mean(d),
                   q90=quantile(d,0.9),
                   1)
## entropy, corrected rand, vi    
    pk1 <- cluster.size/n
    pk10 <- pk1[pk1 > 0]
    h1 <- -sum(pk10 * log(pk10))
    if (!(standardisation=="none")){
      pkmax <- rep(1,cn)/cn
      h1 <- -h1/sum(pkmax * log(pkmax))
    }
    corrected.rand <- vi <- NULL
    if (!is.null(alt.clustering)) {
        choose2 <- function(v) {
            out <- numeric(0)
            for (i in 1:length(v)) out[i] <- ifelse(v[i] >= 2, 
                choose(v[i], 2), 0)
            out
        }
        cn2 <- max(alt.clustering)
        clusteringf <- as.factor(alt.clustering)
        clusteringl <- levels(clusteringf)
        cnn2 <- length(clusteringl)
        if (cn2 != cnn2) {
            warning("alt.clustering renumbered because maximum != number of clusters")
            for (i in 1:cnn2) alt.clustering[clusteringf == clusteringl[i]] <- i
            cn2 <- cnn2
        }
        nij <- table(clustering, alt.clustering)
        dsum <- sum(choose2(nij))
        cs2 <- numeric(0)
        for (i in 1:cn2) cs2[i] <- sum(alt.clustering == i)
        sum1 <- sum(choose2(cluster.size))
        sum2 <- sum(choose2(cs2))
        pk2 <- cs2/n
        pk12 <- nij/n
        corrected.rand <- (dsum - sum1 * sum2/choose2(n))/((sum1 + 
            sum2)/2 - sum1 * sum2/choose2(n))
        pk20 <- pk2[pk2 > 0]
        h2 <- -sum(pk20 * log(pk20))
        icc <- 0
        for (i in 1:cn) for (j in 1:cn2) if (pk12[i, j] > 0) 
            icc <- icc + pk12[i, j] * log(pk12[i, j]/(pk1[i] * 
                pk2[j]))
        vi <- h1 + h2 - 2 * icc
    }
    if (compareonly) {
        out <- list(corrected.rand = corrected.rand, vi = vi)
    }
    else { # begin !if compareonly
## functions of within/between cluster distances        
#        if (silhouette) 
#            require(cluster)
        dmat <- as.matrix(d)
        within.cluster.ss <- 0
        overall.ss <- nonnoise.ss <- sum(d^2)/n
        if (noisecluster) 
            nonnoise.ss <- sum(as.dist(dmat[clustering <= cwn, 
                clustering <= cwn])^2)/sum(clustering <= cwn)
        ave.between.matrix <- separation.matrix <- matrix(0, 
            ncol = cn, nrow = cn)
        nnd <- numeric(0)
        cvnndc <- rep(NA,cn)
        mnnd <- cvnnd <- NULL
        di <- list()
        for (i in 1:cn) { 
            cluster.size[i] <- sum(clustering == i)
            di <- as.dist(dmat[clustering == i, clustering == 
                i])
            if (i <= cwn) {
                within.cluster.ss <- within.cluster.ss + sum(di^2)/cluster.size[i]
                within.dist <- c(within.dist, di)
            }
            if (length(di) > 0){ 
                diameter[i] <- max(di)                
                average.distance[i] <- mean(di)
                median.distance[i] <- median(di)
            }
            else diameter[i] <- average.distance[i] <- median.distance[i] <- NA
            bv <- numeric(0)
            for (j in 1:cn) {
                if (j != i) {
                  sij <- dmat[clustering == i, clustering == 
                    j]
                  bv <- c(bv, sij)
                  if (i < j) {
                    separation.matrix[i, j] <- separation.matrix[j, 
                      i] <- min(sij)
                    ave.between.matrix[i, j] <- ave.between.matrix[j, 
                      i] <- mean(sij)
                    if (i <= cwn & j <= cwn) 
                      between.dist <- c(between.dist, sij)
                  } # if i<j
                } # if j!=i
            } # for j
            separation[i] <- min(bv)
            average.toother[i] <- mean(bv)
        } # for i
## nndist  and coef of var
        if (nndist){
          kenough <- cluster.size>nnk
#          if (min(cluster.size)<nnk+1)
#            warning("min cluster size smaller than nnk. No distance to neighbours computed.") 
          for (i in (1:cn)[kenough]){
              nndi <- apply(dmat[clustering==i,clustering==i],
                     1,sort,partial=nnk+1)[nnk+1,]
#              print(nndi)
              nnd <- c(nnd,nndi)
              cvnndc[i] <- sd(nndi)/mean(nndi)
          }
          cvnnd <- weighted.mean(cvnndc,pk1,na.rm=TRUE)
          mnnd <- mean(nnd,na.rm=TRUE)
#          print("cluster.size")
#          print(cluster.size)
#          print(nnk)
#          print(kenough)
#          print(nnd)
#          print(mnnd)
          if (!standardisation=="none"){
                maxnnd <- max(apply(dmat,1,sort,partial=nnk+1)[nnk+1,])
#                print("stan")
#                print(maxnnd)
                mnnd <- mnnd/maxnnd
                cvnnd <- cvnnd/cvstan
          }
        }
## end nndist         
        average.between <- mean(between.dist)/stan
#        if (stanbound) average.between <- min(1,average.between)
#        average.within <- mean(within.dist)/stan
        average.within <- weighted.mean(average.distance,cluster.size,na.rm=TRUE)/stan
        nwithin <- length(within.dist)
        nbetween <- length(between.dist)
        between.cluster.ss <- nonnoise.ss - within.cluster.ss
        ch <- between.cluster.ss * (n - noisen - cwn)/(within.cluster.ss * 
            (cwn - 1))
        if (!(standardisation=="none")){
          within.cluster.ss <- within.cluster.ss/overall.ss
          between.cluster.ss <- between.cluster.ss/overall.ss
        }
#        if (stanbound) between.cluster.ss <- min(1,between.cluster.ss)
## silhouette widths        
        clus.avg.widths <- avg.width <- NULL
        if (silhouette) {
            sii <- silhouette(clustering, dmatrix = dmat)
            sc <- summary(sii)
            clus.avg.widths <- sc$clus.avg.widths
            if (noisecluster) 
                avg.width <- mean(sii[clustering <= cwn, 3])
            else avg.width <- sc$avg.width
        } # if silhouette
## g2        
        g2 <- g3 <- cn2 <- cwidegap <- widestgap <- sindex <- NULL
        if (G2) {
            splus <- sminus <- 0
            for (i in 1:nwithin) {
                splus <- splus + sum(within.dist[i] < between.dist)
                sminus <- sminus + sum(within.dist[i] > between.dist)
            }
            g2 <- (splus - sminus)/(splus + sminus)
        }
## g3        
        if (G3) {
            sdist <- sort(c(within.dist, between.dist))
            sr <- nwithin + nbetween
            dmin <- sum(sdist[1:nwithin])
            dmax <- sum(sdist[(sr - nwithin + 1):sr])
            g3 <- (sum(within.dist) - dmin)/(dmax - dmin)
        }
## pearsongamma, dunn, dunn2        
        pearsongamma <- cor(c(within.dist, between.dist), c(rep(0, 
            nwithin), rep(1, nbetween)))
        dunn <- min(separation[1:cwn])/max(diameter[1:cwn], na.rm = TRUE)
        acwn <- ave.between.matrix[1:cwn, 1:cwn]
        dunn2 <- min(acwn[upper.tri(acwn)])/max(average.distance[1:cwn])
        if (!standardisation=="none")
          pearsongamma <- (pearsongamma+1)/2
## widest within gap         
        if (wgap) {
            cwidegap <- rep(0, cwn)
            for (i in 1:cwn) if (sum(clustering == i) > 1) 
                cwidegap[i] <- max(hclust(as.dist(dmat[clustering == 
                  i, clustering == i]), method = "single")$height)
            if (averagegap)
              widestgap <- mean(cwidegap)/stan
            else    
              widestgap <- max(cwidegap)/stan
        }
## separation index        
        if (sepindex) {
          if (sepall){
            psep <- rep(NA,n)
            svec <- numeric(0)
            for (i in 1:n) psep[i] <- min(dmat[i, clustering != 
                  clustering[i]])
            lcn <- cwn+(sepwithnoise & noisecluster)
            for (j in 1:lcn){
              scj <- psep[clustering==j]
              qcj <- quantile(scj,sepprob)
              svec <- c(svec,scj[scj<=qcj])
            } # for j
            sindex <- mean(svec)/stan
          } # if sepall
          else{
            psep <- rep(NA, n)
            if (sepwithnoise | !noisecluster) {
                for (i in 1:n) psep[i] <- min(dmat[i, clustering != 
                  clustering[i]])
            } # if (sepwithnoise | !noisecluster)
            else {
                dmatnn <- dmat[clustering <= cwn, clustering <= 
                  cwn]
                clusteringnn <- clustering[clustering <= cwn]
                for (i in 1:(n - noisen)) psep[i] <- min(dmatnn[i, 
                  clusteringnn != clusteringnn[i]])
            } # else (!if (sepwithnoise | !noisecluster))
            sindex <- mean(psep[psep<=quantile(psep, sepprob)])/stan
          } # else (!if sepall)
        } # if sepindex
## pam criterion
        pamc <- pamcentroids <- NULL
        if (pamcrit){
          pamc <- 0
          pamcentroids <- rep(0,cwn)
          for (i in 1:cwn){
            mindsum <- Inf
            for (j in (1:n)[clustering==i]){
              dsum <- sum(dmat[j,clustering==i])
              if (dsum<=mindsum){
                mindsum <-  dsum
                pamcentroids[i] <- j
              } # if dsum best up to now
            } # for j
            pamc <- pamc+mindsum 
          } # for i
          pamc <- pamc/(n*stan)
        } # if pamcrit
## densityindex
        withindensp <- densoc <- percdensoc <- percwdens <- rep(0,n)
        thgap <- numeric(0)
  # withindensp: density
  # densoc: contribution to density that comes from other clusters
  # percdensoc: densoc/max(withindensp)
  # percwdens: withindensp/max(withindensp)      
        npenalty <- dpenalty <- rep(0,cwn)
        cutdist <- quantile(d,dquantile)
        for (i in 1:cwn){
          for (j in (1:n)[clustering==i]){
#              if (cluster.size[i]>1){
               withindensp[j] <- sum(lweight(dmat[j,],cutdist))
               densoc[j] <- sum(lweight(dmat[j,clustering!=i],cutdist))
#              }
#              else{
#                mdd <- min(d[d>0])/2
#                withindensp[j] <- sum(lweight(dmat[j,],mdd))
#                densoc[j] <- sum(lweight(dmat[j,clustering!=i],mdd))
#              }
          } # for j
        } # for i
        percwdens <- withindensp/max(withindensp)
        percdensoc <- densoc/max(withindensp)
        for (i in 1:cwn){
          npenalty[i] <- sum(percdensoc[clustering==i]*percwdens[clustering==i])
        } # for i
        pdistto <- distto <- pclosetomode <- list()
  # In the following:
  # modei: Mode of cluster i
  # closetomode: sequence of points with lowest distance to any point already
  # in closetomode (first is modei)
  # distto: difference in relative densities (percwdens) between new point
  # in closetomode and the one to which it is closest.
  # If this is positive, it is added to dpenalty.
  # pdistto is the number of the old point in closetomode that is closest.
        for (i in 1:cwn){
          di <- dmat[clustering==i,clustering==i,drop=FALSE]
          oindexes <- (1:n)[clustering==i]
          modei <- which.max(withindensp[clustering==i])
          di[modei,] <- Inf
          closetomode <- modei
          pdistto[[i]] <- distto[[i]] <- NA
          if (cluster.size[i]>1){
            for (j in 2:cluster.size[i]){
              minval <- apply(di[,closetomode,drop=FALSE],1,min)
              remainindexes <- oindexes[-closetomode]
              closetomode[j] <- which.min(minval)
              minctm <- which.min(di[closetomode[j],closetomode[1:(j-1)]])
              minctm <- oindexes[closetomode[1:(j-1)][minctm]]
              newp <- oindexes[closetomode[j]]
              thgap <- c(thgap,min(minval)*max(percwdens[remainindexes]))
              distto[[i]][j] <- percwdens[newp]-percwdens[minctm]
              if (percwdens[newp]>percwdens[minctm])
                dpenalty[i] <- dpenalty[i]+(distto[[i]][j])^2
              di[closetomode[j],] <- Inf
              pdistto[[i]][j] <- minctm
            } # for j
          } # if
          pclosetomode[[i]] <- oindexes[closetomode]
        } # for i
        dindex <- sqrt(sum(dpenalty)/n)
        denscut <- sum(npenalty)/n
        highdgap <- max(thgap)
        if (!standardisation=="none"){
          highdgap <- highdgap/stan
        }
        
# ## new density index, no dismissed
#         withindensp <- densoc <- percdensoc <- percwdens <- rep(0,n)
# # withindensp: density
# # densoc: contribution to density that comes from other clusters
# # percdensoc: densoc/max(withindensp within cluster)
# # percwdens: withindensp/max(withindensp within cluster)      
#         denscutc <- rep(0,cwn)
# # denscutc: clusterwise penalty for density contributions from other clusters
# # denscut: sum over all clusters      
#         cutdist <- quantile(d,dquantile)
#         for (i in 1:cwn){
#           for (j in (1:n)[clustering==i]){
# #            if (cluster.size[i]>1){
#               withindensp[j] <- sum(lweight(dmat[j,],cutdist))/n
#               densoc[j] <- sum(lweight(dmat[j,clustering!=i],cutdist))/n
# #            }
# #            else{
# #              mdd <- min(d[d>0])/2
# #              withindensp[j] <- sum(lweight(dmat[j,],mdd))
# #              densoc[j] <- sum(lweight(dmat[j,clustering!=i],mdd))
# #            }
#           } # for j
#           percwdens[clustering==i] <-
#             withindensp[clustering==i]/max(withindensp[clustering==i])
#           percdensoc[clustering==i] <-
#             densoc[clustering==i]/max(withindensp[clustering==i])
#           denscutc[i] <- sum(percdensoc[clustering==i]*percwdens[clustering==i])
#         } # for i
#         denscut <- sum(denscutc)/n
#         dindexc <- rep(0,cwn)
# # Clusterwise sum of squared differences between ordered density and
# # density ordered according to distance from mode.      
#         for (i in 1:cwn){
#           di <- dmat[clustering==i,clustering==i,drop=FALSE]
#           oindexes <- (1:n)[clustering==i]
#           modei <- which.max(withindensp[clustering==i])
#           orderfrommode <- order(di[modei,])
#           distordereddens <- withindensp[clustering==i][orderfrommode]
#           ordereddens <- sort(withindensp[clustering==i],decreasing=TRUE)
#           dindexc[i] <- sum((distordereddens-ordereddens)^2)
#         } # for i
#         dindex <- sum(dindexc)/n
## output        
        if (!aggregateonly) 
            out <- list(n = n, cluster.number = cn, cluster.size = cluster.size, 
                min.cluster.size = min(cluster.size[1:cwn]), 
                noisen = noisen, diameter = diameter, average.distance = average.distance, 
                median.distance = median.distance, separation = separation, 
                average.toother = average.toother, separation.matrix = separation.matrix, 
                ave.between.matrix = ave.between.matrix, avebetween = average.between, 
                avewithin = average.within, n.between = nbetween, 
                n.within = nwithin, maxdiameter = max(diameter[1:cwn], 
                  na.rm = TRUE)/stan, minsep = (sepwithnoise * 
                  min(separation) +
                      (!sepwithnoise) * min(separation[1:cwn]))/stan, 
                withinss = within.cluster.ss,
#                        ave.within.cluster.ss = within.cluster.ss/(n - noisen),
                        clus.avg.silwidths = clus.avg.widths, 
                asw = avg.width, g2 = g2, g3 = g3, pearsongamma = pearsongamma, 
                dunn = dunn, dunn2 = dunn2, entropy = h1, wb.ratio = average.within/average.between, 
                ch = ch, cwidegap = cwidegap, widestgap = widestgap, 
                corrected.rand = corrected.rand, 
                vi = vi, sindex = sindex,
# new
            svec=svec, psep=psep, stan=stan, nnk=nnk,mnnd=mnnd,pamc=pamc,pamcentroids=pamcentroids,
#            dindex=dindex,dsepi=dsepi, dratio=dratio,
#                        pdensity=pdensity,borderp=borderp,
            dindex=dindex,denscut=denscut,highdgap=highdgap,
#            dindexc=dindexc,denscutc=denscutc,
            npenalty=npenalty,dpenalty=dpenalty,
            withindensp=withindensp, densoc=densoc,
            pdistto=pdistto, pclosetomode=pclosetomode, distto=distto,
            percwdens=percwdens, percdensoc=percdensoc, 
                        parsimony=parsimony,cvnnd=cvnnd,cvnndc=cvnndc)
# end if !aggregateonly        
        else out <- list(n = n, cluster.number = cn, min.cluster.size = min(cluster.size[1:cwn]), 
            noisen = noisen, avebetween = average.between, 
            avewithin = average.within, maxdiameter = max(diameter[1:cwn], 
                na.rm = TRUE)/stan, minsep = (sepwithnoise * 
                min(separation) + (!sepwithnoise) * min(separation[1:cwn]))/stan, 
            withinss = within.cluster.ss, # was ave 
            asw = avg.width, g2 = g2, g3 = g3, pearsongamma = pearsongamma, 
            dunn = dunn, dunn2 = dunn2, entropy = h1, wb.ratio = average.within/average.between, 
            ch = ch, widestgap = widestgap, 
            corrected.rand = corrected.rand, vi = vi, sindex = sindex, 
# new
            svec=svec, psep=psep, stan=stan, nnk=nnk,mnnd=mnnd,pamc=pamc,pamcebtroids=pamcentroids,
            dindex=dindex,denscut=denscut,highdgap=highdgap,
#            cpenalty=cpenalty,dpenalty=dpenalty,
#            npenalty=npenalty,
#            dsepi=dsepi, dratio=dratio,
#                        pdensity=pdensity,borderp=borderp,
                         parsimony=parsimony,cvnnd=cvnnd,cvnndc=cvnndc)
# end else !if aggregateonly        
    } # else (!if compareonly)   
    class(out) <- "cquality"
    out
}

# old densityindex
#         dsep <- despi <- dindex <- dratio <- pdensity <- borderp <- NULL
#         if (densityindex){
#           borderp <- matrix(TRUE,nrow=n,ncol=cwn)
#           pdensity <- rep(0,n)
#           for (i in 1:n){
#             sdi <- sort(dmat[i,],partial=dk+1)
#             pdensity[i] <- dk/(2*mean(sdi[2:(dk+1)]))
#             for (j in (1:cwn)[-clustering[i]])
#               borderp[i,j] <- any(dmat[i,clustering==j]<=sdi[dk+1])
#           } # for i
#           pdensity[pdensity==Inf] <- 2*max(pdensity[pdensity<Inf])
#           odensity <- mean(pdensity)
#           dsepi <- nni <- rep(1,cwn)
#           aidensity <- numeric(0)
# #          dsep <- matrix(1,nrow=cwn,ncol=cwn)          
# #           for (i in 1:(cwn-1)){
# #             for (j in (i+1):cwn)
# #               if (any((clustering==i & borderp[,j])|
# #                       ( clustering==j & borderp[,i])))
# #                 dsep[i,j] <- dsep[j,i] <-
# #                   min(c(1,
# #                     max(c(pdensity[clustering==i & borderp[,j]],
# #                         pdensity[clustering==j & borderp[,i]]))/
# #                     min(c(mean(pdensity[clustering==i]),
# #                         mean(pdensity[clustering==j])))))
# #           }
#           for (i in 1:cwn){
#             bdensity <- c(pdensity[clustering!=i & borderp[,i]],
#                           pdensity[clustering==i & apply(borderp,1,sum)>1])
#             idensity <- pdensity[clustering==i & apply(borderp,1,sum)==1]
#             nni[i] <- length(idensity)
#             aidensity <- c(aidensity,idensity)
#             if (nni[i]>0){
#               if (length(bdensity)>0)  
#                 d1 <- mean(bdensity)/mean(idensity)
#               else
#                 d1 <- 0
#               dsepi[i] <- min(c(1,d1))
#             if (length(bdensity)==0) dsepi[i] <- 0
#             } # nni>0
#           } # for i 
#           dratio <- odensity/mean(aidensity)
#           dindex1 <- 1-weighted.mean(dsepi,nni/sum(nni))
#           dindex2 <- 1-min(c(1,dratio))
#           nonoise <- n-noisen
#           doweight <- 0.5-abs((nonoise-sum(nni))/nonoise-0.5)
#           dindex <- (1-doweight)*dindex1+doweight*dindex2
# #          fdsep <- cwn-sum(dsep)
# #          dindex <- fdsep/sqrt(cwn)+(sqrt(maxk)-sqrt(cwn))*(1-dborder)
# #          if (standardisation!="none")
# #            dindex <- dindex/sqrt(maxk)




# largeisgood will transform ave within distance, within.cluster.ss, dindex,
# largest within gap, nndist, pam criterion by 1-x, so that large is good
# all will be bounded by 1 if stanbound, this also bounds
# pearsongamma by 0 from below.
summary.cquality <- function(object,stanbound=TRUE,largeisgood=TRUE,...){
  x <- object
  if (stanbound){
      x$mnnd <- min(1,x$mnnd)
      x$avewithin <- min(1,x$avewithin)
      x$pearsongamma <- max(0,x$pearsongamma)
      x$dindex <- min(1,x$dindex)
      x$denscut <- min(1,x$denscut)
      x$highdgap <- min(1,x$highdgap)
      x$asw <- max(0,x$asw)
      x$widestgap <- min(1,x$widestgap)
      if(!is.null(x$pamc))
        x$pamc <- min(1,x$pamc)
      x$maxdiameter <- min(1,x$maxdiameter)
      x$minsep <- min(1,x$minsep)
      x$sindex <- min(1,x$sindex)
      x$cvnnd <- min(1,x$cvnnd)
  }
  if (largeisgood){
      x$avewithin <- 1-x$avewithin
      x$mnnd <- 1-x$mnnd
      x$widestgap <- 1-x$widestgap
      x$denscut <- 1-x$denscut
      x$withinss <- 1-x$withinss
      if(!is.null(x$pamc))
        x$pamc <- 1-x$pamc
      x$maxdiameter <- 1-x$maxdiameter
      x$dindex <- 1-x$dindex
      x$highdgap <- 1-x$highdgap
      x$cvnnd <- 1-x$cvnnd
  }
  out <- list(avewithin=x$avewithin,nnk=x$nnk,mnnd=x$mnnd,
              asw=x$asw,
              widestgap=x$widestgap,sindex=x$sindex,
              pearsongamma=x$pearsongamma,entropy=x$entropy,pamc=x$pamc,
              withinss=x$withinss,
              dindex=x$dindex,denscut=x$denscut,highdgap=x$highdgap,
              parsimony=x$parsimony,maxdiameter=x$maxdiameter,
              minsep=x$minsep,cvnnd=x$cvnnd)
  class(out) <- "summary.cquality"
  out
}

print.summary.cquality <- function(x,...){
  cat("\n")  
  cat("average.within= ",x$avewithin,"\n")
  cat(x$nnk," nearest neighbour distance= ", x$mnnd,"\n")
  cat("coefficient of variation of ",x$nnk," nearest neighbour distance=",
      x$cvnnd,"\n")
  cat("max diameter= ",x$maxdiameter,"\n")
  cat("within-cluster widest gap= ",x$widestgap,"\n\n")
  cat("separation index= ",x$sindex,"\n")
  cat("min separation= ",x$minsep,"\n\n")
  cat("density index= ",x$dindex,"\n")
  cat("density cut= ",x$denscut,"\n")
  cat("density gap= ",x$highdgap,"\n")
  cat("average silhouette width= ",x$asw,"\n\n")
  cat("Pearson Gamma= ",x$pearsongamma,"\n")
  cat("pam criterion= ",x$pamc,"\n")
  cat("within ss= ",x$withinss,"\n\n")
  cat("entropy= ",x$entropy,"\n")
  cat("parsimony= ",x$parsimony,"\n")
}
  



# Similarity to "normal" or "uniform" distribution 
# x: data, clustering: clustering (has to be 1:k), noisecluster: last cluster
# is to be ignored, nnk: nearest neighbour k for uniform
# The uniform thing is misleading; this will indicate uniformity
# even on disconncted sets.
distrsimilarity <- function(x,clustering,noisecluster = FALSE,
                            distribution=c("normal","uniform"),nnk=2,
                           largeisgood=FALSE,messages=FALSE){
  cn <- max(clustering)
  clusteringf <- as.factor(clustering)
  clusteringl <- levels(clusteringf)
  cnn <- length(clusteringl)
  if (cn != cnn) {
        warning("clustering renumbered because maximum != number of clusters")
        for (i in 1:cnn) clustering[clusteringf == clusteringl[i]] <- i
  }
  x <- as.matrix(x)
  nn <- n <- nrow(x)
  p <- ncol(x)
  k <- cnn
  if (noisecluster){
    nn <- n-sum(clustering==k)
    k <- k-1
#    x <- x[clustering<=k,]
  }
  xmahal <- xdknn <- rep(NA,n)
  kdnorm <- kdunif <- NA
  kdnormc <- kdunifc <- wkdnormc <- wkdunifc <- rep(NA,k)
  if ("normal" %in% distribution){
    for (i in 1:k){
#      print(k)
#      print(str(x))      
      cm <- colMeans(x[clustering==i,,drop=FALSE])
      ccov <- cov(x[clustering==i,,drop=FALSE])
#      cat("Cluster",i,"\n")
#      print(sum(clustering==i))
#      print(nn)
#      print(cm)
#      print(ccov)
      cmah <- try(mahalanobis(x[clustering==i,,drop=FALSE],cm,ccov),silent=TRUE)
      if (!is.null(attr(cmah,"class"))){
        cmah <- rep(0,sum(clustering==i))
        if (messages)
          print("cov-matrix not invertible")
      }
      if (any(is.na(cmah))){
        cmah <- rep(0,sum(clustering==i))
        if (messages)
          print("cov-matrix not invertible")
      }
#      print("after try")
      emahal <- ecdf(cmah)
      emahalm <- emahal(cmah)
      memahalm <- min(emahalm)
#      print(emahalm)
#      print(emahalm-pchisq(cmah,df=p))
#      print(emahalm-memahalm-pchisq(cmah,df=p))      
#      print(memahalm)
      kdnormc[i] <- max(abs(emahalm-pchisq(cmah,df=p)))
      kdnormc[i] <- max(c(abs(emahalm-memahalm-pchisq(cmah,df=p)),kdnormc[i]))
      wkdnormc[i] <- kdnormc[i]*sum(clustering==i)/nn
      xmahal[clustering==i] <- cmah
    } # for i
    kdnorm <- sum(wkdnormc)
    if (largeisgood) kdnorm <- 1-kdnorm
#    print(kdnormc)
#    print(wkdnormc)
  } # normal distribution: Mahalanobis distances vs. chisq
#  print("normal end")
  if ("uniform" %in% distribution){
    dmat <- as.matrix(dist(x))
    for (i in 1:k){
#      cat("Cluster",i,"\n")
      dmati <- dmat[clustering==i,clustering==i]
      nnki <- nnk
      if(sum(clustering==i)<nnk+1) nnki <- 1
      if(sum(clustering==i)>1){
        knnd <- numeric(0)
        for (j in 1:sum(clustering==i))
          knnd[j] <- sort(dmati[j,],partial=nnki+1)[nnki+1]
      }
      else
        knnd <- 0
      alphap <- (2 * pi^(p/2))/(p * gamma(p/2))
      if (mean(knnd^p)>0)
        lambdap <- nnki/(alphap * mean(knnd^p))
      else
        lambdap <- 1/alphap
      ekn <- ecdf(knnd^p)
      eknm <- ekn(knnd^p)
      meknm <- min(eknm)
#      print(alphap)
#      print(lambdap)
#      print(knnd^p)
#      print(eknm)
#      print(eknm-pgamma(knnd^p,shape=nnki,rate=lambdap*alphap))
      kdunifc[i] <- max(abs(eknm-pgamma(knnd^p,shape=nnki,rate=lambdap*alphap)))
      kdunifc[i] <- max(c(abs(eknm-
            meknm-pgamma(knnd^p,shape=nnki,rate=lambdap*alphap)),kdunifc[i]))
      wkdunifc[i] <- kdunifc[i]*sum(clustering==i)/nn
      xdknn[clustering==i] <- knnd
    }
    kdunif <- sum(wkdunifc)
    if (largeisgood) kdunif <- 1-kdunif
  }
  out <- list(kdnorm=kdnorm,kdunif=kdunif,kdnormc=kdnormc,kdunifc=kdunifc,
              xmahal=xmahal,xdknn=xdknn)
# kdnorm,kdunif: Kolmogorov distance (norm: Mahal to chisq,
# unif: knndist to Gamma, clusterwise, weighted by cluster size),
# kdnormc, kdunifc: clusterwise values, xmahal: Mahalanobis distances
# xdknn: distance to kth nearest neighbour within cluster
  out
}

# This computes k-centroids clustering to k random centroids (Serhat's version)
stupidkcentroids <- function(xdata, k, distances = inherits(xdata, "dist")){
  if (distances){
    cdist <- as.matrix(xdata)
  }
  else{
    cdist <- as.matrix(dist(xdata))
  }
  n <- ncol(cdist)
  kcent <- sample(n,k)
  clustering <- rep(0,n)
  clustering[kcent] <- 1:k
  topredict <- (1:n)[clustering==0]
  clustering[topredict] <- apply(cdist[topredict,kcent], 1, which.min)
  
  if (distances){
    centroids <- kcent
  }
  else{
    centroids <- xdata[kcent,]
  }
# print(str(centroids))
  out <- list(partition = clustering, 
              centroids = centroids,
              distances=distances)
  out
}


# stupidkcentroids <- function(d,k){
#   cdist <- as.matrix(d)
#   n <- ncol(cdist)
#   kcent <- sample(n,k)
#   clustering <- rep(0,n)
#   clustering[kcent] <- 1:k
#   topredict <- (1:n)[clustering==0]
#   clustering[topredict] <- apply(cdist[topredict,kcent], 1, which.min)
#   clustering
# }

stupidkcentroidsCBI <- function(dmatrix,k,distances=TRUE){
  c1 <- stupidkcentroids(dmatrix,k,distances=distances)
  partition <- c1$partition
  cl <- list()
  for (i in 1:k) cl[[i]] <- partition == i
  out <- list(result = c1, nc = max(c1$partition), clusterlist = cl, partition = partition, 
        clustermethod = "randomkcentroids")
#  print(str(out))
#  print(out$result["centroids"])
  out
}

# This computes a clustering starting from k centroids in which in each step
# the nearest neighbour of an existing cluster is added to that cluster.
stupidknn <- function(d,k){
  cdist <- as.matrix(d)
  n <- ncol(cdist)
  kcent <- sample(n,k)
  clustering <- rep(0,n)
  clustering[kcent] <- 1:k
  classified <- clustering>0
  repeat{
    topredict <- clustering==0
    cdistx <- cdist[topredict,classified,drop=FALSE]
#    print(str(cdistx))
    opt.ind <- which(cdistx==min(cdistx),arr.ind=TRUE)[1,,drop=FALSE]
    clustering[topredict][opt.ind[1,1]] <- clustering[classified][opt.ind[1,2]]
    classified <- clustering>0
    if(sum(classified)==n) break
  }
  clustering
}

stupidknnCBI <- function(dmatrix,k){
  c1 <- stupidknn(dmatrix,k)
  partition <- c1
  cl <- list()
  for (i in 1:k) cl[[i]] <- partition == i
  out <- list(result = c1, nc = max(c1), clusterlist = cl, partition = partition, 
        clustermethod = "randomkcentroids")
  out
}


# This computes a clustering starting from k centroids in which in each step
# the point that is nearest to the furthest point of an existing cluster is added to that cluster.
stupidkfn <- function(d,k){
  cdist <- as.matrix(d)
  n <- ncol(cdist)
  kcent <- sample(n,k)
  clustering <- rep(0,n)
  clustering[kcent] <- 1:k
  classified <- clustering>0
  repeat{
    cclustering  <-  clustering[clustering>0]
    topredict <- clustering==0
    cdistx <- cdist[topredict,classified,drop=FALSE]
#    print(str(cdistx))
    opt.ind <- minmaxd <- numeric(0)
    for (i in 1:k){
      cmk <- cdistx[,cclustering==i,drop=FALSE]
      maxd <- apply(cmk,1,max)
      minmaxd[i] <- min(maxd)
      opt.ind[i] <- which.min(maxd)
#      opt.ind2[i] <- which(cmk==minmaxd[i],arr.ind=TRUE)[1,,drop=FALSE]
    }
    kopt <- which.min(minmaxd)
    clustering[topredict][opt.ind[kopt]] <- kopt
    classified <- clustering>0
    if(sum(classified)==n) break
  }
  clustering
}

stupidkfnCBI <- function(dmatrix,k){
  c1 <- stupidkfn(dmatrix,k)
  partition <- c1
  cl <- list()
  for (i in 1:k) cl[[i]] <- partition == i
  out <- list(result = c1, nc = max(c1), clusterlist = cl, partition = partition, 
        clustermethod = "randomkcentroids")
  out
}


# This computes a clustering starting from k centroids in which in each step
# the point that has minimum average distiance to an existing cluster is added to that cluster.
stupidkaven <- function(d,k){
  cdist <- as.matrix(d)
  n <- ncol(cdist)
  kcent <- sample(n,k)
  clustering <- rep(0,n)
  clustering[kcent] <- 1:k
  classified <- clustering>0
  repeat{
    cclustering  <-  clustering[clustering>0]
    topredict <- clustering==0
    cdistx <- cdist[topredict,classified,drop=FALSE]
#    print(str(cdistx))
    opt.ind <- minmaxd <- numeric(0)
    for (i in 1:k){
      cmk <- cdistx[,cclustering==i,drop=FALSE]
      maxd <- apply(cmk,1,mean)
      minmaxd[i] <- min(maxd)
      opt.ind[i] <- which.min(maxd)
#      opt.ind2[i] <- which(cmk==minmaxd[i],arr.ind=TRUE)[1,,drop=FALSE]
    }
    kopt <- which.min(minmaxd)
    clustering[topredict][opt.ind[kopt]] <- kopt
    classified <- clustering>0
    if(sum(classified)==n) break
  }
  clustering
}

stupidkavenCBI <- function(dmatrix,k){
  c1 <- stupidkaven(dmatrix,k)
  partition <- c1
  cl <- list()
  for (i in 1:k) cl[[i]] <- partition == i
  out <- list(result = c1, nc = max(c1), clusterlist = cl, partition = partition, 
        clustermethod = "randomkcentroids")
  out
}


# This prepares the output of cqcluster.stats for some other
# ways of plotting and printing, plot.clustatsum, print.clustatsum
# cbmethod: clustering method to be used for stability if useboot
clustatsum <- function(datadist=NULL,clustering,noisecluster=FALSE,
                       datanp=NULL,npstats=FALSE, useboot=FALSE,
                              bootclassif=NULL,
                              bootmethod="nselectboot",
                              bootruns=25, cbmethod=NULL,methodpars=NULL,
                       distmethod=NULL, dnnk=2,
                       pamcrit=TRUE,...){
  out <- list()
  if (is.null(datadist))
    datadist <- dist(datanp)
  outcstat <- cqcluster.stats(datadist,clustering,pamcrit=pamcrit,...)
#  out$sum <- summary(out$cstat)
  outsum <- summary(outcstat)
  out <- list()
  out$avewithin <- outsum$avewithin
  out$mnnd <- outsum$mnnd
  out$cvnnd <- outsum$cvnnd
  out$maxdiameter <- outsum$maxdiameter
  out$widestgap <- outsum$widestgap
  out$sindex <- outsum$sindex
  out$minsep <- outsum$minsep
  out$asw <-  outsum$asw
  out$dindex <- outsum$dindex
  out$denscut <- outsum$denscut
  out$highdgap <- outsum$highdgap
  out$pearsongamma <- outsum$pearsongamma
  out$withinss <- outsum$withinss
  out$entropy <- outsum$entropy
  if (pamcrit)
    out$pamc <- outsum$pamc
  if (npstats){
    outd <- distrsimilarity(datanp,clustering,nnk=dnnk,
                            noisecluster=noisecluster,
                            largeisgood=TRUE)
    out$kdnorm <- outd$kdnorm
    out$kdunif <- outd$kdunif
  }
  if (useboot){
    if (distmethod){
      if (bootmethod=="nselectboot"){
#        print(cbmethod)
        out$boot <- do.call(nselectboot,c(list(data=datadist,B=bootruns,
                                          distances=TRUE,
                              clustermethod=get(cbmethod),
                              classification=bootclassif,
                              krange=max(clustering),largeisgood=TRUE),
                              methodpars))$stabk[max(clustering)]
#        cat(cbmethod,"nselectboot end\n")
      }
      else
        out$boot <- do.call(prediction.strength,c(list(xdata=datadist,
                                                  M=bootruns,distances=TRUE,
                              clustermethod=get(cbmethod),
                              classification=bootclassif,
                              Gmin=max(clustering),
                              Gmax=max(clustering)),methodpars))$mean.pred[max(clustering)]
    }
    else{
      if (bootmethod=="nselectboot"){
#        print(cbmethod)
        out$boot <- do.call(nselectboot,c(list(data=datanp,B=bootruns,
                                          distances=FALSE,
                              clustermethod=get(cbmethod),
                              classification=bootclassif,
                              krange=max(clustering),largeisgood=TRUE),
                              methodpars))$stabk[max(clustering)]
#        cat(cbmethod,"nselectboot end\n")
      }
      else
        out$boot <- do.call(prediction.strength,c(list(xdata=datanp,M=bootruns,
                                                  distances=FALSE,
                              clustermethod=get(cbmethod),
                              classification=bootclassif,
                              Gmin=max(clustering),
                              Gmax=max(clustering)),methodpars))$mean.pred[max(clustering)]
    }
  } # if useboot
  out
}

    
# This applies stupidkcentroids, stupidknn, stupidkfn and stupidkaven
# lots of times to data.
randomclustersim <- function(datadist,datanp=NULL,npstats=FALSE,useboot=FALSE,
                              bootmethod="nselectboot",
                              bootruns=25, 
                      G,nnruns=100,kmruns=100,fnruns=100,avenruns=100,
                      nnk=4,dnnk=2,
                      pamcrit=TRUE, 
                      multicore=FALSE,cores=detectCores()-1,monitor=TRUE){
  nnsim <- function(k,g){
    if(monitor) cat(g," clusters; nn run ",k,"\n")
    solution <- data.frame(NA)
    names(solution) <- "avewithin"
    gcl <- stupidknn(datadist,g)
#    print(gcl)
    grs <- cqcluster.stats(datadist,gcl,nnk=nnk,pamcrit=pamcrit)
    grss <- summary(grs)
    solution$avewithin <- grss$avewithin
    solution$mnnd <- grss$mnnd
    solution$cvnnd <- grss$cvnnd
    solution$maxdiameter <- grss$maxdiameter
    solution$widestgap <- grss$widestgap
    solution$sindex <- grss$sindex
    solution$minsep <- grss$minsep
    solution$asw <-  grss$asw
    solution$dindex <- grss$dindex
    solution$denscut <- grss$denscut
    solution$highdgap <- grss$highdgap
    solution$pearsongamma <- grss$pearsongamma
    solution$withinss <- grss$withinss
    solution$entropy <- grss$entropy
    if (pamcrit)
      solution$pamc <- grss$pamc
    if (npstats){
      geysdist <- distrsimilarity(datanp,gcl,nnk=dnnk,largeisgood=TRUE)
      solution$kdnorm <- geysdist$kdnorm
      solution$kdunif <- geysdist$kdunif
    }
    if (useboot){
      if (bootmethod=="nselectboot")
        solution$boot <- nselectboot(datadist,B=bootruns,distances=TRUE,
                               clustermethod=stupidknnCBI,
                               classification="knn",
                               krange=g,largeisgood=TRUE)$stabk[g]
      else
        out$boot <- prediction.strength(datadist,M=bootruns,distances=TRUE,
                              clustermethod=stupidknnCBI,
                              classification="knn",
                              Gmin=g,
                              Gmax=g)$mean.pred[g]
    }
    solution
  }
  
  fnsim <- function(k,g){
    if(monitor) cat(g," clusters; fn run ",k,"\n")
#    print("start fnsim")
    solution <- data.frame(NA)
    names(solution) <- "avewithin"
    gcl <- stupidkfn(datadist,g)
#    print(gcl)
    grs <- cqcluster.stats(datadist,gcl,nnk=nnk,pamcrit=pamcrit)
    grss <- summary(grs)
    solution$avewithin <- grss$avewithin
    solution$mnnd <- grss$mnnd
    solution$cvnnd <- grss$cvnnd
    solution$maxdiameter <- grss$maxdiameter
    solution$widestgap <- grss$widestgap
    solution$sindex <- grss$sindex
    solution$minsep <- grss$minsep
    solution$asw <-  grss$asw
    solution$dindex <- grss$dindex
    solution$denscut <- grss$denscut
    solution$highdgap <- grss$highdgap
    solution$pearsongamma <- grss$pearsongamma
    solution$withinss <- grss$withinss
    solution$entropy <- grss$entropy
    if (pamcrit)
      solution$pamc <- grss$pamc
    if (npstats){
      geysdist <- distrsimilarity(datanp,gcl,nnk=dnnk,largeisgood=TRUE)
      solution$kdnorm <- geysdist$kdnorm
      solution$kdunif <- geysdist$kdunif
    }
    if (useboot){
#      print("boot")
      if (bootmethod=="nselectboot")
        solution$boot <- nselectboot(datadist,B=bootruns,distances=TRUE,
                               clustermethod=stupidkfnCBI,
                               classification="fn",
                               krange=g,largeisgood=TRUE)$stabk[g]
      else
        out$boot <- prediction.strength(datadist,M=bootruns,distances=TRUE,
                              clustermethod=stupidkfnCBI,
                              classification="fn",
                              Gmin=g,
                              Gmax=g)$mean.pred[g]
    }
#    print("end fnsim")
    solution
  }
  
  avensim <- function(k,g){
    if(monitor) cat(g," clusters; aven run ",k,"\n")
    solution <- data.frame(NA)
    names(solution) <- "avewithin"
    gcl <- stupidkaven(datadist,g)
#    print(gcl)
    grs <- cqcluster.stats(datadist,gcl,nnk=nnk,pamcrit=pamcrit)
    grss <- summary(grs)
    solution$avewithin <- grss$avewithin
    solution$mnnd <- grss$mnnd
    solution$cvnnd <- grss$cvnnd
    solution$maxdiameter <- grss$maxdiameter
    solution$widestgap <- grss$widestgap
    solution$sindex <- grss$sindex
    solution$minsep <- grss$minsep
    solution$asw <-  grss$asw
    solution$dindex <- grss$dindex
    solution$denscut <- grss$denscut
    solution$highdgap <- grss$highdgap
    solution$pearsongamma <- grss$pearsongamma
    solution$withinss <- grss$withinss
    solution$entropy <- grss$entropy
    if (pamcrit)
      solution$pamc <- grss$pamc
    if (npstats){
      geysdist <- distrsimilarity(datanp,gcl,nnk=dnnk,largeisgood=TRUE)
      solution$kdnorm <- geysdist$kdnorm
      solution$kdunif <- geysdist$kdunif
    }
    if (useboot){
      if (bootmethod=="nselectboot")
        solution$boot <- nselectboot(datadist,B=bootruns,distances=TRUE,
                               clustermethod=stupidkavenCBI,
                               classification="averagedist",
                               krange=g,largeisgood=TRUE)$stabk[g]
      else
        out$boot <- prediction.strength(datadist,M=bootruns,distances=TRUE,
                              clustermethod=stupidkavenCBI,
                              classification="averagedist",
                              Gmin=g,
                              Gmax=g)$mean.pred[g]
    }
     solution
  }
      
  kmsim <- function(k,g){
    if(monitor) cat(g," clusters; km run ",k,"\n")
    solution <- data.frame(NA)
    names(solution) <- "avewithin"
    gcl <- stupidkcentroids(datadist,g)$partition
#    print(gcl)
    grs <- cqcluster.stats(datadist,gcl,nnk=nnk,pamcrit=pamcrit)
    grss <- summary(grs)
    solution$avewithin <- grss$avewithin
    solution$mnnd <- grss$mnnd
    solution$cvnnd <- grss$cvnnd
    solution$maxdiameter <- grss$maxdiameter
    solution$widestgap <- grss$widestgap
    solution$sindex <- grss$sindex
    solution$minsep <- grss$minsep
    solution$asw <-  grss$asw
    solution$dindex <- grss$dindex
    solution$denscut <- grss$denscut
    solution$highdgap <- grss$highdgap
    solution$pearsongamma <- grss$pearsongamma
    solution$withinss <- grss$withinss
    solution$entropy <- grss$entropy
    if (pamcrit)
      solution$pamc <- grss$pamc
    if (npstats){
      geysdist <- distrsimilarity(datanp,gcl,nnk=dnnk,largeisgood=TRUE)
      solution$kdnorm <- geysdist$kdnorm
      solution$kdunif <- geysdist$kdunif
    }
    if (useboot){
      if (bootmethod=="nselectboot")
        solution$boot <- nselectboot(datadist,B=bootruns,distances=TRUE,
                               clustermethod=stupidkcentroidsCBI,
                               classification="centroid",
                               centroidname="centroids",
                               krange=g,largeisgood=TRUE)$stabk[g]
      else
        out$boot <- prediction.strength(datadist,M=bootruns,distances=TRUE,
                              clustermethod=stupidkcentroidsCBI,
                              classification="centroid",
                              centroidname="centroids",
                              Gmin=g,
                              Gmax=g)$mean.pred[g]
    }
     solution
  }
      
  out <- list()
  out$nn <- out$fn <- out$aven <- out$km <- list()
  out$nnruns <- nnruns
  out$fnruns <- fnruns
  out$avenruns <- avenruns
  out$kmruns <- kmruns
  for (g in G){
#    out$nn[[g]] <- out$km[[g]] <- data.frame()
    if (multicore){
      if (nnruns>0)
        out$nn[[g]] <- do.call(rbind,
                               mclapply(1:nnruns,nnsim,g=g,mc.cores=cores))
      if (fnruns>0)
        out$fn[[g]] <- do.call(rbind,
                               mclapply(1:fnruns,fnsim,g=g,mc.cores=cores))
      if (avenruns>0)
        out$aven[[g]] <- do.call(rbind,
                               mclapply(1:avenruns,avensim,g=g,mc.cores=cores))
      if(kmruns>0)
        out$km[[g]] <- do.call(rbind,
                               mclapply(1:kmruns,kmsim,g=g,mc.cores=cores))
    }
    else{
      if (nnruns>0)
        out$nn[[g]] <- do.call(rbind,lapply(1:nnruns,nnsim,g=g))
      if (fnruns>0)
        out$fn[[g]] <- do.call(rbind,lapply(1:fnruns,fnsim,g=g))
      if (avenruns>0)
        out$aven[[g]] <- do.call(rbind,lapply(1:avenruns,avensim,g=g))
      if (kmruns>0)
        out$km[[g]] <- do.call(rbind,lapply(1:kmruns,kmsim,g=g))
    }
  }
  out
}



######################################################
                                        # CBI-version
######################################################


# Standardise output of clustatsum by results of randomclustersim,
# to be used within clusterbenchstats
# percentage: quantile calibration method rather than sd
# useallmethods: use not only stupid clusterings
# useallg: Use all studpid clusterings with all numbers of clusters
# for calibration
# othernc: list of pairs of clustering method number and
# number of clusters larger than max(G).
# Only if useallg=TRUE, these are standardised as well.
# will add density mode index, 0.75*dindex+0.25*highdgap
cgrestandard <- function(clusum,clusim,G,percentage=FALSE,
                               useallmethods=FALSE,
                             useallg=FALSE, othernc=list()){

  qstan <- function(x,svec,nmethods){
#    print(x)
    ls <- length(svec[!is.na(svec)])
    out <- rep(NA,length(x))
    for (i in (1:nmethods)[!is.na(x)])
      out[i] <- (sum(x[i]>svec,na.rm=TRUE)+1)/(ls+1)
    out
  }
               
  sdstan <- function(x,svec){
    sm <- mean(svec,na.rm=TRUE)
    ssd <- sd(svec,na.rm=TRUE)
    (x-sm)/ssd
  }
    
  nmethods <- length(clusum$method)               
  statistics <- c("avewithin","mnnd","cvnnd","maxdiameter",
                  "widestgap","sindex","minsep","asw","dindex","denscut","highdgap",
                  "pearsongamma","withinss","entropy")
  if(!is.null(clusum[[1]][[min(G)]]$pamc))
     statistics <- c(statistics,"pamc")
  if(!is.null(clusum[[1]][[min(G)]]$kdnorm))
    statistics <- c(statistics,"kdnorm","kdunif")
  if(!is.null(clusum[[1]][[min(G)]]$boot))
    statistics <- c(statistics,"boot")

  out <- clusum
  lon <- length(othernc)
  for (st in statistics){
    if (useallg){
      svec <- numeric(0)
      sumvec <- list()
      for (g in G){
        sumvec[[g]] <- rep(NA,nmethods)
        for (i in 1:nmethods){
#          print(st)
#          print(g)
#          print(i)
          if (!identical(clusum[[i]][[g]],NA))
            sumvec[[g]][i] <- clusum[[i]][[g]][[st,exact=FALSE]]
          }
        if (clusim$kmruns>0)
          svec <- c(svec, clusim$km[[g]][[st,exact=FALSE]])
        if (clusim$nnruns>0)
          svec <- c(svec, clusim$nn[[g]][[st,exact=FALSE]])
        if (clusim$fnruns>0)
          svec <- c(svec, clusim$fn[[g]][[st,exact=FALSE]])
        if (clusim$avenruns>0)
          svec <- c(svec, clusim$aven[[g]][[st,exact=FALSE]])
        if (useallmethods)
          svec <- c(svec,sumvec[[g]])
      }
      for (g in G){
#        if(percentage){
#          print("cgrestandard")
#          print(str(sumvec))
#          print(str(svec))
#          print(nmethods)
#          rvec <- qstan(sumvec[[g]],svec,nmethods)
#        }
        if(percentage)
          rvec <- qstan(sumvec[[g]],svec,nmethods)
        else
          rvec <- sdstan(sumvec[[g]],svec)
        for (i in 1:nmethods)
          if (!identical(clusum[[i]][[g]],NA))
            out[[i]][[g]][st] <- rvec[i]
      }
      if (lon>0)
        for(j in 1:lon){
          i <- othernc[[j]][1]
          g <- othernc[[j]][2]
          if (g>1){
            if (percentage){
              ls <- length(svec[!is.na(svec)])
              out[[i]][[g]][st] <-
                (sum(clusum[[i]][[g]][[st,exact=FALSE]]>
                     svec,na.rm=TRUE)+1)/(ls+1)
            }
            else
              out[[i]][[g]][st] <-
                sdstan(clusum[[i]][[g]][[st,exact=FALSE]],svec)
          }
        }  
    } # if useallg
    else{
      for (g in G){
        svec <- numeric(0)
        sumvec <- rep(NA,nmethods)
#        browser()
        for (i in 1:nmethods){
#          print(st)
#          print(g)
#          print(i)
          if (!identical(clusum[[i]][[g]],NA))
            sumvec[i] <- clusum[[i]][[g]][[st,exact=FALSE]]
        }
        if (clusim$kmruns>0)
          svec <- c(svec, clusim$km[[g]][[st,exact=FALSE]])
        if (clusim$nnrun>0)
          svec <- c(svec, clusim$nn[[g]][[st,exact=FALSE]])
        if (clusim$fnruns>0)
          svec <- c(svec, clusim$fn[[g]][[st,exact=FALSE]])
        if (clusim$avenrun>0)
          svec <- c(svec, clusim$aven[[g]][[st,exact=FALSE]])
        if (useallmethods)
          svec <- c(svec,sumvec)
#        if(percentage){
#          print("cgres. not useallg")
#          print(str(sumvec))
#          print(str(svec))
#          print(nmethods)
#          rvec <- qstan(sumvec,svec,nmethods)
#        }
        if(percentage)
          rvec <- qstan(sumvec,svec,nmethods)
        else
          rvec <- sdstan(sumvec,svec)
        for (i in 1:nmethods)
          if (!identical(clusum[[i]][[g]],NA))
            out[[i]][[g]][st] <- rvec[i]
      } # for g
      if (lon>0)
        for(j in 1:lon){
          i <- othernc[[j]][1]
          g <- othernc[[j]][2]
          if (g>1)
              out[[i]][[g]][st] <- NA
        } 
    } # else (!useallg)
  } # for st
# Computation of dmode
  statistics <- c(statistics,"dmode")
#  print(nmethods)
  for (i in 1:nmethods){
    for (g in G){
      if (!identical(out[[i]][[g]],NA)){
#        print(i)
#        print(g)
#        print(out[[i]][[g]])
        out[[i]][[g]]$dmode <- 0.75*out[[i]][[g]]$dindex+0.25*out[[i]][[g]]$highdgap
      }
    }
  } # for i
  if (lon>0)
    for(j in 1:lon){
      i <- othernc[[j]][1]
      g <- othernc[[j]][2]
      if (g>1)
        out[[i]][[g]]$dmode <- 0.75*out[[i]][[g]]$dindex+0.25*out[[i]][[g]]$highdgap
    }
  class(out) <- "valstat"
  out
}
      




# clustermethod: string vector with CBI functions
# clustermethodpars: list of length length(clustermethod)
# that specifies parameters for all clustermethods
# distmethod: Will the method interpret input data as distances?
# ncinput: Does the method run with number of clusters as input?

# Note that different parameter choices of, e.g., dbscan, can be
# compared providing dbscanCBI several times as clustermethod
# with different clustermethodpars.
cluster.magazine <- function(data,G,diss = inherits(data, "dist"),
                             scaling=TRUE, clustermethod,
                             distmethod=rep(TRUE,length(clustermethod)),
                             ncinput=rep(TRUE,length(clustermethod)),
                             clustermethodpars,
#                             nstart=10,iter.max=100,
#                             nnk=0, emModelNames=NULL, mdsmethod="classical",
#                             mdsdim=4,
#                             summary.out=FALSE,points.out=FALSE,
#                             usepam=TRUE, samples=100,
#                             usepdf=TRUE, graphtype="pairs",
#                             lambda=c(0.1,0.01,0.001), 
                             trace=TRUE){
# scaling kmeans, linkage methods, pam/clara
# nstart kmeans    
# iter.max kmeans
# nnk mclust
# emModelNames mclust
# mdsmethod distnoisemclust
# mdsdim distnoisemclust 
# summary.out mclust
# points.out distnoisemclust
# samples clara
# graphtype pdfCluster  
# lambda pdfCluster, only effect if graphtype="pairs"    
  out <- list()
  out$output <- list()
  out$clustering <- list()
  out$noise <- list()
  out$othernc <- list()
  otherncc <- 1
#  out$method <- character(0)
#  else sdata <- data
  if(diss){
    if(all(distmethod))
      datadist <- as.dist(data)
    else
      stop("If the input is distance data, all methods must be distmethods.")
  }
  else{
    if (!identical(scaling, FALSE)) 
      sdata <- scale(data, center = TRUE, scale = scaling)
    else
      sdata <- data
    if (any(distmethod))
      datadist <- dist(sdata)
  }
  lmo <- length(clustermethod)
  for (i in 1:lmo){
    if (trace) print(clustermethod[i])
    out$output[[i]] <- out$clustering[[i]] <- out$noise[[i]] <- as.list(rep(NA,max(G)))
    clusterfunction <- get(clustermethod[i])
    if (ncinput[[i]]){
      for (g in G){
        cpars <- clustermethodpars[[i]]
        cpars$k <- g
        if (distmethod[i]){
          if (clustermethod[[i]] %in% c("disthclustCBI","distnoisemclustCBI",
                                        "disttrimkmeansCBI"))
            cpars$dmatrix <- datadist
          else
            cpars$data <- datadist
        }
        else
          cpars$data <- sdata
        out$output[[i]][[g]] <- do.call(clusterfunction, cpars)
        out$output[[i]][[g]]$clusterlist <- NULL
        out$clustering[[i]][[g]] <- out$output[[i]][[g]]$partition
        if (is.null(out$output[[i]][[g]]$nccl))
          out$noise[[i]][[g]] <- FALSE
        else{
          if (out$output[[i]][[g]]$nc>out$output[[i]][[g]]$nccl)
            out$noise[[i]][[g]] <- TRUE
          else
            out$noise[[i]][[g]] <- FALSE
        }
      }
    } # if ncinput
    else{
      cpars <- clustermethodpars[[i]]
      if (distmethod[i]){
        if (clustermethod[[i]] %in% c("disthclustCBI","distnoisemclustCBI",
                                      "disttrimkmeansCBI"))
          cpars$dmatrix <- datadist
        else
          cpars$data <- datadist
      }
      else
        cpars$data <- sdata
      coutput <- do.call(clusterfunction, cpars)
      coutput$clusterlist <- NULL
      if (is.null(coutput$nccl)){
        cnum <- coutput$nc
        out$noise[[i]][[cnum]] <- FALSE
      }
      else{
        cnum <- max(coutput$nccl,1)      
        if (coutput$nc>cnum)
          out$noise[[i]][[cnum]] <- TRUE
        else
          out$noise[[i]][[cnum]] <- FALSE
      }
      if (cnum>max(G) | cnum<min(G)){
        out$othernc[[otherncc]] <- c(i,cnum)
        otherncc <- otherncc+1
      }
      out$output[[i]][[cnum]] <- do.call(clusterfunction, cpars)
      out$clustering[[i]][[cnum]] <- out$output[[i]][[cnum]]$partition
    }
  }
  out  
}
# othernc will be a list of pairs of method number and number of clusters
# whenever the number of clusters is larger max(G)



# useallmethods, useallg: These are for restandardisation;
# useallmethods means that all method results are used, not only stupid
# clusterings; useallg: standardisation uses all values of G everywhere
# (not only for the same value)
# ncinput and others: see cluster.magazine.
# if ncinput=FALSE for some methods and number of clusters is
# estimated > max(G), see cgrestandard for the handling.
# This requires useallg=TRUE, otherwise these results are ignored
# for standardisation.
#
# useboot will involve a stability resampling method.
# bootclassif is a vector of classification methods to be used with
# the different clustering methods.
# bootmethod is either "nselectboot" or "prediction.strength"
# bootruns is the number of bootstrap replicates, all only
# active if useboot=TRUE.

# Help page: clustermethodpars last entry must be specified
# Mentions output list member statistics that doesn't exist
# (it's stat$statistics).
# Can't use methods like dbscan that don't take fixed nc with useboot!
clusterbenchstats <- function(data,G,diss = inherits(data, "dist"),
                                  scaling=TRUE, clustermethod,
                                  methodnames=clustermethod,
                              distmethod=rep(TRUE,length(clustermethod)),
                              ncinput=rep(TRUE,length(clustermethod)),
                              clustermethodpars,
#                              nstart=10,iter.max=100,
#                              nnk=0, emModelNames=NULL, mdsmethod="classical",
#                              mdsdim=4,summary.out=FALSE,points.out=FALSE,
#                              usepam=TRUE, samples=100,
                              npstats=FALSE,
                              useboot=FALSE,
                              bootclassif=NULL,
                              bootmethod="nselectboot",
                              bootruns=25,
#                              usepdf=TRUE, graphtype="pairs",
#                              lambda=c(0.1,0.01,0.001), 
                              trace=TRUE,
                              pamcrit=TRUE,snnk=2,
                              dnnk=2,
#                              noisecluster=FALSE,
                              nnruns=100,kmruns=100,fnruns=100,avenruns=100,
                              multicore=FALSE,cores=detectCores()-1,
                              useallmethods=TRUE,
                              useallg=FALSE,...){

  comsum <- function(i){
    if (trace)
      cat("comsum ",i,"\n")
    statl <- as.list(rep(NA,max(G)))
    for (g in G)
      if(!identical(out$cm$clustering[[i]][[g]],NA))
        statl[[g]] <- clustatsum(datadist,out$cm$clustering[[i]][[g]],
                                         noisecluster=out$cm$noise[[i]][[g]],
                                         datanp=data,npstats=npstats,
                                 useboot=useboot,
                                 bootclassif=bootclassif[i],
                                 bootmethod=bootmethod,
                                 bootruns=bootruns, cbmethod=clustermethod[i],
                                 methodpars=clustermethodpars[[i]],
                                 distmethod=distmethod[i],
                                 nnk=snnk,pamcrit=pamcrit,...)
    lon <- length(out$cm$othernc)
    if (lon>0)
      for (j in 1:lon)
        if(out$cm$othernc[[j]][1]==i){
          g <- out$cm$othernc[[j]][2]
          if (g>1)
            statl[[g]] <-
              clustatsum(datadist,out$cm$clustering[[i]][[g]],
                                         noisecluster=out$cm$noise[[i]][[g]],
                                         datanp=data,npstats=npstats,
                                 useboot=useboot,
                                 bootclassif=bootclassif[i],
                                 bootmethod=bootmethod,
                                 cbmethod=clustermethod[i],
                                 methodpars=clustermethodpars[[i]],
                                 bootruns=bootruns, 
                       nnk=snnk,pamcrit=pamcrit,...)
        }        
    statl
  }
   
  
  if(diss){
    datadist <- as.dist(data)
    data <- datadist
    datanp <- NULL
    if ("kmeansCBI" %in% clustermethod)
      warning("diss is TRUE, clusterbenchstats expects dissimilarities,
               but kmeansCBI doesn't operate on dissimilarities.
               kmeansCBI shouldn't be used with diss=TRUE")
    if ("noisemclustCBI" %in% clustermethod)
      warning("diss is TRUE, clusterbenchstats expects dissimilarities,
               but noisemclustCBI doesn't operate on dissimilarities.
               noisemclustCBI shouldn't be used with diss=TRUE")
    if ("tclustCBI" %in% clustermethod)
      warning("diss is TRUE, clusterbenchstats expects dissimilarities,
               but tclustCBI doesn't operate on dissimilarities.
               tclustCBI shouldn't be used with diss=TRUE")
    if ("mahalCBI" %in% clustermethod)
      warning("diss is TRUE, clusterbenchstats expects dissimilarities,
               but mahalCBI doesn't operate on dissimilarities.
               mahalCBI shouldn't be used with diss=TRUE")
    if ("speccCBI" %in% clustermethod)
      warning("diss is TRUE, clusterbenchstats expects dissimilarities,
               but speccCBI doesn't operate on dissimilarities.
               speccCBI shouldn't be used with diss=TRUE")
    if ("pdfclustCBI" %in% clustermethod)
      warning("diss is TRUE, clusterbenchstats expects dissimilarities,
               but pdfclustCBI doesn't operate on dissimilarities.
               pdfclustCBI shouldn't be used with diss=TRUE")
    if ("mergenormCBI" %in% clustermethod)
      warning("diss is TRUE, clusterbenchstats expects dissimilarities,
               but mergenormCBI doesn't operate on dissimilarities.
               mergenormCBI shouldn't be used with diss=TRUE")
    if ("hclusttreeCBI" %in% clustermethod)
      warning("diss is TRUE, clusterbenchstats expects dissimilarities,
               but hclusttreeCBI doesn't operate on dissimilarities.
               hclusttreeCBI shouldn't be used with diss=TRUE")
    if ("hclustCBI" %in% clustermethod)
      warning("diss is TRUE, clusterbenchstats expects dissimilarities,
               but hclustCBI doesn't operate on dissimilarities.
               With diss=TRUE use disthclustCBI!")
  }
  else{
    datanp <- data
    datadist <- dist(data)
  }
  out <- list()
  out$cm <- cluster.magazine(data,G=G,diss = diss,
                             scaling=scaling,
                             clustermethod=clustermethod,
                             distmethod=distmethod,
                             ncinput=ncinput,
                             clustermethodpars=clustermethodpars,
#                             nstart=nstart,iter.max=iter.max,
#                              nnk=nnk, emModelNames=emModelNames,
#                              mdsmethod=mdsmethod,
#                              mdsdim=mdsdim,summary.out=summary.out,
#                              points.out=points.out,
#                              usepam=usepam, samples=samples,
#                              usepdf=usepdf, graphtype=graphtype,
#                              lambda=lambda, 
                             trace=trace)
  nmethods <- length(clustermethod)
#  browser()
  if (trace)
    print("Computation of validity statistics")
  if (multicore)
    out$stat <- mclapply(1:nmethods,comsum,mc.cores=cores)
  else
    out$stat <- lapply(1:nmethods,comsum)
#  print(str(out$stat))
  out$stat$maxG <- max(G)
  out$stat$minG <- min(G)
  out$stat$method <-  clustermethod
  out$stat$name <-  methodnames
  class(out$stat) <- "valstat"
#  browser()
  if (trace)
    print("Simulation")
  out$sim <- randomclustersim(datadist,datanp,npstats=npstats,
                                 useboot=useboot,
                                 bootmethod=bootmethod,
                                 bootruns=bootruns, G=G,
                       nnruns=nnruns,kmruns=kmruns,fnruns=fnruns,
                       avenruns=avenruns,
                       nnk=snnk,dnnk=dnnk,pamcrit=pamcrit,
                       multicore=multicore,
                       cores=cores,monitor=trace)
  if (trace)
    print("Simulation quantile re-standardisation")
#  browser()
  out$qstat <- cgrestandard(out$stat,out$sim,G,percentage=TRUE,
                                useallmethods=useallmethods,useallg=useallg,
                                out$cm$othernc)
  if (trace)
    print("Simulation sd re-standardisation")
  out$sstat <- cgrestandard(out$stat,out$sim,G,percentage=FALSE,
                            useallmethods=useallmethods,useallg=useallg,
                                out$cm$othernc)
  ostatistics <- c("avewithin","mnnd","cvnnd","maxdiameter",
                  "widestgap","sindex","minsep","asw","dindex","denscut","highdgap",
                  "pearsongamma","withinss","entropy")
  if(pamcrit)
     ostatistics <- c(ostatistics,"pamc")
  if(npstats)
    ostatistics <- c(ostatistics,"kdnorm","kdunif")
  if(useboot)
    ostatistics <- c(ostatistics,"boot")
  ostatistics <- c(ostatistics,"dmode")
  out$stat$statistics <- out$qstat$statistics <- out$sstat$statistics <- ostatistics
  class(out) <- "clusterbenchstats"
  out
}
#> str(ds,max.level=1)
#List of 6
# $ cm        :List of 4
# $ stat      :List of 11
#  ..- attr(*, "class")= chr "clustatsum"
# $ sim       :List of 4
# $ qstat     :List of 11
#  ..- attr(*, "class")= chr "clustatsum"
# $ sstat     :List of 11
#  ..- attr(*, "class")= chr "clustatsum"
# $ statistics: chr [1:17] "avewithin" "mnnd" "cvnnd" "maxdiameter" ...

# out$cm has results of cluster.magazine
# > str(ds$cm,max.level=1)
#List of 4
# $ output    :List of 8
# $ clustering:List of 8
# $ noise     :List of 8
# $ othernc   :List of 1
# out$cm$output[[i]][[j]]: method i number of clusters j, CBI method output
# out$cm$clustering[[i]][[j]]: method i number of clusters j, clustering
# out$cm$noise[[i]][[j]]: method i number of clusters j is there noise?
# out$cm$othernc: numbers of clusters for methods that estimate this
# (list with methodsnumber, number of clusters) outside the specified range.

# out$stat has validity statistics
# out$stat[[i]][[j]]: method i number of clusters j, all statistics 
# out$stat$method has methods, out$stat$name has method names to be used for
# listing and plotting
# also out$stat$maxG and minG

# out$sim$km and out$sim$nn[[j]] have the stupid clustering results for j clusters
# There's also out$sim$kmruns and nnruns, number of runs

# out$qstat and out$sstat are organised like $stat, with quantile and
# stdev-standardised statistics


print.clusterbenchstats <- function(x,...){
  cat("Output object of clusterbenchstats.\n")
  cat("Clustering methods: ",x$stat$name," \n")
  cat("Cluster validation statistics: ",x$stat$statistics,"\n") 
  cat("Numbers of clusters minimum: ",x$stat$minG," maximum: ",
      x$stat$maxG,"\n")
  cat("Output components are cm, stat, sim, qstat, sstat.")
  cat("stat, qstat, and sstat are valstat-objects.")
  cat("Use plot.valstat and print.valstat on these to get more information.\n")
}


# include.othernc will overwrite xlim default.
# It should be clusterbenchoutput$cm$othernc otherwise. 
plot.valstat <- function(x,simobject=NULL,statistic="sindex",
                            xlim=NULL,ylim=c(0,1),
                            nmethods=length(x)-5,
                            col=1:nmethods,cex=1,pch=c("c","f","a","n"),
                            simcol=rep(grey(0.7),4),
                         shift=c(-0.1,-1/3,1/3,0.1),include.othernc=NULL,...){
  ion <- !is.null(include.othernc)
  if (ion) ion <- length(include.othernc)>0
  othernc <- numeric(0)
  q <- x$minG:x$maxG
  if (ion){
    for (i in 1:length(include.othernc))
      othernc[i] <- include.othernc[[i]][2]
    q <- sort(c(othernc,q))
  }
  if (is.null(xlim))
    xlim <- c(min(q)-0.5,max(q)+0.5)  
  plot(1,type="n",xlim=xlim,xlab="Number of clusters",
       ylim=ylim,ylab=statistic,xaxt="n")
  axis(1,at=q)
  if(!is.null(simobject)){
    for(g in x$minG:x$maxG){
      if(simobject$kmruns>0)
        points(rep(g+shift[1],simobject$kmruns),
               unlist(simobject$km[[g]][statistic]),
               pch=pch[1],col=simcol[1],cex=cex)
      if(simobject$nnruns>0)
        points(rep(g+shift[4],simobject$nnruns),
               unlist(simobject$nn[[g]][statistic]),
               pch=pch[4],col=simcol[4], cex=cex)
      if(simobject$fnruns>0)
        points(rep(g+shift[2],simobject$fnruns),
               unlist(simobject$fn[[g]][statistic]),
               pch=pch[2],col=simcol[2], cex=cex)
      if(simobject$avenruns>0)
        points(rep(g+shift[3],simobject$avenruns),
               unlist(simobject$aven[[g]][statistic]),
               pch=pch[3],col=simcol[3], cex=cex)
    }
  }    
  for(i in 1:nmethods)
    for(g in x$minG:x$maxG){
#      print(i)
#      print(g)
#      print(x[[i]][[g]])
      if(!identical(x[[i]][[g]],NA)){
#        points(g,x[[i]][[g]]$stats[statistic],pch=pch,col=col[i])
        text(g,x[[i]][[g]][statistic],labels=x$name[i],
             col=col[i],cex=cex)
      }
    }
    if (ion)
      for(h in 1:length(include.othernc)){
        mn <- include.othernc[[h]][1]
        ch <- include.othernc[[h]][2]
        if (ch>1)
          text(ch,x[[mn]][[ch]][statistic],labels=x$name[mn],
             col=col[mn],cex=cex)
      }        
}

print.valstat <- function(x,statistics=x$statistics,
                          nmethods=length(x)-5,aggregate=FALSE,
                          weights=NULL,digits=2,
                          include.othernc=NULL,...){

  q <- x$minG:x$maxG
  ion <- !is.null(include.othernc)
  if (ion) ion <- length(include.othernc)>0
  othernc <- numeric(0)
  if (ion){
    for (i in 1:length(include.othernc))
      othernc[i] <- include.othernc[[i]][2]
    q <- sort(c(othernc,q))
    q <- q[q>1]
  }
  lq <- length(q)
  if (aggregate){
    aggmatrix <- matrix(NA,nrow=nmethods,ncol=lq)
    for(j in 1:nmethods){
      for(i in 1:lq)
        if(length(x[[j]])>=q[i]){
          if(!identical(x[[j]][[q[i]]],NA)){
#            print(as.vector(as.matrix(x[[j]][[q[i]]])))
            x[[j]][[q[i]]]$aggregate <- weighted.mean(unlist(x[[j]][[q[i]]]),weights)
          }
        }
    }
    statistics <- c(statistics,"aggregate")
  } # if aggregate
  printobject <- list()
  l <- 1
  for(statistic in statistics){
    cat(statistic,"\n")
    printobject[[l]] <- data.frame(x$name)
    q <- x$minG:x$maxG
    ion <- !is.null(include.othernc)
    if (ion) ion <- length(include.othernc)>0
    othernc <- numeric(0)
    if (ion){
      for (i in 1:length(include.othernc))
        othernc[i] <- include.othernc[[i]][2]
      q <- sort(c(othernc,q))
      q <- q[q>1]
    }
    lq <- length(q)
    for(i in 1:lq){
      printobject[[l]][,i+1] <- rep(NA,nmethods)
      for (j in 1:nmethods){
          if(length(x[[j]])>=q[i])
            if(!identical(x[[j]][[q[i]]],NA)){
              if(is.null(unlist(x[[j]][[q[i]]][statistic])))
                printobject[[l]][j,i+1] <- NA
              else
                printobject[[l]][j,i+1] <- x[[j]][[q[i]]][statistic]
            } 
      } # for j
    } # for i
    names(printobject[[l]]) <- c("method",sapply(q,toString))
    print(printobject[[l]],digits=digits)
    l <- l+1
  } # for statistic
  invisible(printobject)
}

Try the fpc package in your browser

Any scripts or data that you put into this service are public.

fpc documentation built on Jan. 7, 2023, 1:13 a.m.