## tests/clara-NAs.R

library(cluster)

## Small 17 x 2 test matrix with missing values: row 6 is NA in both
## columns; rows 3, 14 and 15 lack only their second coordinate.
x <- matrix(c(
    ## column 1
    0, -4, -22, -14, 0, NA, -28, 1, 10, -1,
    113, 100, 102, 104, 107, 108, 101,
    ## column 2
    -5, -14, NA, -35, -30, NA, 7, 2, -18, 13,
    47, 64, 48, NA, NA, 44, 65),
    nrow = 17)
print(x)
## Pairwise Manhattan distances between the rows of x, shown in full,
## followed by their summary.
d <- dist(x, method = "manhattan")
print(d)
summary(d, na.rm = TRUE) # max = 270
## Cluster the small NA-laden matrix `x` into 2 groups with detailed tracing.
## Wrapped in try() so that an error is reported without aborting the script
## (presumably triggered by row 6 of `x`, which is NA in both columns).
## First call with "trace" (seg.fault typically later ...):
try( clara(x, k=2, metric="manhattan", sampsize=10, trace = 3) )
## Originally: this already showed the problem:  nbest[] = c(0,0,...,0) must be WRONG!!
## Now: gives the proper error message.

## S-plus 6.1.2 (rel.2 for Linux, 2002) gives
##> cc <- clara(x, k=2, metric="manhattan", samples=2, sampsize=10)
## Problem in .Fortran("clara",: Internal error: data for decrementing
## ref.count didn't point to a valid arena (0x0), while calling subroutine clara

## The large example from  clara.R -- made small enough to still provoke
## the    "** dysta2() ...  OUT"  problem  {no longer!}
## 88 x 2 test matrix, written out one column at a time.
## Row 33 is NA in both columns; rows 29 and 38 lack only column 1.
x <- cbind(
    ## column 1 (88 values)
    c(  0,   3,  -4,  62,   1,   3,  -7,  45,
       36,  46,  45,  54, -10,  51,  49,  -5,
       13,  -6,  49,  52,  57,  39,  -1,  55,
       68,  -3,  51,  11,  NA,   9,  -3,  50,
       NA,  58,   9,  52,  12,  NA,  47, -12,
       -6,  -9,   5,  30,  38,  54,  -5,  39,
       50,  50,  54,  43,   7,  64,  55,   4,
        0,  72,  54,  37,  59,  -1,   8,  43,
       50,  -2,  56,  -8,  43,   6,   4,  48,
       -2,  14,  45,  49,  56,  51,  45,  11,
       10,  42,  50,   2, -12,   3,   1,   2),
    ## column 2 (88 values)
    c(  2, -14,  -4,   8,   0,   3, -11,   8,
        5,  14,  -1,   9,   0,  19,  10,  -2,
       -9,   9,   2,  16,  10,   4,   1,  12,
        7,  -4,  27,  -8,  -9,  -9,   2,   8,
       NA,  13, -23,  -3,  -5,   1,  15,  -3,
        5,  -9,  -5,  14,   8,   7,  -4,  26,
       20,  10,   8,  17,   4,  14,  23,  -2,
       23,   2,  16,   5,   5,  -3,  12,   5,
       14,  -2,   4,   2,  -2,   7,   9,   1,
      -15,  -1,   9,  23,   1,   7,  13,   2,
      -11,  16,  12, -11, -14,   2,   6,  -8))
## Compact structure display -- expect a numeric 88 x 2 matrix.
str(x)
## Row 33 of `x` is NA in both columns.  Both calls below are wrapped in
## try() so that an error from clara() is reported without stopping the script.
try(clara(x, 2, samples = 20, trace = 3))# 2nd sample did show dysta2() problem
## To see error message for > 1 missing:
## (rbind(NA, x) prepends a row of NAs, so two rows are entirely missing)
try(clara(rbind(NA,x), 2))

## Drop row 33, the only row that is NA in both columns; the NAs that remain
## (column 1 of original rows 29 and 38) leave each row partially observed.
x <- x[-33,]
## still had the ** dysta2() .. OUT" problem {no longer!}
## c2 : 2-cluster clara() fit on 12 samples, with detailed tracing.
c2  <- clara(x, 2, samples = 12, trace = 3)
## c2.: same fit, but with correct.d=TRUE and less tracing.
c2. <- clara(x, 2, samples = 12, trace = 1, correct.d=TRUE)
## p2g: pam() on Gower dissimilarities of the same data; in this file it is
## only referenced by the disabled clara(*, "gower") comparison further down.
p2g <- pam(daisy(x,"gower"), k=2, trace = 3)
## Everything inside this if(FALSE) block is skipped at run time.
if(FALSE) { ## disabled clara(*, "gower") for now (2023-11-30):
## clara() with the Gower metric, using all rows as the sample size:
c2g <- clara(x, 2, samples = 12, sampsize=nrow(x), trace = 2, metric = "gower", pamLike=TRUE, correct.d=TRUE)
## index of the "call" component, which is excluded from the comparison below
(icall <- which(names(c2) == "call"))
## c2g and p2g  are *quite* different !
table(c2g$clustering,
      p2g$clustering)
##    1  2
## 1 40 32
## 2 15  0  <<  not *one* pair of  {2,2}   !?!

## NOTE(review): this check compares c2 with c2. and does not involve c2g,
## yet it is disabled along with the "gower" code -- confirm that is intended.
stopifnot(exprs = {
    all.equal(c2[-icall], c2.[-icall])
})
}# no "gower" for now

## Check the error message when many observations are entirely missing:
## load the xclara data set and overwrite 50 randomly chosen rows with NA.
data(xclara)
## Fix the RNG version and seed so that the same 50 rows are drawn each time
## (any warning from RNGversion() is suppressed).
suppressWarnings(RNGversion("3.5.0")) # back compatibility of results
set.seed(123)
xclara[sample(nrow(xclara), 50),] <- NA
## try() lets the script continue after the expected error.
try( clara(xclara, k = 3) ) #-> "nice" error message  {.. first 12 missing obs} :
## Error in clara(xclara, k = 3) :
##   50 observations (74,126,137,308,411,423,438,451,642,686,689,735 ...) have *only* NAs
##     --> omit them for clustering!

## Try the cluster package in your browser

## Any scripts or data that you put into this service are public.

## cluster documentation built on April 3, 2025, 10:33 p.m.