#### $Id: agnes.q 6801 2014-09-04 15:47:41Z maechler $
agnes <- function(x, diss = inherits(x, "dist"), metric = "euclidean",
stand = FALSE, method = "average", par.method,
keep.diss = n < 100, keep.data = !diss, trace.lev = 0)
{
METHODS <- c("average", "single","complete", "ward","weighted", "flexible", "gaverage")
## hclust has more; 1 2 3 4 5 6 7
meth <- pmatch(method, METHODS)
if(is.na(meth)) stop("invalid clustering method")
if(meth == -1) stop("ambiguous clustering method")
cl. <- match.call()
method <- METHODS[meth]
if(method == "flexible") {
## Lance-Williams formula (but *constant* coefficients):
stopifnot((np <- length(a <- as.numeric(par.method))) >= 1)
attr(method,"par") <- par.method <-
if(np == 1)## default (a1= a, a2= a, b= 1-2a, c = 0)
c(a, a, 1-2*a, 0)
else if(np == 3)
c(a, 0)
else if(np == 4) a
else stop("'par.method' must be of length 1, 3, or 4")
} else if (method == "gaverage") {
attr(method,"par") <- par.method <- if (missing(par.method)) {
## Default par.method: Using beta = -0.1 as advised in Belbin et al. (1992)
beta <- -0.1
c(1-beta, 1-beta, beta, 0)
} else {
stopifnot((np <- length(b <- as.numeric(par.method))) >= 1)
if(np == 1)## default (a1= 1-b, a2= 1-b, b= b, c= 0)
c(1-b, 1-b, b, 0)
else if(np == 3)
c(b, 0)
else if(np == 4) b
else stop("'par.method' must be of length 1, 3, or 4")
}
} else ## dummy (passed to C)
par.method <- double()
if((diss <- as.logical(diss))) {
## check type of input vector
if(any(is.na(x))) stop("NA-values in the dissimilarity matrix not allowed.")
if(data.class(x) != "dissimilarity") { # try to convert to
if(!is.null(dim(x))) {
x <- as.dist(x) # or give an error
} else {
## possibly convert input *vector*
if(!is.numeric(x) || is.na(n <- sizeDiss(x)))
stop("'x' is not and cannot be converted to class \"dissimilarity\"")
attr(x, "Size") <- n
}
class(x) <- dissiCl
if(is.null(attr(x,"Metric"))) attr(x, "Metric") <- "unspecified"
}
n <- attr(x, "Size")
dv <- x[lower.to.upper.tri.inds(n)]
## prepare arguments for the Fortran call
dv <- c(0., dv)# "double", 1st elem. "only for Fortran" (?)
jp <- 1
mdata <- FALSE
ndyst <- 0
x2 <- double(1)
}
else {
## check input matrix and standardize, if necessary
x <- data.matrix(x)
if(!is.numeric(x)) stop("x is not a numeric dataframe or matrix.")
x2 <- if(stand) scale(x, scale = apply(x, 2, meanabsdev)) else x
storage.mode(x2) <- "double"
ndyst <- if(metric == "manhattan") 2 else 1
n <- nrow(x2)
jp <- ncol(x2)
if((mdata <- any(inax <- is.na(x2)))) { # TRUE if x[] has any NAs
jtmd <- as.integer(ifelse(apply(inax, 2, any), -1, 1))
## VALue for MISsing DATa
valmisdat <- 1.1* max(abs(range(x2, na.rm=TRUE)))
x2[inax] <- valmisdat
valmd <- rep(valmisdat, jp)
}
dv <- double(1 + (n * (n - 1))/2)
}
if(n <= 1) stop("need at least 2 objects to cluster")
stopifnot(length(trace.lev <- as.integer(trace.lev)) == 1)
C.keep.diss <- keep.diss && !diss
res <- .C(twins,
as.integer(n),
as.integer(jp),
x2,
dv,
dis = double(if(C.keep.diss) length(dv) else 1),
jdyss = if(C.keep.diss) diss + 10L else as.integer(diss),
if(mdata) valmd else double(1),
if(mdata) jtmd else integer(jp),
as.integer(ndyst),
1L,# jalg = 1 <==> AGNES
meth,# integer
integer(n),
ner = integer(n),
ban = double(n),
ac = double(1),
par.method,
merge = matrix(0L, n - 1, 2), # integer
trace = trace.lev,
DUP = FALSE)
if(!diss) {
##give warning if some dissimilarities are missing.
if(res$jdyss == -1)
stop("No clustering performed, NA-values in the dissimilarity matrix.\n" )
if(keep.diss) {
## adapt Fortran output to S:
## convert lower matrix,read by rows, to upper matrix, read by rows.
disv <- res$dis[-1]
disv[disv == -1] <- NA
disv <- disv[upper.to.lower.tri.inds(n)]
class(disv) <- dissiCl
attr(disv, "Size") <- nrow(x)
attr(disv, "Metric") <- metric
attr(disv, "Labels") <- dimnames(x)[[1]]
}
##add labels to Fortran output
if(length(dimnames(x)[[1]]) != 0)
order.lab <- dimnames(x)[[1]][res$ner]
}
else {
if(keep.diss) disv <- x
##add labels to Fortran output
if(length(attr(x, "Labels")) != 0)
order.lab <- attr(x, "Labels")[res$ner]
}
clustering <- list(order = res$ner, height = res$ban[-1], ac = res$ac,
merge = res$merge, diss = if(keep.diss)disv,
call = cl., method = METHODS[meth])
if(exists("order.lab"))
clustering$order.lab <- order.lab
if(keep.data && !diss) {
if(mdata) x2[x2 == valmisdat] <- NA
clustering$data <- x2
}
class(clustering) <- c("agnes", "twins")
clustering
}
summary.agnes <- function(object, ...)
{
class(object) <- "summary.agnes"
object
}
print.agnes <- function(x, ...)
{
cat("Call: ", deparse(x$call),
"\nAgglomerative coefficient: ", format(x$ac, ...),
"\nOrder of objects:\n")
print(if(length(x$order.lab) != 0) x$order.lab else x$order,
quote = FALSE, ...)
cat("Height (summary):\n"); print(summary(x$height), ...)
cat("\nAvailable components:\n"); print(names(x), ...)
invisible(x)
}
print.summary.agnes <- function(x, ...)
{
## a bit more than print.agnes() ..
cat("Object of class 'agnes' from call:\n", deparse(x$call),
"\nAgglomerative coefficient: ", format(x$ac, ...),
"\nOrder of objects:\n")
print(if(length(x$order.lab) != 0) x$order.lab else x$order,
quote = FALSE, ...)
cat("Merge:\n"); print(x$merge, ...)
cat("Height:\n"); print(x$height, ...)
if(!is.null(x$diss)) { ## Dissimilarities:
cat("\n"); print(summary(x$diss, ...))
}
cat("\nAvailable components:\n"); print(names(x), ...)
invisible(x)
}
as.dendrogram.twins <- function(object, ...) ## ... : really only 'hang'
as.dendrogram(as.hclust(object), ...)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.