R/SomParam.R

Defines functions SomParam

Documented in SomParam

#' Clustering with self-organizing maps
#'
#' Use the self-organizing map implementation in the \pkg{kohonen} package to cluster observations into the specified number of nodes.
#' Note that this requires the installation of the \pkg{kohonen} package.
#'
#' @param centers An integer scalar specifying the number of centers.
#' Alternatively, a function that takes the number of observations and returns the number of centers.
#' @inheritParams clusterRows
#' @param dim.ratio A positive numeric scalar in specifying how \code{centers} should be distributed between the \code{x} and \code{y} dimensions.
#' Defaults to equal distribution, i.e., both dimensions will be of length equal to the square root of \code{centers}.
#' Values above 1 will distribute more nodes to \code{x} while values below 1 will distribute mode nodes to \code{y}.
#' @param topo,neighbourhood.fct,toroidal
#' Further arguments to pass to the \code{\link[kohonen]{somgrid}} function in the \pkg{kohonen} package.
#' @param rlen,alpha,radius,dist.fct
#' Further arguments to pass to the \code{\link[kohonen]{som}} function in the \pkg{kohonen} package.
#' @param BLUSPARAM A \linkS4class{SomParam} object.
#' @param full Logical scalar indicating whether the full SOM statistics should be returned.
#' 
#' @details
#' This class usually requires the user to specify the number of clusters beforehand.
#' However, we can also allow the number of clusters to vary as a function of the number of observations.
#' The latter is occasionally useful, e.g., to allow the clustering to automatically become more granular for large datasets.
#'
#' Note that the final number of clusters may not be exactly equal to \code{centers}, depending on how \code{dim.ratio} is specified.
#' For example, if \code{centers} is a perfect square and \code{dim.ratio=1}, we will get exactly the requested number of points.
#'
#' To modify an existing SomParam object \code{x},
#' users can simply call \code{x[[i]]} or \code{x[[i]] <- value} where \code{i} is any argument used in the constructor.
#'
#' For \code{radius}, a value of \code{NULL} means that the default argument in the \code{\link[kohonen]{som}} function signature is used.
#' This is are data-dependent and so cannot be specified during construction of the SomParam object.
#'
#' For \code{dist.fct}, users can specify any string that can be used in the \code{dist.fcts} arguments in \code{\link[kohonen]{som}}.
#' In practice, the only real alternative is \code{"manhattan"}.
#'
#' @return
#' The \code{SomParam} constructor will return a \linkS4class{SomParam} object with the specified parameters.
#'
#' The \code{clusterRows} method will return a factor of length equal to \code{nrow(x)} containing the cluster assignments.
#' If \code{full=TRUE}, a list is returned with \code{clusters} (the factor, as above) and \code{objects}
#' (a list containing \code{som}, the direct output of \code{\link[kohonen]{som}}).
#'
#' @author Aaron Lun
#' @examples
#' clusterRows(iris[,1:4], SomParam(centers=16))
#' clusterRows(iris[,1:4], SomParam(centers=12, dim.ratio=3/4))
#'
#' @seealso
#' \code{\link[kohonen]{som}} from the \pkg{kohonen} package, which does all of the heavy lifting.
#'
#' \linkS4class{FixedNumberParam}, the parent of the SomParam class.
#' @name SomParam-class
#' @docType class
#' @aliases 
#' show,SomParam-method
NULL

#' @export
#' @rdname SomParam-class
setClass("SomParam", contains="FixedNumberParam", slots=c(
    dim.ratio="numeric", 
    topo="character",
    neighbourhood.fct="character",
    toroidal="logical",
    rlen="integer", 
    alpha="numeric",
    radius="numeric_OR_NULL",
    dist.fct="character"
))

setValidity2("SomParam", function(object) {
    msg <- character(0)

    msg <- c(msg, .check_positive_slots(object, c("dim.ratio", "rlen"))) 

    val <- object@alpha
    if (length(val)!=2 || !all(is.finite(val)) || !all(val > 0)) {
        msg <- c(msg, "'alpha' must be a positive numeric vector of length 2")
    }

    val <- object@radius
    if (!is.null(val)) {
        if (length(val)!=2 || !all(is.finite(val)) || !all(val > 0)) {
            msg <- c(msg, "'radius' must be a positive numeric vector of length 2")
        }
    }

    msg <- c(msg, .check_nonna_slots(object, c("dist.fct", "topo", "neighbourhood.fct", "toroidal")))

    if (length(msg)) {
        return(msg)
    }
    TRUE
})

#' @export
setMethod("show", "SomParam", function(object) {
    callNextMethod()

    for (i in c("dim.ratio", "topo", "neighbourhood.fct", "toroidal", "rlen")) {
        cat(sprintf("%s: %s\n", i, slot(object, i)))
    }

    cat(sprintf("alpha: %s\n", paste(object@alpha, collapse=" ")))
    cat(sprintf("radius: %s\n", if (is.null(object@radius)) "default" else paste(object@radius, collapse=" ")))
    cat(sprintf("dist.fct: %s\n", object@dist.fct))
})

#' @export
#' @rdname SomParam-class
SomParam <- function(centers, 
    dim.ratio = 1,
    topo = "rectangular",
    neighbourhood.fct = "bubble",
    toroidal = FALSE,
    rlen = 100,
    alpha = c(0.05, 0.01),
    radius = NULL,
    dist.fct = "sumofsquares")
{
    if (!is.function(centers)) {
        centers <- as.integer(centers)
    }
    new("SomParam", 
        centers=centers, 
        dim.ratio=dim.ratio,
        topo=topo,
        neighbourhood.fct=neighbourhood.fct,
        toroidal=toroidal,
        rlen=as.integer(rlen),
        alpha=alpha,
        radius=radius,
        dist.fct=dist.fct
    )
}

#' @export
#' @rdname SomParam-class
setMethod("clusterRows", c("ANY", "SomParam"), function(x, BLUSPARAM, full=FALSE) {
    centerx <- centers(BLUSPARAM, n=nrow(x))
    dim.ratio <- BLUSPARAM[["dim.ratio"]]
    xdim <- max(1, round(sqrt(centerx * dim.ratio)))
    ydim <- max(1, round(sqrt(centerx / dim.ratio)))

    args <- list(
        X = as.matrix(x),
        grid = kohonen::somgrid(
            xdim = xdim,
            ydim = ydim,
            topo = BLUSPARAM@topo,
            neighbourhood.fct = BLUSPARAM@neighbourhood.fct,
            toroidal = BLUSPARAM@toroidal
        ),
        rlen = BLUSPARAM@rlen,
        alpha = BLUSPARAM@alpha,
        dist.fcts = list(BLUSPARAM@dist.fct)
    )

    if (!is.null(BLUSPARAM[["radius"]])) {
        args$radius <- BLUSPARAM[["radius"]]
    }

    stats <- do.call(kohonen::som, args)
    clusters <- factor(stats$unit.classif)

    if (full) {
        list(clusters=clusters, objects=list(som=stats))
    } else {
        clusters 
    }
})
LTLA/bluster documentation built on Aug. 20, 2023, 5:39 a.m.