R/hierarchical_clustering.R

Defines functions HCplot HCtbl

Documented in HCplot HCtbl

#' Hierarchical clustering of spectral similarity data
#'
#' \code{HCtbl()} performs hierarchical clustering
#' of spectral similarity data using average linkage
#' as agglomeration criterion.
#'
#' @param distmat A distance matrix as generated by
#' \code{\link{distanceMatrix}}.
#'
#' @param h Height where the tree is to be cut, defaults to \code{0.95}.
#' See \code{\link[stats]{cutree}} for details.
#'
#' @return A \code{data.frame} with name and cluster ID for each
#' feature in \code{distmat}.
#'
#' @seealso \code{\link{HCplot}}
#'
#' @import stats
#'
#' @examples
#' load(file = system.file("extdata",
#'     "distmat.RData",
#'     package = "CluMSIDdata"))
#'
#' my_HCtbl <- HCtbl(distmat[1:50,1:50], h = 0.8)
#'
#' @export
HCtbl <- function(distmat, h = 0.95){
    clust <- stats::hclust(stats::as.dist(distmat), method = "average")
    hclusttree <- stats::cutree(clust, h = h)
    hclustmat <- data.frame(feature = names(hclusttree),
                            cluster_ID = unname(hclusttree))

    return(hclustmat)
}

#' Generate cluster dendrogram or heatmap from spectral similarity data
#'
#' \code{HCplot()} performs hierarchical clustering
#' of spectral similarity data using average linkage
#' as agglomeration criterion like \code{\link{HCtbl}}
#' and generates either a circular dendrogram or a
#' combination of dendrogram and heatmap.
#'
#' @inheritParams HCtbl
#'
#' @param type Specifies which visualisation is to be generated:
#' \code{"dendrogram"} (default) for a circular dendrogram or
#' \code{"heatmap"} for a combination of dendrogram and heatmap.
#'
#' @param ... Additional graphical parameters passed to
#' \code{plot.phylo} (for \code{type = "dendrogram"})
#' or \code{gplots::heatmap.2} (for \code{type = "heatmap"})
#'
#' @return A plot as specified by \code{type}.
#'
#' @importFrom gplots heatmap.2
#' @importFrom RColorBrewer brewer.pal
#' @importFrom graphics plot
#' @importFrom ape as.phylo
#'
#' @import stats
#'
#' @examples
#' load(file = system.file("extdata",
#'     "distmat.RData",
#'     package = "CluMSIDdata"))
#'
#' HCplot(distmat[1:50,1:50], h = 0.8, type = "heatmap")
#'
#' @export
HCplot <- function(distmat, h = 0.95, type = c("dendrogram", "heatmap"), ...){
    clust <- stats::hclust(stats::as.dist(distmat), method = "average")
    hclusttree <- stats::cutree(clust, h = h)
    hclustmat <- cbind(names(hclusttree), hclusttree)
    clustorder <- hclusttree[clust$order]

    nc <- round(max(hclusttree)/8)

    type <- match.arg(type)
    if(type == "heatmap") {
        params <- list(...)
        params$Rowv <- rev(stats::as.dendrogram(clust))
        params$Colv <- stats::as.dendrogram(clust)
        params$distfun <- list(NULL)
        params$key.xlab <- "1 - spectral similarity"
        params$trace <- "none"
        if(!("margins" %in% names(params))) params$margins <- c(16,16)
        if(!("breaks" %in% names(params))) params$breaks <- seq(0,1,0.01)
        do.call(gplots::heatmap.2, append(list(x = distmat), params))
    } else if(type == "dendrogram") {
        clr <- RColorBrewer::brewer.pal(n = 8, name = "Dark2")
        params <- list(...)
        params$tip.color <- rep(clr, nc)[clustorder][hclusttree]
        params$type <- "fan"
        if(!("cex" %in% names(params))) params$cex <- 0.7
        if(!("no.margin" %in% names(params))) params$no.margin <- TRUE
        do.call(graphics::plot, append(list(x = ape::as.phylo(clust)), params))
    } else stop("'type' muste be either 'dendrogram' (default) or 'heatmap'.")
}

Try the CluMSID package in your browser

Any scripts or data that you put into this service are public.

CluMSID documentation built on Nov. 8, 2020, 7:46 p.m.