R/dendrogram.R

Defines functions dendrogram

Documented in dendrogram

#' @title rDendrogram
#' @description Blank Description.
#' @param c Corpus Data
#' @param n Name
#' Defaults to TRUE.
#' @keywords package
#' @return NULL
#' @export
#' @examples dendrogram(nk.corpus, nk.name)

dendrogram <- function(c, n) {

if('tm' %in% rownames(installed.packages()) == TRUE) {
require(tm)} else {
install.packages("tm", repos = "http://cran.us.r-project.org")	
require(tm)}

if('ggplot2' %in% rownames(installed.packages()) == TRUE) {
require(ggplot2)} else {
install.packages("ggplot2", repos = "http://cran.us.r-project.org")	
require(ggplot2)}
	
if('ggdendro' %in% rownames(installed.packages()) == TRUE) {
require(ggdendro)} else {
install.packages("ggdendro", repos = "http://cran.us.r-project.org")	
require(ggdendro)}

# Make New Data from Input

nk.corpus <- c
nk.name <- n

# Format Corpus for Dendrograms
dendro.tdm <- TermDocumentMatrix(nk.corpus, control = list(wordLengths=c(1, Inf))) 
# Remove Sparse Terms
desparse <- removeSparseTerms(dendro.tdm, sparse = 0.95)
dendromatrix <- as.matrix(desparse)
# Hierarchical Clustering
distMatrix <- dist(scale(dendromatrix))
fit <- hclust(distMatrix, method ="ward.D")

dendrogram <- function(fit, k = 5) {
hcdata <- ggdendro::dendro_data(hc, type = "rectangle")
seg <- hcdata$segments
labclust <- cutree(hc, k)[hc$order]
segclust <- rep(0L, nrow(seg))
heights <- sort(hc$height, decreasing = TRUE)
height <- mean(c(heights[k], heights[k - 1L]), na.rm = TRUE)
for (i in 1:k) {
	xi <- hcdata$labels$x[labclust == i]
	idx1 <- seg$x >= min(xi) & seg$x <= max(xi)
	idx2 <- seg$xend >= min(xi) & seg$xend <= max(xi)
	idx3 <- seg$yend < height
	idx <- idx1 & idx2 & idx3
	segclust[idx] <- i}
idx <- which(segclust == 0L)
segclust[idx] <- segclust[idx + 1L]
hcdata$segments$clust <- segclust
hcdata$segments$line <- as.integer(segclust < 1L)
hcdata$labels$clust <- labclust
hcdata}
set_labels_params <- function(nbLabels, direction = c("tb", "bt", "lr", "rl"), fan = FALSE) {
if (fan) {angle <- 360 / nbLabels * 1:nbLabels + 90
	idx <- angle >= 90 & angle <= 270
	angle[idx] <- angle[idx] + 180
	hjust <- rep(0, nbLabels)
	hjust[idx] <- 1
} else {
	angle <- rep(0, nbLabels)
	hjust <- 0
	if (direction %in% c("tb", "bt")) { angle <- angle + 45 }
	if (direction %in% c("tb", "rl")) { hjust <- 1 }}
list(angle = angle, hjust = hjust, vjust = 0.5)}
plot_ggdendro <- function(hcdata, direction = c("lr", "rl", "tb", "bt"),
	fan = FALSE, scale.color = NULL, branch.size = 1, 
	label.size = 3, nudge.label = 0.01, expand.y = 0.1) {
		direction <- match.arg(direction) # if fan = FALSE
		ybreaks <- pretty(segment(hcdata)$y, n = 5)
		ymax <- max(segment(hcdata)$y)
## branches
p <- ggplot() + geom_segment(data = segment(hcdata),
	aes(x = x, y = y, xend = xend, yend = yend, 
	linetype = factor(line), colour = factor(clust)), 
	lineend = "round", show.legend = FALSE, size = branch.size)
## orientation
if (fan) {p <- p + coord_polar(direction = -1) +
	scale_x_continuous(breaks = NULL, limits = c(0, nrow(label(hcdata)))) +
	scale_y_reverse(breaks = ybreaks)
} else {p <- p + scale_x_continuous(breaks = NULL)
	if (direction %in% c("rl", "lr")) {p <- p + coord_flip()}
if (direction %in% c("bt", "lr")) {p <- p + scale_y_reverse(breaks = ybreaks)
} else {p <- p + scale_y_continuous(breaks = ybreaks) 
nudge.label <- -(nudge.label)}}
# labels
labelParams <- set_labels_params(nrow(hcdata$labels), direction, fan)
hcdata$labels$angle <- labelParams$angle
p <- p + geom_text(data = label(hcdata),
aes(x = x, y = y, label = label, colour = factor(clust),
angle = angle), vjust = labelParams$vjust, hjust = labelParams$hjust,
nudge_y = ymax * nudge.label, size = label.size, show.legend = FALSE)
# colors and limits
if (!is.null(scale.color)) {
p <- p + scale_color_manual(values = scale.color)}
ylim <- -round(ymax * expand.y, 1)
p <- p + expand_limits(y = ylim)
p}

mtc <- scale(dendromatrix)
D <- dist(mtc)
hc <- hclust(D)

hcdata <- dendrogram(hc, 5)

dendrogram.plot <<- plot_ggdendro(hcdata, direction = "lr", expand.y = 0.2) + 
	labs(title = paste0("Dendrogram for Query ", nk.name), y = element_blank(), x = element_blank())
dendrogram.plot

return(dendrogram.plot)

}
sabalicodev/sabali documentation built on Jan. 13, 2020, 2:22 p.m.