#' Build the hierarchical clustering tree.
#'
#' Hierarchical clustering with Fisher's test p-values as distance matrix.
#' Also add feature coverage information for each node in the tree.
#' @param pinmat The incidence table generated by \code{calc_pinmat}.
#' @param mat_fdr The FDR matrix generated by \code{fisher_fdr}
#' @param mat_dist The dissmilarity based on Fisher's test p-values for
#' hierarchical clustering.
#' @param hcmethod Default: average
#' @return A hclust objects with new items added.
#' @export
hclust_tree <- function(pinmat, mat_fdr, mat_dist, hcmethod = "average"){
# pinmat <- pinmat[rowSums(pinmat)<ncol(pinmat),,drop = F]
# Grow a tree and add multiple items to the standard hclust object
hc <- hclust(as.dist(mat_dist), method = hcmethod)
# Leaf indices for each node, in the order of the original labels
leaflist <- vector(mode = "list", length = nrow(hc$merge))
# Leaf lables for the node
labellist <- vector(mode = "list",length = nrow(hc$merge))
# Maximal pairwise FDR anywhere in the node
mergefdr <- rep(NA, nrow(hc$merge))
# Mean FDR for the node
meanfdr<-rep(NA, nrow(hc$merge))
# Number of leaves in the node
nodesize <- rep(NA, nrow(hc$merge))
# For each node and each feature(pin) determine the fraction of leaves in the
# node with the feature
sharing <- matrix(NA, nrow = nrow(pinmat), ncol = nrow(hc$merge))
# Mean number of features per leaf in a node
complexity <- rep(NA, nrow(hc$merge))
for(i in 1:nrow(hc$merge)){
if(hc$merge[i, 1] < 0){
leaflist[[i]] <- (-hc$merge[i,1])}
else{
leaflist[[i]] <- leaflist[[hc$merge[i,1]]]}
if(hc$merge[i, 2] < 0){
leaflist[[i]] <- c(leaflist[[i]], (-hc$merge[i,2]))}
else{
leaflist[[i]] <- c(leaflist[[i]], leaflist[[hc$merge[i,2]]])}
labellist[[i]] <- hc$labels[leaflist[[i]]]
complexity[i] <- mean(colSums(pinmat[,labellist[[i]]]))
sharing[,i] <- rowMeans(pinmat[,labellist[[i]]])
nodesize[i] <- length(leaflist[[i]])
mergefdr[i]<- max(mat_fdr[leaflist[[i]],
leaflist[[i]]][upper.tri(mat_fdr[leaflist[[i]], leaflist[[i]]])])
meanfdr[i]<- mean(mat_fdr[leaflist[[i]],
leaflist[[i]]][upper.tri(mat_fdr[leaflist[[i]], leaflist[[i]]])])
}
hc$mergefdr <- mergefdr
hc$meanfdr <- meanfdr
hc$nodesize <- nodesize
hc$leaflist <- leaflist
hc$labellist <- labellist
hc$sharing <- sharing
hc$complexity <- complexity
# return the hclust object which have new features added
return(hc)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.