#'Build the hierarchical clustering tree.
#'
#'Hierarchical clustering with Fisher's test p-values as distance matrix.
#'Also add feature coverage information for each node in the tree.
#'@param pinmat The incidence table generated by \code{findpins}.
#'@param mat_fdr The FDR matrix generated by \code{fdr_fisherPV}
#'@param mat_dist The dissmilarity based on Fisher's test p-values for hierarchical clustering.
#'@param hc_method Default: average
#'@return A hclust objects with new items added.
#'@export
hclust_tree <- function(pinmat, mat_fdr, mat_dist, hc_method = "average"){
pinmat <- pinmat[rowSums(pinmat)<ncol(pinmat),,drop = F]
## Grow a tree and add multiple items to the standard hclust object
####################################################################
hc <- hclust(as.dist(mat_dist), method = hc_method)
#Leaf indices for each node, in the order of the original labels
leafID_list <- vector(mode = "list", length = nrow(hc$merge))
#Leaf lables for the node
leaflabel_list <- vector(mode = "list",length = nrow(hc$merge))
#Maximal pairwise FDR anywhere in the node
maxfdr <- rep(NA, nrow(hc$merge))
#Mean FDR for the node
meanfdr<-rep(NA, nrow(hc$merge))
#Number of leaves in the node
nodesize <- rep(NA, nrow(hc$merge))
#For each node and each feature(pin) determine the fraction of leaves in the node with the feature
sharing <- matrix(NA, nrow = nrow(pinmat), ncol = nrow(hc$merge))
#Mean number of features per leaf in a node
complexity <- rep(NA, nrow(hc$merge))
#compute the items defined above
for(i in 1:nrow(hc$merge)){
if(hc$merge[i, 1] < 0){
leafID_list[[i]] <- (-hc$merge[i,1])}
else{
leafID_list[[i]] <- leafID_list[[hc$merge[i,1]]]}
if(hc$merge[i, 2] < 0){
leafID_list[[i]] <- c(leafID_list[[i]], (-hc$merge[i,2]))}
else{
leafID_list[[i]] <- c(leafID_list[[i]], leafID_list[[hc$merge[i,2]]])}
leaflabel_list[[i]] <- hc$labels[leafID_list[[i]]]
nodesize[i] <- length(leafID_list[[i]])
maxfdr[i]<- max(mat_fdr[leafID_list[[i]], leafID_list[[i]]][upper.tri(mat_fdr[leafID_list[[i]], leafID_list[[i]]])])
meanfdr[i]<- mean(mat_fdr[leafID_list[[i]], leafID_list[[i]]][upper.tri(mat_fdr[leafID_list[[i]], leafID_list[[i]]])])
complexity[i] <- mean(colSums(pinmat[,leaflabel_list[[i]]]))
sharing[,i] <- rowMeans(pinmat[,leaflabel_list[[i]]])
}
hc$maxfdr <- maxfdr
hc$meanfdr <- meanfdr
hc$nodesize <- nodesize
hc$leafID_list <- leafID_list
hc$leaflabel_list <- leaflabel_list
hc$sharing <- sharing
hc$complexity <- complexity
#return the hclust object which have new features added & distance matrix based on log10(fisherPV)
return(hc)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.