R/cutreeDynamic.R
In dynamicTreeCut: Methods for Detection of Clusters in Hierarchical Clustering Dendrograms

Documented in cutreeDynamicTree

#------------------------------------------------------------------------------------------
#
# cutreeDynamic
#
#------------------------------------------------------------------------------------------
# Modification(s) by Peter Langfelder: returns numerical labels (not colors) in a vector (not a factor).
# Several rendundant blocks of code removed; duplicate function definitions removed; unused
# functions removed.
# maxTreeHeight now checked for being too large.

#.minAttachModuleSize = 100;

cutreeDynamicTree = function(dendro, maxTreeHeight=1, deepSplit=TRUE, minModuleSize=50)
{
    if (is.null(maxTreeHeight)) maxTreeHeight = 0.99 * max(dendro$height);
    if (maxTreeHeight > max(dendro$height)) maxTreeHeight = 0.99 * max(dendro$height);
    staticCutCluster = .cutTreeStatic(dendro=dendro, heightcutoff=maxTreeHeight, minsize1=minModuleSize)

    #get tree height for every singleton
    #node_index   tree_height
    demdroHeiAll= rbind( cbind(dendro$merge[,1], dendro$height), cbind(dendro$merge[,2], dendro$height) )

    #singletons will stand at the front of the list
    myorder = order(demdroHeiAll[,1])

    #get # of singletons
    no.singletons = length(dendro$order)
    demdroHeiAll.sort = demdroHeiAll[myorder, ]
    demdroHei.sort = demdroHeiAll.sort[c(1:no.singletons), ]
    demdroHei = demdroHei.sort[seq(no.singletons, 1, by=-1), ]
    demdroHei[,1]     = -demdroHei[,1]

    # combine with prelimilary cluster-cutoff results
    demdroHei         = cbind(demdroHei, as.integer(staticCutCluster))

    # re-order the order based on the dendrogram order dendro$order
    demdroHei.order = demdroHei[dendro$order, ]

    static.clupos = .locateCluster(demdroHei.order[, 3])

    if (is.null(static.clupos) ){
        module.assign   = rep(0, no.singletons)
        return ( module.assign )
    }

    static.no     = dim(static.clupos)[1]
    static.clupos2 =     static.clupos
    static.no2     =     static.no

    #split individual cluster if there are sub clusters embedded
    mcycle=1
    while(1==1){
        clupos = NULL
        for (i in c(1:static.no)){
           mydemdroHei.order = demdroHei.order[ c(static.clupos[i,1]:static.clupos[i,2]), ] #index to [1, clusterSize]
           mydemdroHei.order[, 1] = mydemdroHei.order[, 1] - static.clupos[i, 1] + 1

           #cat("Cycle ", as.character(mcycle), "cluster (", static.clupos[i,1], static.clupos[i,2], ")\n")
           #cat("i=", as.character(i), "\n")

           iclupos = .processIndividualCluster(mydemdroHei.order, 
                                              cminModuleSize       = minModuleSize, 
                                              cminAttachModuleSize = 2*minModuleSize)

           iclupos[,1] = iclupos[,1] + static.clupos[i, 1] -1 #recover the original index
           iclupos[,2] = iclupos[,2] + static.clupos[i, 1] -1

           clupos  = rbind(clupos, iclupos) #put in the final output buffer
        }

        if(deepSplit==FALSE){
          break
        }

        if(dim(clupos)[1] != static.no) {
           static.clupos = clupos
           static.no     = dim(static.clupos)[1]
        }else{
          break
        }
       mcycle = mcycle + 1
       #static.clupos  
    }
    final.cnt = dim(clupos)[1]
    #assign colors for modules
    module.assign = rep(0, no.singletons)
    module.cnt=1
    for (i in c(1:final.cnt ))
    {
       sdx = clupos[i, 1] #module start point
       edx = clupos[i, 2] #module end point
       module.size = edx - sdx +1 
       if(module.size <minModuleSize){
         next
       }
       #assign module lable
       module.assign[sdx:edx] = rep(module.cnt, module.size)
       #update module label for the next module
       module.cnt = module.cnt + 1
    }
    colcode.reduced.order = .assignModuleNumber(module.assign, minsize1=minModuleSize)
   recov.order = order( demdroHei.order[,1])
    colcode.reduced = colcode.reduced.order[recov.order]
    colcode.reduced
}

#leftOrright >0 : running length (with same sign) to right, otherwise to the left
#mysign = -1: negative value, mysign = -1: positive value
.runlengthSign = function(mysequence, leftOrright=-1, mysign=-1){
    seqlen = length(mysequence)   
    if(leftOrright<0){
        pseq = rev(mysequence)
    }else{
        pseq = mysequence
    }

    if(mysign<0){ #see where the first POSITIVE number occurs
       nonezero.bool = (pseq > 0)
    }else{ #see where the first NEGATIVE number occur
       nonezero.bool = (pseq < 0)
    }
    if( sum(nonezero.bool) > 0){
      runlength = min( c(1:seqlen)[nonezero.bool] ) - 1
    }else{
      runlength = 0
    }
}

#"0" is for grey module
#.assignModuleColor = function(labelpred, minsize1=50, anameallmodules=FALSE, auseblackwhite=FALSE) {
    # here we define modules by using a height cut-off for the branches
    #labelpred= cutree(dendro,h=heightcutoff)
    #cat(labelpred)

    #"0", grey module doesn't participate color assignment, directly assigned as "grey"
    #labelpredNoZero = labelpred[ labelpred >0 ]
    #sort1=-sort(-table(labelpredNoZero))
    ## sort1
    #modulename= as.numeric(names(sort1))
    #modulebranch= sort1 >= minsize1
    #no.modules = sum(modulebranch)
#
    #colorcode=GlobalStandardColors;
#
    ##"grey" means not in any module;
    #colorhelp=rep("grey",length(labelpred))
    #if ( no.modules==0){
        #print("No module detected.")
    #}
    #else{
        #if ( no.modules > length(colorcode)  ){
            #print( paste("Too many modules:", as.character(no.modules)) )
        #}
#
        #if ( (anameallmodules==FALSE) || (no.modules <=length(colorcode)) ){
            #labeledModules = min(no.modules, length(colorcode) )
            #for (i in c(1:labeledModules)) {
               #colorhelp=ifelse(labelpred==modulename[i],colorcode[i],colorhelp)
            #}
            #colorhelp=factor(colorhelp,levels=c(colorcode[1:labeledModules],"grey"))
        #}else{#nameallmodules==TRUE and no.modules >length(colorcode)
            #maxcolors=length(colorcode)
            #labeledModules = no.modules
            #extracolors=NULL
            #blackwhite=c("red", "black")
            #for(i in c((maxcolors+1):no.modules)){
              #if(auseblackwhite==FALSE){
                  #icolor=paste("module", as.character(i), sep="")
              #}else{#use balck white alternatively represent extra colors, for display only
                  ##here we use the ordered label to avoid put the same color for two neighboring clusters
                  #icolor=blackwhite[1+(as.integer(modulename[i])%%2) ]
              #}
              #extracolors=c(extracolors, icolor)
            #}
#
            ##combine the true-color code and the extra colorcode into a uniform colorcode for 
            ##color assignment
            #allcolorcode=c(colorcode, extracolors)
#
            #for (i in c(1:labeledModules)) {
               #colorhelp=ifelse(labelpred==modulename[i],allcolorcode[i],colorhelp)
            #}
            #colorhelp=factor(colorhelp,levels=c(allcolorcode[1:labeledModules],"grey"))
        #}
    #}
#

    #colorhelp
#}

# This function written by Peter Langfelder, based on .assignModuleColor above but simplified.
# Assigns module numbers, not colors. All modules are labeled.
#"0" is for grey module
.assignModuleNumber = function(labelpred, minsize1=50) {
    #"0", grey module doesn't participate color assignment, directly assigned as "grey"
    labelpredNoZero = labelpred[ labelpred >0 ]
    sort1=-sort(-table(labelpredNoZero))
    # sort1
    modulename= as.numeric(names(sort1))
    modulebranch= sort1 >= minsize1
    no.modules = sum(modulebranch)

    #"grey" means not in any module;
    colorhelp=rep(0,length(labelpred))

    for (i in c(1:no.modules)) {
        colorhelp=ifelse(labelpred==modulename[i],i,colorhelp)
    }

    colorhelp
}


#locate the start/end positions of each cluster in the ordered cluster label sequence 
#where "-1" indicating no cluster
#3-1 -1 1 1 1 1 2 2 2
#3 3 -1-1 1 1 1 1 2 2 2  (shift)
#---------------------------------
#0-4  0 2 0 0 0 1 0 0 0   (difference)
#       *     * @
.locateCluster = function(clusterlabels)
{
 no.nodes = length(clusterlabels)
 clusterlabels.shift = c(clusterlabels[1], c(clusterlabels[1:(no.nodes-1)]) )
 
 #a non-zero point is the start point of a cluster and it previous point is the end point of the previous
 #cluster 
 label.diff = abs(clusterlabels - clusterlabels.shift)

 #process the first and last positions as start/end points if they belong to a cluster instead of no
 # cluster "-1" 
 if(clusterlabels[1]       >0) {label.diff[1]=1} 
 if(clusterlabels[no.nodes]>0) {label.diff[no.nodes]=1} 

 flagpoints.bool = label.diff > 0
 if( sum(flagpoints.bool) ==0){
     return(NULL)
 }

 flagpoints = c(1:no.nodes)[flagpoints.bool]
 no.points  = length(flagpoints)

 myclupos=NULL
 for(i in c(1:(no.points-1)) ){
   idx = flagpoints[i]
   if(clusterlabels[idx]>0){
      if(flagpoints[i+1]==no.nodes) {#boundary effect
         myclupos = rbind(myclupos, c(idx, flagpoints[i+1]) )
         break
      }else{
         myclupos = rbind(myclupos, c(idx, flagpoints[i+1]-1) )
      }
   }
 }
 myclupos
}


#input is the cluster demdrogram of an individual cluster, we want to find its embbeded subclusters
#execution order: mean-height ==> (mean+max)/2 ==> (mean+min)/2
#useMean: =0 ~ use mean-height   as calibation line
#         =1 ~ use (mean+max)/2  as calibation line to detect relatively a small cluster sitting on the head of a bigger one,
#                      so mean-height is too low to detect the two modules.
#         =-1~ use (mean+min)/2  as calibation line to detect relatively a small cluster sitting on the tail of a bigger one,
#                      so mean-height & (mean+max)/2 are too high to detect the two modules

.processIndividualCluster = function(clusterDemdroHei, cminModuleSize=50, 
     cminAttachModuleSize = 2* cminModuleSize, 
     minTailRunlength= as.integer(cminModuleSize/3)+1, useMean=0)
{
    #for debug: use all genes
    #clusterDemdroHei =demdroHei.order

    no.cnodes = dim(clusterDemdroHei)[1]
    
    cmaxhei   = max(clusterDemdroHei[, 2])
    cminhei   = min(clusterDemdroHei[, 2])
    
    cmeanhei  = mean(clusterDemdroHei[, 2])
    cmidhei = (cmeanhei + cmaxhei)/2.0
    cdwnhei = (cmeanhei + cminhei)/2.0

    if (useMean==1){
        comphei = cmidhei
    }else if (useMean==-1){
        comphei = cdwnhei
    }else{ #normal case
        comphei = cmeanhei
    }
        
    # compute height diffrence with mean height
    heidiff       = clusterDemdroHei[,2] - comphei
    heidiff.shift = .shiftSequence(heidiff, -1)

    # get cut positions
    # detect the end point of a cluster, whose height should be less than meanhei 
    #  and the node behind it is the start point of the next cluster which has a height above meanhei
    cuts.bool = (heidiff<0) & (heidiff.shift > 0)
    cuts.bool[1]         = TRUE
    cuts.bool[no.cnodes] = TRUE
    
    if(sum(cuts.bool)==2){
          if (useMean==0){
             new.clupos=.processIndividualCluster(clusterDemdroHei=clusterDemdroHei, cminModuleSize=cminModuleSize, 
                                         cminAttachModuleSize=cminAttachModuleSize,
                                         useMean=1)
          }else if(useMean==1){
             new.clupos=.processIndividualCluster(clusterDemdroHei=clusterDemdroHei, cminModuleSize=cminModuleSize, 
                                         cminAttachModuleSize=cminAttachModuleSize,
                                         useMean=-1)          
          }else{
             new.clupos = rbind(c(1, no.cnodes))
          }
          return (new.clupos)
    }

    #a good candidate cluster-end point should have significant # of ahead nodes with head < meanHei
    cutindex =c(1:no.cnodes)[cuts.bool]
    no.cutps = length(cutindex)
    runlens  = rep(999, no.cutps)
    cuts.bool2 = cuts.bool
    for(i in c(2:(no.cutps-1)) ){
       seq = c( (cutindex[i-1]+1):cutindex[i] )
       runlens[i] = .runlengthSign(heidiff[seq], leftOrright=-1, mysign=-1)

       if(runlens[i] < minTailRunlength){
          #cat("run length=", runlens[i], "\n")
          cuts.bool2[ cutindex[i] ] = FALSE
       }
    }

    #attach SMALL cluster to the left-side BIG cluster if the small one has smaller mean height
    cuts.bool3=cuts.bool2
    if(sum(cuts.bool2) > 3) {
       curj = 2
       while (1==1){
           cutindex2 =c(1:no.cnodes)[cuts.bool2]
           no.clus = length(cutindex2) -1
           if (curj>no.clus){
              break
           }
           pre.sdx = cutindex2[ curj-1 ]+1 #previous module start point
           pre.edx = cutindex2[ curj ] #previous module end   point
           pre.module.size = pre.edx - pre.sdx +1 
           pre.module.hei  = mean(clusterDemdroHei[c(pre.sdx:pre.edx) , 2])
         
           cur.sdx = cutindex2[ curj ]+1 #previous module start point
           cur.edx = cutindex2[ curj+1 ] #previous module end   point
           cur.module.size = cur.edx - cur.sdx +1 
           cur.module.hei  = mean(clusterDemdroHei[c(cur.sdx:cur.edx) , 2])

           #merge to the leftside major module, don't change the current index "curj"
           #if( (pre.module.size >minAttachModuleSize)&(cur.module.hei<pre.module.hei)&(cur.module.size<minAttachModuleSize) ){
           if( (cur.module.hei<pre.module.hei)&(cur.module.size<cminAttachModuleSize) ){
                cuts.bool2[ cutindex2[curj] ] = FALSE
           }else{ #consider next cluster
                curj = curj + 1
           }
       }#while
   }#if 

   cutindex2 =c(1:no.cnodes)[cuts.bool2]
   no.cutps = length(cutindex2)

    #we don't want to lose the small cluster at the tail, attch it to the previous big cluster
    #cat("Lclu= ", cutindex2[no.cutps]-cutindex2[no.cutps-1]+1, "\n")
    if(no.cutps > 2){
      if( (cutindex2[no.cutps] - cutindex2[no.cutps-1]+1) < cminModuleSize ){
        cuts.bool2[ cutindex2[no.cutps-1] ] =FALSE  
      }
    }

   cutindex2  = c(1:no.cnodes)[cuts.bool2]
   cutindex2[1]=cutindex2[1]-1 #the first 
   no.cutps2  = length(cutindex2)

   if(no.cutps2 > 2){
     new.clupos = cbind( cutindex2[c(1:(no.cutps2-1))]+1, cutindex2[c(2:no.cutps2)] )
   }else{
     new.clupos = cbind( 1, no.cnodes)
   }

   if ( dim(new.clupos)[1] == 1 ){   
          if (useMean==0){
             new.clupos=.processIndividualCluster(clusterDemdroHei=clusterDemdroHei, cminModuleSize=cminModuleSize, 
                                         cminAttachModuleSize=cminAttachModuleSize,
                                         useMean=1)
          }else if(useMean==1){
             new.clupos=.processIndividualCluster(clusterDemdroHei=clusterDemdroHei, cminModuleSize=cminModuleSize, 
                                         cminAttachModuleSize=cminAttachModuleSize,
                                         useMean=-1)          
          }   
   }
   new.clupos
}


#delta >0 : shift to right, otherwise to the left
.shiftSequence = function(mysequence, delta){
    seqlen = length(mysequence)
    if(delta>0){
        finalseq=c(mysequence[1:delta], mysequence[1:(seqlen-delta)])
    }else{
        posdelta = -delta 
        finalseq=c(mysequence[(posdelta+1):seqlen], mysequence[(seqlen-posdelta+1):seqlen])
    }
    finalseq
}

#use height cutoff to remove
.cutTreeStatic = function(dendro,heightcutoff=0.99, minsize1=50) {

    # here we define modules by using a height cut-off for the branches
    labelpred= cutree(dendro,h=heightcutoff)
    sort1=-sort(-table(labelpred))
    sort1
    modulename= as.numeric(names(sort1))
    modulebranch= sort1 >= minsize1
    no.modules=sum(modulebranch)

    colorhelp = rep(-1, length(labelpred) )
    if ( no.modules==0){
        print("No module detected")
    }
    else{
        for (i in c(1:no.modules)) {
            colorhelp=ifelse(labelpred==modulename[i],i ,colorhelp)
        }
    }
    colorhelp
}
Any scripts or data that you put into this service are public.
dynamicTreeCut documentation built on May 2, 2019, 6:12 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
dynamicTreeCut
Methods for Detection of Clusters in Hierarchical Clustering Dendrograms

R/cutreeDynamic.R
In dynamicTreeCut: Methods for Detection of Clusters in Hierarchical Clustering Dendrograms

Defines functions cutreeDynamicTree .runlengthSign .assignModuleNumber .locateCluster .processIndividualCluster .shiftSequence .cutTreeStatic

Documented in cutreeDynamicTree

Try the dynamicTreeCut package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

dynamicTreeCut Methods for Detection of Clusters in Hierarchical Clustering Dendrograms

R/cutreeDynamic.R In dynamicTreeCut: Methods for Detection of Clusters in Hierarchical Clustering Dendrograms

Defines functions cutreeDynamicTree .runlengthSign .assignModuleNumber .locateCluster .processIndividualCluster .shiftSequence .cutTreeStatic

Documented in cutreeDynamicTree

Try the dynamicTreeCut package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

dynamicTreeCut
Methods for Detection of Clusters in Hierarchical Clustering Dendrograms

R/cutreeDynamic.R
In dynamicTreeCut: Methods for Detection of Clusters in Hierarchical Clustering Dendrograms