R/RunClustering.R

Defines functions RunClustering

Documented in RunClustering

#' @name RunClustering
#'
#' @title Clustering of a dynamical image sequence
#' 
#' @description Clusters dynamics of an image sequence. Assumes prior sequence denoising. 
#'
#' The clustering procedure is an iterative procedure.
#' At each step, the (available) children of the voxel associated to a largest neighborhoods (result of the denoising
#' step or made of its children as a result of the getChildren function) are used to build a robust cluster.
#' The center of the latter is compared to previously build ones using the MultiTestH0 procedure.
#' The clusters with equivalent centers are merged together until no further merging are possible.
#' The resulting cluster is added to the cluster list if the number of outliers generated by the robust cluster
#' procedure does not exceed the original number of children. If not the voxel is added to the closest existing cluster.
#' 
#' Further details about the clustering procedure can be found in the references.
#' 
#' @param data.array a (2D or 3D)+T array containing the dynamic
#' sequence of images (the dataset). The last dimension is the time.
#' @param denois the result of the denoising procedure RunDenoising.
#' @param nproc a numeric value indicating the number of processors used for parallel computation. Default set to 1 (no parallelization).
#' @param min.size a numeric value indicating the smallest size of the neighborhood for a voxel to be clusterized. Default set to 1 (no limitation).
#' @param alpha a numeric value indicating the global level of the multitest. Default set to 0.05.
#' @param do.children.first an boolean. If TRUE compute children list for all voxels before starting the clusterization
#' which will use these lists as neighborhoods.If FALSE (default) neighborhood are those resulting from the denoising step.
#' @return a list containing:
#' \itemize{
#' \item \code{cluster}, a list of vectors---each vector contains the voxel indexes of one cluster.
#' \item \code{centers}, a matrix whose columns contain the average dynamics of each cluster.
#' \item \code{bad.voxels}, vector of non clusterized voxel indexes. 
#' }
#' @references Rozenholc, Y. and Reiss, M. (2012) \emph{Preserving time structures while denoising a dynamical image}, Mathematical Methods for Signal and Image Analysis and Representation (Chapter 12),  Florack, L. and Duits, R. and Jongbloed, G. and van~Lieshout, M.-C. and Davies, L. Ed., Springer-Verlag, Berlin
#' @references Lieury, T. and Pouzat, C. and Rozenholc, Y. (submitted) \emph{Spatial denoising and clustering of dynamical image sequence: application to DCE imaging in medicine and calcium imaging in neurons}  
#' @author Tiffany Lieury, Christophe Pouzat, Yves Rozenholc
#' @example ../R/Clustering-example.R
#' @seealso \code{\link{GetClusteringResults}}
#' @export
RunClustering <- function(
    data.array,         
    denois,
    nproc=1,
    min.size=1,
    alpha=0.05,
    do.children.first=FALSE
    ){
    
    ## ###################### INNER FUNCTIONS ###########################################
    
    getChildren = function(pix.idx) {
        ## Build the children list of one voxel
    
        if (is.null(info.den[[pix.idx]])) return(NULL)
        
        ## the projection of the original dynamic of pix.idx 
        pix.org = data.proj[,pix.idx]
        ## the projection of the denoised dynamic of pix.idx 
        pix.den = info.den[[pix.idx]]$Px
        ## the length of pix.idx neighborhood 
        pix.len = length(info.den[[pix.idx]]$Vx)
        
        ## initialize the children of pix.idx to its neighbors
        children = getActive(info.den[[pix.idx]]$Vx)
        i = 1
        while (i<length(children)) {
            i = i+1
            info.child = info.den[[children[i]]]
            ## test if denoised version are time coherent
            if (MultiTestH0(info.child$Px-pix.org,1/length(info.child$Vx)+1,thrs)) {
                ## Time coherent : we add non existing neighbors to children
                children <- c(children,setdiff(getActive(info.child$Vx),children))
            } else {
                ## not time coherent : remove from children
                children <- children[-i]
                i = i-1
            }
        }
        children
    }  
        
    getActive = function(indexes) {
        ## returns active indexes in to.cluster
        indexes[to.cluster[indexes]]
    }
            
    DoFDR = function(pvalues,FDR=0.1) {
        tri = order(pvalues,decreasing=F)
        m = length(pvalues)
        iBH = which(c(0,pvalues[tri])<=(0:m)/m*FDR,arr.ind=T)
        iBH = iBH[length(iBH)] # the last
        if (iBH<=m) H0 = tri[iBH:m] else H0 = integer(0)
                                            # if (iBH>1) H1 = tri[1:(iBH-1)] else H1 = NULL
        H0
    }   
                
    RobustMean = function(children) {
        ## Build a robust 1-mean for a children list
    
        ## Initialize with the dynamic average over all children
        child.proj = data.proj[,children,drop=FALSE]
        cluster = children
        center = Inf
        i = 0; i.max = 1000
        continue = TRUE
        ## repeat until center does not change anymore
        while (continue && (i<=i.max)) {
            i = i+1
            ## compute the new cluster center
            center.new = rowMeans(data.proj[,cluster,drop=FALSE])
            ## test coherence between cluster center and children dynamics
            pvalues = MultiTestH0(child.proj-center.new,1+1/length(cluster)) 
            ## the new cluster
            cluster = children[DoFDR(pvalues)]
            ## check if the cluster center has changed or not 
            continue = (length(cluster)>0)&&(mean((center.new-center)^2)>1e-4)
            ## update
            center = center.new                     
        }
        outliers=setdiff(children,cluster)
        
        if (i==i.max) print('stop RobustMean without stabilization') 
        
        list(cluster=cluster,center=center,outliers=outliers,OK=(i==i.max))
    } 
                    
    AddToClosestCluster = function(id.tocluster) {
        ## add unclusterized voxels to closest cluster i.e. with largest p-value
    
        ## mark voxel as already clusterize
        to.cluster[id.tocluster] <<- FALSE
    
        ## add voxel to the closest cluster i.e. with largest p-value
        ind = CheckClusterNew(data.proj[,id.tocluster],1,pvalue.max=T)
        cluster.list[[ind]]$cluster <<- c(cluster.list[[ind]]$cluster,id.tocluster)
        cluster.list[[ind]]$center <<- rowMeans(data.proj[,cluster.list[[ind]]$cluster])
        bad.voxels[length(bad.voxels)+1] <<- id.tocluster 
    }
                        
    CheckClusterNew = function(center,size,pvalue.max=F,use.fdr=T) {
    
        ## test new potential center against the centers from already build clusters
        ## if pvalue.max=T returns the index of the cluster giving the largest pvalue
        ## if pvalue.max=F returns the indexes of the clusters with pvalue corresponding to H0 (default)
        ## if use.fdr=TRUE (default) correct multiplicity with False Discovery Rate otherwise use Bonferroni correction
        
        if ((length(cluster.list)==0)) 
            if (!pvalue.max) return(NULL) else return(FALSE)
        
        cluster.centers = sapply(cluster.list,function(x) x$center)
        cluster.sizes = sapply(cluster.list,function(x) length(x$cluster))
        
        ## test thresholds with Bonferroni correction adapted to both partition number and number of existing clusters           
        if ((pvalue.max)|(use.fdr)) thrs = NULL
        else thrs = qchisq(1-alpha/length(cluster.list)/(iter+1),2^(0:iter))
        
        tmp = MultiTestH0(cluster.centers-center,1/cluster.sizes+1/size,thrs)
      
        ## return pvalues if thrs==NULL and booleans otherwise
        
        if (pvalue.max) return(which.max(tmp))
        
        if (use.fdr) return(DoFDR(tmp)) else return(which(tmp))
        
    }
                            
    DoCluster = function(id.tocluster){
    
        ## build cluster from voxel defined by id.tocluster
    
        ## find the active children (still in the list of voxels to clusterize) of id.tocluster
       if (do.children.first)
            children.ok = children.list[[id.tocluster]]
        else
            children.ok = getChildren(id.tocluster) 
         
        ## children are active children if to.cluster[children] is TRUE
        if (length(children.ok)==0) {
            to.cluster[id.tocluster]<<-FALSE;
            print('nothing to do');
            return(NULL)
        }
        
        in.children.length = length(children.ok)
        
        if (in.children.length<=min.size) {
            ## not enough children, we just add voxel to closest cluster
            AddToClosestCluster(id.tocluster)
            return(NULL)
        }
        
        bool = 1
        connected.clusters = list()             
        while (length(bool)>0){ 
            ## get the new potential cluster made of children from id.tocluster
            new.cluster = RobustMean(children.ok)
    
            ## check if cluster exists already
            bool = CheckClusterNew(new.cluster$center,length(new.cluster$cluster),pvalue.max=F,use.fdr=T)
            if (length(bool)>0){
                ## at least, one existing cluster is coherent with the new one
                ## get the connected clusters
                connected.clusters = c(connected.clusters,cluster.list[bool])
                ## get the connected voxels id
                connected.idx = unlist(sapply(cluster.list[bool],function(x) x$cluster))
                ## remove the connected clusters from the cluster list
                cluster.list <<- cluster.list[-bool]
                ## add the connected in the neighbors of idx.tocluster
                children.ok = unique(c(children.ok,connected.idx))
            }
        } 
        
        if (length(new.cluster$outliers)<=in.children.length) {
            ## we found a new cluster !!!
            ## remove the new cluster from pixel to clusterize
            to.cluster[new.cluster$cluster] <<- FALSE
            to.cluster[new.cluster$outliers] <<- TRUE 
            ## update cluster list 
            cluster.list[[length(cluster.list)+1]]<<-list(cluster=new.cluster$cluster,center=new.cluster$center)
        } else {
            ## TOO MANY OUTLIERS: we do not build new cluster
            ## Return cluster list to its original state
            ## Add id.cluster to the closest cluster
    
            ## return cluster list in its original state
            cluster.list <<- c(cluster.list,connected.clusters)
            ## add voxel to closest cluster
            AddToClosestCluster(id.tocluster)
        }        
    }
                                
    ## ###################### END INNER FUNCTIONS #######################################
                                
    ## data dimensions
    dim <- dim(data.array)
    ## number of dimensions
    ndim <- length(dim)
    ## special status for time
    ntime <- dim[ndim]
    ## spatial dimensions
    coord <- dim[-ndim]
    ## number of voxels
    nvox <- prod(coord)
    
    ## transforms the kD+T data set into a matrix ntime x nvox
    data.array <- matrix(aperm(data.array,c(ndim,1:(ndim-1))),nrow=ntime,ncol=nvox)
    
    ## number of partitions = number of tests = iter + 1
    iter <- floor(log2(ntime))-1
    ## partition sizes 2^(0:iter)
    
    ## test thresholds with Bonferroni correction adapted to the partition number
    thrs = qchisq(1-alpha/(iter+1),2^(0:iter))
    
    ## pseudo attachment of info.den, data.proj and var results of the callDenoiseVoixel procedure
    info.den = denois$info.den
    data.proj = denois$data.proj
    rm(denois)
    ## if(getRversion() >= "2.15.1")  utils::globalVariables(c("info.den", "data.proj", "var"))
    ## attach(denois,warn.conflicts=F) 
    ## on.exit(detach(denois))
    
    ## initialize the list of voxels NOT to clusterize
    to.cluster = !sapply(info.den,is.null)
    
    if (do.children.first) {
        ## Build the children list of all denoised voxels
        children.list = mclapply(1:length(info.den),getChildren,mc.preschedule=FALSE,mc.cores=nproc)
        ## Equivalent to the one-core version :
        ## children.list = lapply(1:length(info.den),getChildren)
    }
    
    ## the cluster list 
    cluster.list = list()
    
    ## the list of unclusterized voxels
    bad.voxels = vector()
    
                                    
    ## Main loop of the clusterization procedure
    i = 0
    actual.min.size = Inf
    while (any(to.cluster)){
        i = i+1
        id.tocluster = which(to.cluster)
        ## print(paste('NOMBRE DE VOXELS A CLUSTERISER:',length(id.tocluster),'--- current.min.size:',actual.min.size))
        ## Find in the remaining voxels to clusterize
        ## that with the largest list of children (if do.children.first=T) or neighbors (otherwise)
        ## remaining to clusterize
        if (do.children.first)
            size = sapply(children.list[id.tocluster],function(children) sum(to.cluster[children]))
        else
            size = sapply(info.den[id.tocluster],function(L) sum(to.cluster[L$Vx]))         
        
        id.max = which.max(size)
        actual.min.size =size[id.max]
        if (actual.min.size<min.size) break
        
        ## Build cluster associated to this voxel
        DoCluster(id.tocluster[id.max])
    }
    
    ## sort the cluster list in decreasing order
    tri = order(sapply(cluster.list,function(x) length(x$cluster)),decreasing=T)
    
    cluster.list = cluster.list[tri]
    
    ## compute the cluster centers
    for (i in 1:length(cluster.list)) 
        cluster.list[[i]]$center=rowMeans(data.array[,cluster.list[[i]]$cluster,drop=F])
    
    
    ## extract centers and clusters
    centers = sapply(cluster.list,function(x) x$center)
    clusters = lapply(cluster.list,function(x) x$cluster)
    
    list(clusters=clusters,centers=centers,bad.voxels=bad.voxels)
}

Try the DynClust package in your browser

Any scripts or data that you put into this service are public.

DynClust documentation built on April 11, 2022, 5:08 p.m.