R/clusterAndCollapse.R

clusterAndCollapse <- function(x, minSep=0.2, ...) {  ## this should recurse
  require(mclust)
  results = list()
  results$retain = 0
  results$values = x
  mcl = Mclust(x, ...) # look for small # of groups by default
  means = mcl$parameters$mean
  diff.mat = sapply(means, function(x) sapply(means, '-', x))
  if(all(diff.mat < minSep)){ # {{{
    return(results) # }}}
  } else if(any(is.na(x))) { # {{{ skip
    message('NA/NaN values found, skipping... impute if you want to use these')
    return(results) # }}}
  } else if(any(diff.mat[ -which(diff.mat==0) ] < minSep)) { # {{{ collapse
    rounds = 1
    i = rev(order(table(as.factor(mcl$classification))))[1]
    while(rounds < mcl$G) { # {{{ the business: collapse if sep < minSep 
      if(length(which(mcl$classification == i)) == 0) break()
      newmean = means[i]
      groups = which(table(as.factor(mcl$classification)) > 0)
      means = tapply(x, mcl$classification, mean, na.rm=T)
      diffs = sapply(means, function(x) abs(x - newmean))
      names(diffs) = names(means)
      if(all(diffs > minSep)) break()
      rounds = rounds + 1 
      to.merge = groups[ which(diffs[groups] < minSep) ]
      mcl$classification[ which(mcl$classification %in% to.merge) ] = i
      means = tapply(x, mcl$classification, mean, na.rm=T)
      if(length(means) < 2) break()
      newmean = mean(x[ which(mcl$classification == i) ], na.rm=T)
      diffs = sapply(means, function(x) abs(x - newmean))
      if(all(diffs > minSep)) break()
      groupnames = names(rev(sort(diffs*table(mcl$classification))))
      for(j in seq_along(groupnames)) {
        if(groupnames[j] == i) break()
        i = j
        if(diffs[groupnames[i]] > minSep) break()
      } 
    } # }}}
    means = tapply(x, mcl$classification, mean, na.rm=T)
    results$values = means[ as.character(mcl$classification) ]
    results$retain = length(unique(results$values))
    return(results)
  } # }}}
}
ttriche/regulatoR documentation built on June 1, 2019, 2:51 a.m.