# R/cluster_cutoff_at_first_empty_bin.R

#' cluster_cutoff_at_first_empty_bin function
#'
#' This function decides where to cut the hierarchical clustering tree to
#' define clusters within a level set. The range from the smallest merge
#' height up to \code{diam} is split into equal-width histogram bins, and the
#' cutoff is the midpoint of the first bin that contains no value.
#'
#' @param heights Height values in hierarchical clustering.
#' @param diam Maximum distance between points in a level set.
#' @param num_bins_when_clustering Controls how many bins there are in the
#'   histogram used to determine the cutoff.
#'
#' @return Numerical value for cutoff point of hierarchical cluster diagram.
#'   \code{Inf} is returned (meaning: do not cut, keep a single cluster) when
#'   no histogram bin is empty, when \code{heights} is empty, or when the
#'   smallest height equals \code{diam} so that the histogram range has zero
#'   width.
#'
#' @author Paul Pearson, \email{pearsonp@@hope.edu}
#' @references \url{https://github.com/paultpearson/TDAmapper}
#' @seealso \code{\link{mapper1D}}, \code{\link{mapper2D}}
#'

cluster_cutoff_at_first_empty_bin <- function(heights, diam, num_bins_when_clustering) {

  # Degenerate inputs: with no merge heights, or with zero spread between the
  # smallest height and diam (e.g. two points, or several points that are all
  # at distance diam), no histogram can be built. A zero step would make seq()
  # return a single number, which hist() interprets as a bin *count* rather
  # than as break points. Treat these cases as a single cluster.
  if (length(heights) == 0 || diam - min(heights) == 0) {
    return(Inf)
  }

  # length.out (rather than by) guarantees exactly num_bins_when_clustering
  # bins whose last break is exactly diam, independent of rounding in the step.
  bin_breaks <- seq(from = min(heights), to = diam,
                    length.out = num_bins_when_clustering + 1)

  # diam is appended so the top bin is always occupied; only a gap strictly
  # below diam can therefore produce an empty bin.
  myhist <- hist(c(heights, diam), breaks = bin_breaks, plot = FALSE)
  empty_bins <- myhist$counts == 0

  if (!any(empty_bins)) {
    # every bin is occupied: there is no gap to cut at
    Inf
  } else {
    # which() lists the indices of the empty bins in increasing order, so the
    # first entry is the lowest empty bin
    myhist$mids[which(empty_bins)[1L]]
  }

}

# Try the TDAmapper package in your browser
#
# Any scripts or data that you put into this service are public.
#
# TDAmapper documentation built on May 1, 2019, 10:30 p.m.