R/smartCut.R

#' Smart cut
#'
#' Cut values into ranges using k-means
#' @param value vector of values to be categorized
#' @param number of categories (equal ranges)
#' @param labs if one wants labels to be returned
#' @return $intervval_value
#' @export
smartCut <- function(value, length=7, return="labs", excluded){

  prop <- value %>% table %>% prop.table

  if(missing(excluded)) excluded <- which(prop>.5) %>% names
  if(length(excluded)==0) excluded <- 99999111.9999911111

  idx_incl <- which(value!=excluded)
  idx_excl <- which(value==excluded)

  x <-  vector(mode="integer",length=length(value))
  x[idx_excl] <- 1

  if(sum(idx_excl)>0)
    x[idx_incl] <- Ckmeans.1d.dp:::Ckmeans.1d.dp(value[idx_incl], k=length-1)[[1]]+1 else
    x[idx_incl] <- Ckmeans.1d.dp:::Ckmeans.1d.dp(value[idx_incl], k=length)[[1]]

  min <- tapply(value, x, min)
  max <- tapply(value, x, max)
  lab <- paste0("[",round(min,3),"-", round(max,3),"]")

  labels <-
    data.frame(
      level = names(min) %>% as.integer,
      label= lab,
      min = min,
      max = max
    )
  labels <- data.frame(level = x) %>% left_join(labels)

  if(return=="labs") return(labels[,2])
  if(return=='clust') return(labels[,1])
  if(return=='all') return(labels)

}
gogonzo/oddsandsods documentation built on May 12, 2019, 1:35 a.m.