R/nearZeroVar.R

nearZeroVarOld <- function(x, freqCut = 95/5, uniqueCut = 10, saveMetrics = FALSE)
{
  if(is.vector(x)) x <- matrix(x, ncol = 1)
  freqRatio <- apply(
                     x, 
                     2, 
                     function(data)
                     {
                       dataTable <- sort(table(data[!is.na(data)]), decreasing = TRUE)
                       if(length(dataTable ) >= 2) 
                         {
                           dataTable [1]/dataTable[2]
                         } else 0
                     })
  percentUnique <- apply(
                         x, 
                         2, 
                         function(data) 100*length(unique(data[!is.na(data)]))/length(data))

  zeroVar <- apply(x, 2, function(data) length(unique(data[!is.na(data)])) == 1 | all(is.na(data)))
  
  
  if(saveMetrics)
    {
      out <- data.frame(freqRatio = freqRatio, 
                        percentUnique = percentUnique, 
                        zeroVar = zeroVar,
                        nzv = (freqRatio > freqCut & percentUnique <= uniqueCut) | zeroVar)
    } else {
      out <- which((freqRatio > freqCut & percentUnique <= uniqueCut) | zeroVar)
      names(out) <- NULL
    }
  out
}

nearZeroVar <- function (x, freqCut = 95/5, uniqueCut = 10, saveMetrics = FALSE)
{
  if (is.vector(x)) x <- matrix(x, ncol = 1)
  freqRatio <- apply(x, 2, function(data)
                     {
                       t <- table(data[!is.na(data)]) 
                       if (length(t) <= 1) {
                         return(0);
                       }
                       w <- which.max(t);
                       return(max(t, na.rm=TRUE)/max(t[-w], na.rm=TRUE))
                     })
  lunique <- apply(x, 2, function(data) length(unique(data[!is.na(data)])))
  percentUnique <- 100 * lunique / apply(x, 2, length)
  zeroVar <- (lunique == 1) | apply(x, 2, function(data) all(is.na(data)))
  if (saveMetrics)
    {
      out <- data.frame(freqRatio = freqRatio,
                        percentUnique = percentUnique,
                        zeroVar = zeroVar,
                        nzv = (freqRatio > freqCut & percentUnique <= uniqueCut) | zeroVar)
    }
  else {
    out <- which((freqRatio > freqCut & percentUnique <= uniqueCut) | zeroVar)
    names(out) <- NULL
  }
  out
}

zeroVar <- function(x)
  {
    x <- x[,colnames(x) != ".outcome", drop = FALSE]
    which(apply(x, 2, function(x) length(unique(x)) < 2))
  }

checkConditionalX <- function(x, y)
  {
    x$.outcome <- y
    unique(unlist(dlply(x, .(.outcome), zeroVar)))
  }


checkResamples <- function(index, x, y)
  {
    if(!is.factor(y)) stop("y must be a factor")
    if(length(levels(y)) < 2) stop("y must have at least 2 levels")
    wrap <- function(index, x, y) checkConditionalX(x[index,,drop=FALSE], y[index])
    unique(unlist(lapply(index, wrap, x = x, y = y)))
  }

Try the caret package in your browser

Any scripts or data that you put into this service are public.

caret documentation built on May 2, 2019, 5:47 p.m.