# R/truncate_outliers_2.R

#' Report outliers and their truncated values based on z-scores
#'
#' For every column supplied in `...`, each value is standardised with the
#' column's mean and standard deviation. Values whose z-score lies above `z`
#' or below `-z` are reported together with the value they would take if
#' truncated to the cutoff (`mean + z * sd` or `mean - z * sd`). `data`
#' itself is not modified.
#'
#' Missing values are ignored when computing the mean and standard deviation
#' and are never reported as outliers. Columns whose standard deviation is
#' zero or undefined (constant, or fewer than two observations) have no
#' defined z-scores and are skipped.
#'
#' @keywords outliers
#' @param data a dataframe object
#' @param ... unquoted names of numeric columns of `data` (expressions
#'   evaluated inside `data`)
#' @param z a z-score cutoff point (single number)
#' @return A data frame with one row per outlier, upper outliers first, and
#'   columns `input` (original value), `Z_Score`, `truncated` (value at the
#'   cutoff), `column` (source column) and `bound` (`"upper"` or `"lower"`).
#'   Row names are `"upper"`, `"upper.1"`, ..., `"lower"`, `"lower.1"`, ...
#'   When nothing exceeds the cutoff, a message is printed and `NULL` is
#'   returned invisibly.
#' @import dplyr
#' @export
#' @examples
#' scores <- data.frame(x = c(rep(10, 20), 500))
#' trunc.out(scores, x, z = 3)
trunc.out <- function(data, ..., z) {
  # Capture the column expressions unevaluated so bare column names work.
  col_exprs <- match.call(expand.dots = FALSE)$...

  if (length(col_exprs) == 0L) {
    stop("supply at least one column of `data` in `...`", call. = FALSE)
  }
  if (!is.numeric(z) || length(z) != 1L || is.na(z)) {
    stop("`z` must be a single, non-missing number", call. = FALSE)
  }

  cat(paste("_________________________________________", "\n"))

  # Resolved here because parent.frame() would mean something else inside
  # the helper below.
  caller_env <- parent.frame()

  find_outliers <- function(expr) {
    label <- paste(deparse(expr), collapse = "")
    values <- eval(expr, envir = data, enclos = caller_env)
    if (!is.numeric(values)) {
      stop(sprintf("column `%s` is not numeric", label), call. = FALSE)
    }

    center <- mean(values, na.rm = TRUE)
    spread <- stats::sd(values, na.rm = TRUE)
    if (is.na(spread) || spread == 0) {
      # z-scores are undefined for this column; nothing can be an outlier.
      return(NULL)
    }

    scores <- (values - center) / spread
    upper_at <- which(!is.na(scores) & scores > z)
    lower_at <- which(!is.na(scores) & scores < -z)
    hits <- c(upper_at, lower_at)
    direction <- c(rep(1, length(upper_at)), rep(-1, length(lower_at)))

    data.frame(
      input = as.numeric(values[hits]),
      Z_Score = as.numeric(scores[hits]),
      truncated = center + direction * z * spread,
      column = rep(label, length(hits)),
      bound = ifelse(direction > 0, "upper", "lower"),
      stringsAsFactors = FALSE
    )
  }

  per_column <- Filter(Negate(is.null), lapply(col_exprs, find_outliers))
  found <- do.call(rbind, unname(per_column))

  if (is.null(found) || nrow(found) == 0L) {
    cat(paste("No Outliers Detected", "\n"))
    return(invisible(NULL))
  }

  # Upper outliers first, then lower; order() keeps ties in original order,
  # so rows stay grouped by column and data order within each bound.
  found <- found[order(found$bound != "upper"), , drop = FALSE]
  rownames(found) <- make.unique(found$bound)
  found
}


#document()
# npm27/domo documentation built on July 2, 2019, 11:09 p.m.