#' Truncate Outliers based on z distribution
#'
#' Flags every value in the selected columns whose z-score lies beyond a
#' cutoff and reports the value it would be truncated to, i.e. the value
#' sitting exactly `z` standard deviations from the column mean.
#'
#' The input data is not modified; the function only reports the outliers and
#' their truncated replacements. Missing values are ignored when computing the
#' mean and standard deviation and are never flagged. Columns whose standard
#' deviation is zero or undefined cannot contain outliers and are skipped.
#'
#' @keywords outliers
#' @param data a dataframe object
#' @param ... dataframe columns, given as unquoted names (or expressions)
#'   that are evaluated inside `data`
#' @param z a z-score cutoff point (a single non-negative number)
#' @return A data frame with one row per detected outlier and the columns
#'   `variable` (the column expression), `bound` (`"upper"` or `"lower"`),
#'   `input` (the original value), `Z_Score` (its z-score) and `truncated`
#'   (the replacement value at the cutoff). When no outliers are found,
#'   "No Outliers Detected" is printed and `NULL` is returned invisibly.
#' @import dplyr
#' @export
#' @examples
#' df <- data.frame(x = c(rep(10, 20), 100, -80))
#' trunc.out(df, x, z = 3)
trunc.out <- function(data, ..., z) {
  # Capture the column expressions unevaluated so they can be resolved
  # against `data` (falling back to the caller's environment).
  col_exprs <- match.call(expand.dots = FALSE)$...
  caller_env <- parent.frame()

  if (!is.numeric(z) || length(z) != 1L || is.na(z) || z < 0) {
    stop("`z` must be a single non-negative number.", call. = FALSE)
  }

  cat(paste("_________________________________________", "\n"))

  # seq_along() keeps the loop empty when no columns were supplied.
  per_column <- lapply(seq_along(col_exprs), function(i) {
    col_expr <- col_exprs[[i]]
    col_label <- paste(deparse(col_expr), collapse = "")
    values <- eval(col_expr, envir = data, enclos = caller_env)

    if (!is.numeric(values)) {
      stop("Column `", col_label, "` must be numeric.", call. = FALSE)
    }

    center <- mean(values, na.rm = TRUE)
    spread <- stats::sd(values, na.rm = TRUE)

    # A constant column (or one with fewer than two observed values) has no
    # usable z-scores, hence no outliers.
    if (is.na(spread) || spread == 0) {
      return(NULL)
    }

    z_scores <- (values - center) / spread
    is_upper <- !is.na(z_scores) & z_scores > z
    is_lower <- !is.na(z_scores) & z_scores < -z
    flagged <- is_upper | is_lower

    if (!any(flagged)) {
      return(NULL)
    }

    # Upper outliers are pulled down to mean + z * sd, lower ones up to
    # mean - z * sd.
    direction <- ifelse(is_upper[flagged], 1, -1)

    data.frame(
      variable = col_label,
      bound = ifelse(is_upper[flagged], "upper", "lower"),
      input = values[flagged],
      Z_Score = z_scores[flagged],
      truncated = center + direction * z * spread,
      stringsAsFactors = FALSE
    )
  })

  # rbind() drops the NULL entries; the result is NULL when nothing was
  # flagged in any column.
  to_return <- do.call(rbind, per_column)

  if (is.null(to_return)) {
    cat(paste("No Outliers Detected", "\n"))
    return(invisible(NULL))
  }

  rownames(to_return) <- NULL
  to_return
}
#document()
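# NOTE: the two lines below are leftover text from the web page this file was
# copied from; they are not part of the package source.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.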