Nothing
#' @title Identify outliers based on 3 MAD
#'
#' @description Identify outliers based on 3 median absolute deviations (MAD)
#' from the median.
#'
#' @details The function internally use [scale_mad()] to "standardize" the data
#' based on the MAD and median, and then check for any observation greater
#' than the specified criteria (e.g., +/-3).
#'
#' For the *easystats* equivalent, use:
#' `performance::check_outliers(x, method = "zscore_robust, threshold = 3)`.
#'
#' @param data The data frame.
#' @param col.list List of variables to check for outliers.
#' @param ID ID variable if you would like the outliers to be identified
#' as such.
#' @param criteria How many MAD to use as threshold (similar to standard
#' deviations)
#' @param mad.scores Logical, whether to output robust z (MAD) scores (default)
#' or raw scores. Defaults to `TRUE`.
#' @keywords standardization normalization median MAD mean outliers
#' @return A list of dataframes of outliers per variable, with row
#' numbers, based on the MAD. When printed, provides the number
#' of outliers, selected variables, and any outlier flagged for
#' more than one variable. More information can be obtainned
#' by using the [attributes()] function around the generated object.
#' @references Leys, C., Ley, C., Klein, O., Bernard, P., & Licata, L.
#' (2013). Detecting outliers: Do not use standard deviation
#' around the mean, use absolute deviation around the median.
#' *Journal of Experimental Social Psychology*, *49*(4), 764–766.
#' https://doi.org/10.1016/j.jesp.2013.03.013
#' @export
#' @examples
#' find_mad(
#' data = mtcars,
#' col.list = names(mtcars),
#' criteria = 3
#' )
#'
#' mtcars2 <- mtcars
#' mtcars2$car <- row.names(mtcars)
#' find_mad(
#' data = mtcars2,
#' col.list = names(mtcars),
#' ID = "car",
#' criteria = 3
#' )
#' @importFrom dplyr mutate %>% select ends_with across all_of
#' if_any filter bind_rows count n
#' @export
find_mad <- function(data,
col.list,
ID = NULL,
criteria = 3,
mad.scores = TRUE) {
check_col_names(data, c(col.list, ID))
if (missing(ID)) {
data$ID <- rownames(data)
}
row.names(data) <- NULL
mad0.list <- lapply(col.list, function(x) {
find_mad0(data,
x,
ID = ID,
criteria = criteria,
mad.scores = mad.scores
)
})
names(mad0.list) <- col.list
mad0.list <- mad0.list[lapply(mad0.list, nrow) > 0]
duplicates.df <- bind_rows(mad0.list) %>%
select(where(~ !all(is.na(.x))))
if (nrow(duplicates.df) > 0) {
if (!missing(ID)) {
duplicates.df <- duplicates.df %>%
count(Row, .[[ID]]) %>%
rename({{ ID }} := ".[[ID]]")
} else {
duplicates.df <- duplicates.df %>%
count(Row)
}
duplicates.df <- duplicates.df %>%
filter(n > 1)
}
mad0 <- find_mad0(data, col.list, ID = ID, criteria, mad.scores)
# Define attributes!
out <- mad0.list
class(out) <- "find_mad"
attr(out, "outlier_list") <- mad0.list
attr(out, "outlier_total") <- mad0
attr(out, "outlier_multiple") <- duplicates.df
attr(out, "criteria") <- criteria
attr(out, "col.list") <- col.list
out
}
#' @noRd
find_mad0 <- function(data, col.list, ID = ID, criteria = 3, mad.scores) {
if (criteria <= 0) {
stop("Criteria needs to be greater than one.")
}
data <- data %>%
select(-ends_with("_mad")) %>%
mutate(
Row = row(.[1]),
across(all_of(col.list), rempsyc::scale_mad,
.names = "{col}_mad"
)
) %>%
filter(if_any(ends_with("_mad"), ~ . > criteria | . < -criteria))
if (isTRUE(mad.scores)) {
data <- data %>%
select(Row, all_of(ID), ends_with("_mad"))
} else {
data <- data %>%
select(Row, all_of(ID), all_of(col.list))
}
data
}
#' @export
print.find_mad <- function(x, ...) {
mad0 <- attr(x, "outlier_total")
duplicates.df <- attr(x, "outlier_multiple")
criteria <- attr(x, "criteria")
col.list <- attr(x, "col.list")
mad0.list <- attr(x, "outlier_list")
if (isTRUE(nrow(mad0) > 0)) {
cat(
nrow(mad0), "outlier(s) based on", criteria,
"median absolute deviations for variable(s): \n",
paste0(col.list, collapse = ", "), "\n\n"
)
if (nrow(duplicates.df) > 0) {
cat(
"The following participants were considered outliers ",
"for more than one variable: \n\n",
sep = ""
)
print(duplicates.df)
cat("\n")
}
cat("Outliers per variable: \n\n")
print(mad0.list)
} else {
cat(
"There were no outlier based on", criteria,
"median absolute deviations.\n\n"
)
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.