Nothing
mvout <-
function(x, method = c("none", "princomp", "factanal"), standardize = TRUE,
robust = TRUE, direction = rep("two.sided", ncol(x)), thresh = 0.01,
keepx = TRUE, factors = 2, scores = c("regression", "Bartlett"),
rotation = c("none", "varimax", "promax"), ...){
# robust detection of (directional) multivariate outliers
# using minimum covariance determinant (MCD) estimator
# Nathaniel E. Helwig
# updated: 2025 Mar 27
#########***######### INITIAL CHECKS #########***#########
# check 'x'
x <- as.matrix(x)
n <- nrow(x)
p <- ncol(x)
if(n <= p) stop("Input 'x' needs to satisfy: nrow(x) > ncol(x)")
# check 'method'
method <- as.character(method[1])
method <- pmatch(method, c("none", "princomp", "factanal"))
if(is.na(method)) stop("Invalid 'method' input. See help files.")
method <- c("none", "princomp", "factanal")[method]
if(method == "factanal" && p < 3L) stop("Need ncol(x) >= 3 when 'method' is 'factanal'")
# check 'standardize'
if(method == "princomp"){
standardize <- as.logical(standardize[1])
if(!any(standardize == c(TRUE, FALSE))) stop("Input 'standardize' must be logical.")
} else {
standardize <- TRUE
}
# check 'robust'
robust <- as.logical(robust[1])
if(!any(robust == c(TRUE, FALSE))) stop("Input 'robust' must be TRUE or FALSE.")
# check 'direction'
direction <- as.character(direction)
if(length(direction) != p) {
if(length(direction) != 1L){
warning("The length of 'direction' does not match the number of columns of 'x',\nso the first element of 'direction' is being used for all variables.")
}
direction <- pmatch(direction[1], c("two.sided", "less", "greater"))
if(is.na(direction)) stop("Invalid 'direction' input. See help files.")
direction <- c("two.sided", "less", "greater")[direction]
direction <- rep(direction, p)
} else {
for(j in 1:p){
directionj <- pmatch(direction[j], c("two.sided", "less", "greater"))
if(is.na(directionj)) stop("Invalid 'direction' input. See help files.")
direction[j] <- c("two.sided", "less", "greater")[directionj]
}
}
# check 'thresh'
thresh <- as.numeric(thresh[1])
if(thresh <= 0 | thresh >= 1) stop("Input 'thresh' must satisfy: 0 < thresh < 1")
# check 'keepx'
keepx <- as.logical(keepx[1])
if(!any(keepx == c(TRUE, FALSE))) stop("Input 'keepx' must be TRUE or FALSE.")
xin <- if(keepx) x else NULL
# check 'factors'
if(method != "none"){
factors <- as.integer(factors[1])
if(factors < 1L | factors > p) stop("Input 'factors' must satisfy: 1 <= factors <= ncol(x)")
}
# check scores
if(method == "factanal"){
scores <- as.character(scores[1])
scores <- pmatch(scores, c("regression", "Bartlett"))
if(is.na(scores)) stop("Invalid 'scores' input.")
scores <- c("regression", "Bartlett")[scores]
}
scores.orig <- scores
# check rotation
if(method == "none"){
rotation <- "none"
} else {
rotation <- as.character(rotation[1])
rotation <- pmatch(rotation, c("none", "varimax", "promax"))
if(is.na(rotation)) stop("Invalid 'rotation' input.")
rotation <- c("none", "varimax", "promax")[rotation]
}
#########***######### ROBUST Z-SCORES #########***#########
# calculate (robust) mean and covariance
if(robust){
rcov <- robustbase::covMcd(x, cor = standardize, ...)
} else {
rcov <- list(center = colMeans(x),
cov = cov(x),
mcd.wt = rep(1, n))
if(standardize) rcov$cor <- cor(x)
}
# robust center (and scale?)
if(standardize){
x <- scale(x, center = rcov$center, scale = sqrt(diag(rcov$cov)))
orig.cov <- rcov$cov
rcov$cov <- rcov$cor
} else {
x <- scale(x, center = rcov$center, scale = FALSE)
orig.cov <- NULL
}
#########***######### MAHALANOBIS DISTANCE #########***#########
# directional distances?
directional = FALSE
if(any(direction != "two.sided")){
directional <- TRUE
xorig <- x
indx <- which(direction != "two.sided")
for(j in indx){
sgn <- ifelse(direction[j] == "less", -1, 1)
x[,j] <- sgn * pmax(sgn * x[,j], 0)
}
}
# robust distance calculation
if(method == "none"){
rdis <- mahalanobis(x, center = FALSE, cov = rcov$cov)
} else if(method[1] == "princomp"){
reig <- eigen(rcov$cov, symmetric = TRUE)
uniquenesses <- diag(rcov$cov) - rowSums((reig$vectors[,1:factors,drop=FALSE] %*% diag(sqrt(reig$values[1:factors]), nrow = factors))^2)
loadings <- reig$vectors[,1:factors,drop=FALSE]
lsigns <- sign(colSums(loadings^3))
if(any(lsigns == 0)) lsigns[lsigns == 0] <- 1
loadings <- loadings %*% diag(lsigns, nrow = factors)
scores <- x %*% loadings
rdis <- mahalanobis(scores, center = FALSE, cov = diag(reig$values[1:factors], nrow = factors))
if(directional) x <- xorig
scores <- x %*% loadings %*% diag(1/sqrt(reig$values[1:factors]), nrow = factors)
loadings <- loadings %*% diag(sqrt(reig$values[1:factors]), nrow = factors)
p <- factors
} else {
rfac <- factanal(factors = factors, covmat = rcov$cov,
n.obs = sum(rcov$mcd.wt), rotation = "none")
uniquenesses <- rfac$uniquenesses
loadings <- rfac$loadings
lsigns <- sign(colSums(loadings^3))
if(any(lsigns == 0)) lsigns[lsigns == 0] <- 1
loadings <- loadings %*% diag(lsigns, nrow = factors)
covmat <- tcrossprod(rfac$loadings) + diag(rfac$uniquenesses)
rdis <- mahalanobis(x, center = FALSE, cov = covmat)
if(directional) x <- xorig
PsiInvLo <- diag(1 / uniquenesses, nrow = p, ncol = p) %*% loadings
if(scores == "regression"){
scores <- x %*% PsiInvLo %*% solve(diag(factors) + crossprod(loadings, PsiInvLo))
} else {
scores <- x %*% PsiInvLo %*% solve(crossprod(loadings, PsiInvLo))
}
} # end if(method == "none")
#########***######### OUTLIER DETECTION #########***#########
# robust critical value
cval.in <- qchisq(1 - thresh, df = p)
cval.out <- qf(1 - thresh, df1 = p, df2 = n - p) * ((n - 1) * p / (n - p))
# robust outlier detection
outlier <- ifelse(rdis > ifelse(rcov$mcd.wt, cval.in, cval.out), TRUE, FALSE)
#########***######### POSTPROCESSING #########***#########
# rotate loadings?
cormat <- irot <- diag(factors)
if(method != "none" && rotation != "none"){
if(rotation == "varimax"){
rot <- varimax(loadings)
irot <- rot$rotmat
loadings <- rot$loadings
scores <- scores %*% rot$rotmat
} else {
rot <- promax(loadings)
loadings <- rot$loadings
irot <- solve(t(rot$rotmat))
scores <- scores %*% irot
cormat <- crossprod(irot)
}
}
# return results
if(!is.null(orig.cov)) rcov$cov <- orig.cov
args <- list(x = xin, method = method, standardize = standardize,
direction = direction, thresh = thresh,
robust = robust, factors = factors,
scores = scores.orig, rotation = rotation)
if(method == "none"){
res <- list(distance = rdis,
outlier = outlier,
mcd = rcov,
args = args)
} else {
rownames(loadings) <- colnames(x)
colnames(loadings) <- paste0("Factor", 1:ncol(loadings))
rownames(scores) <- rownames(x)
colnames(scores) <- paste0("Factor", 1:ncol(scores))
res <- list(distance = rdis,
outlier = outlier,
mcd = rcov,
args = args,
scores = scores,
loadings = loadings,
uniquenesses = uniquenesses,
invrot = irot,
cormat = cormat)
}
class(res) <- "mvout"
return(res)
} # mvout
print.mvout <- function(x, ...){
#cat("Multivariate Outliers Detection\n\n")
cat(paste0(" \n ", round(100 * mean(x$outlier), 2), "% flagged as outliers\n"))
cat("\n Mahalanobis Distances:\n")
dq <- quantile(x$distance, probs = c(0.8, 0.85, 0.9, 0.95, 0.99))
print(round(dq, 4))
cat("\n")
cat(" method: ", x$args$method, "\n")
cat(" standardize: ", x$args$standardize, "\n")
cat(" robust: ", x$args$robust, "\n")
if(length(unique(x$args$direction)) == 1) {
cat(" direction: ", x$args$direction[1], "\n")
} else {
cat(" direction: ", head(x$args$direction), "\n\n")
}
cat(" thresh: ", x$args$thresh, "\n\n")
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.