Description Usage Arguments Examples
Determine center of data cloud, for each point, connect it with center, project points onto this line and use distances between projected points to detect outliers. A boxplot method is used on the projected distances.
plotit=T creates a scatterplot when working with bivariate data.
op=T means the .5 depth contour is plotted based on data with outliers removed.
op=F means .5 depth contour is plotted without removing outliers.
MM=F Use interquatile range when checking for outliers MM=T uses MAD.
If value for center is not specified, there are four options for computing the center of the cloud of points when computing projections:
cop=2 uses MCD center cop=3 uses median of the marginal distributions. cop=4 uses MVE center cop=5 uses TBS cop=6 uses rmba (Olive's median ball algorithm) cop=7 uses the spatial (L1) median
When using cop=2, 3 or 4, default critical value for outliers is square root of the .975 quantile of a chi-squared distribution with p degrees of freedom.
Donoho-Gasko (Tukey) median is marked with a cross, +.
1 2 3 |
m |
|
gval |
|
center |
|
plotit |
|
op |
|
MM |
|
cop |
|
xlab |
|
ylab |
|
STAND |
STAND=T means that marginal distributions are standardized before checking for outliers. |
tr |
- |
q |
- |
pr |
- |
... |
- |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 | ##---- Should be DIRECTLY executable !! ----
##-- ==> Define data, use random,
##-- or do help(data=index) for the standard data sets.
## The function is currently defined as
function (m, gval = NA, center = NA, plotit = T, op = T, MM = F,
cop = 3, xlab = "VAR 1", ylab = "VAR 2")
{
library(MASS)
m <- as.matrix(m)
if (ncol(m) == 1) {
dis <- (m - median(m))^2/mad(m)^2
dis <- sqrt(dis)
crit <- sqrt(qchisq(0.975, 1))
chk <- ifelse(dis > crit, 1, 0)
vec <- c(1:nrow(m))
outid <- vec[chk == 1]
keep <- vec[chk == 0]
}
if (ncol(m) > 1) {
if (is.na(gval) && cop == 1)
gval <- sqrt(qchisq(0.95, ncol(m)))
if (is.na(gval) && cop != 1)
gval <- sqrt(qchisq(0.975, ncol(m)))
m <- elimna(m)
if (cop == 1 && is.na(center[1])) {
if (ncol(m) > 2)
center <- dmean(m, tr = 0.5, cop = 1)
if (ncol(m) == 2) {
tempd <- NA
for (i in 1:nrow(m)) tempd[i] <- depth(m[i, 1],
m[i, 2], m)
mdep <- max(tempd)
flag <- (tempd == mdep)
if (sum(flag) == 1)
center <- m[flag, ]
if (sum(flag) > 1)
center <- apply(m[flag, ], 2, mean)
}
}
if (cop == 2 && is.na(center[1])) {
center <- cov.mcd(m)$center
}
if (cop == 4 && is.na(center[1])) {
center <- cov.mve(m)$center
}
if (cop == 3 && is.na(center[1])) {
center <- apply(m, 2, median)
}
if (cop == 5 && is.na(center[1])) {
center <- tbs(m)$center
}
if (cop == 6 && is.na(center[1])) {
center <- rmba(m)$center
}
if (cop == 7 && is.na(center[1])) {
center <- spat(m)
}
flag <- rep(0, nrow(m))
outid <- NA
vec <- c(1:nrow(m))
for (i in 1:nrow(m)) {
B <- m[i, ] - center
dis <- NA
BB <- B^2
bot <- sum(BB)
if (bot != 0) {
for (j in 1:nrow(m)) {
A <- m[j, ] - center
temp <- sum(A * B) * B/bot
dis[j] <- sqrt(sum(temp^2))
}
temp <- idealf(dis)
if (!MM)
cu <- median(dis) + gval * (temp$qu - temp$ql)
if (MM)
cu <- median(dis) + gval * mad(dis)
outid <- NA
temp2 <- (dis > cu)
flag[temp2] <- 1
}
}
if (sum(flag) == 0)
outid <- NA
if (sum(flag) > 0)
flag <- (flag == 1)
outid <- vec[flag]
idv <- c(1:nrow(m))
keep <- idv[!flag]
if (ncol(m) == 2) {
if (plotit) {
plot(m[, 1], m[, 2], type = "n", xlab = xlab,
ylab = ylab)
points(m[keep, 1], m[keep, 2], pch = "*")
if (length(outid) > 0)
points(m[outid, 1], m[outid, 2], pch = "o")
if (op) {
tempd <- NA
keep <- keep[!is.na(keep)]
mm <- m[keep, ]
for (i in 1:nrow(mm)) tempd[i] <- depth(mm[i,
1], mm[i, 2], mm)
mdep <- max(tempd)
flag <- (tempd == mdep)
if (sum(flag) == 1)
center <- mm[flag, ]
if (sum(flag) > 1)
center <- apply(mm[flag, ], 2, mean)
m <- mm
}
points(center[1], center[2], pch = "+")
x <- m
temp <- fdepth(m, plotit = F)
flag <- (temp >= median(temp))
xx <- x[flag, ]
xord <- order(xx[, 1])
xx <- xx[xord, ]
temp <- chull(xx)
xord <- order(xx[, 1])
xx <- xx[xord, ]
temp <- chull(xx)
lines(xx[temp, ])
lines(xx[c(temp[1], temp[length(temp)]), ])
}
}
}
list(out.id = outid, keep = keep)
}
|
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.