Nothing
## VT::15.09.2013 - this will render the output independent
## from the version of the package
suppressPackageStartupMessages(library(rrcov))
library(MASS)
dodata <- function(nrep = 1, time = FALSE, full = TRUE) {
domest <- function(x, xname, nrep = 1) {
n <- dim(x)[1]
p <- dim(x)[2]
mm <- CovMest(x)
crit <- log(mm@crit)
## c1 <- mm@psi@c1
## M <- mm$psi@M
xres <- sprintf("%3d %3d %12.6f\n", dim(x)[1], dim(x)[2], crit)
lpad <- lname-nchar(xname)
cat(pad.right(xname,lpad), xres)
dist <- getDistance(mm)
quantiel <- qchisq(0.975, p)
ibad <- which(dist >= quantiel)
names(ibad) <- NULL
nbad <- length(ibad)
cat("Outliers: ",nbad,"\n")
if(nbad > 0)
print(ibad)
cat("-------------\n")
show(mm)
cat("--------------------------------------------------------\n")
}
options(digits = 5)
set.seed(101) # <<-- sub-sampling algorithm now based on R's RNG and seed
lname <- 20
data(heart)
data(starsCYG)
data(phosphor)
data(stackloss)
data(coleman)
data(salinity)
data(wood)
data(hbk)
data(Animals, package = "MASS")
brain <- Animals[c(1:24, 26:25, 27:28),]
data(milk)
data(bushfire)
tmp <- sys.call()
cat("\nCall: ", deparse(substitute(tmp)),"\n")
cat("Data Set n p c1 M LOG(det) Time\n")
cat("======================================================================\n")
domest(heart[, 1:2], data(heart), nrep)
domest(starsCYG, data(starsCYG), nrep)
domest(data.matrix(subset(phosphor, select = -plant)), data(phosphor), nrep)
domest(stack.x, data(stackloss), nrep)
domest(data.matrix(subset(coleman, select = -Y)), data(coleman), nrep)
domest(data.matrix(subset(salinity, select = -Y)), data(salinity), nrep)
domest(data.matrix(subset(wood, select = -y)), data(wood), nrep)
domest(data.matrix(subset(hbk, select = -Y)), data(hbk), nrep)
domest(brain, "Animals", nrep)
domest(milk, data(milk), nrep)
domest(bushfire, data(bushfire), nrep)
cat("======================================================================\n")
}
# generate contaminated data using the function gendata with different
# number of outliers and check if the M-estimate breaks - i.e. the
# largest eigenvalue is larger than e.g. 5.
# For n=50 and p=10 and d=5 the M-estimate can break for number of
# outliers grater than 20.
dogen <- function(){
eig <- vector("numeric",26)
for(i in 0:25) {
gg <- gendata(eps=i)
mm <- CovMest(gg$x, t0=gg$tgood, S0=gg$sgood, arp=0.001)
eig[i+1] <- ev <- getEvals(mm)[1]
# cat(i, ev, "\n")
stopifnot(ev < 5 || i > 20)
}
# plot(0:25, eig, type="l", xlab="Number of outliers", ylab="Largest Eigenvalue")
}
#
# generate data 50x10 as multivariate normal N(0,I) and add
# eps % outliers by adding d=5.0 to each component.
# - if eps <0 and eps <=0.5, the number of outliers is eps*n
# - if eps >= 1, it is the number of outliers
# - use the center and cov of the good data as good start
# - use the center and the cov of all data as a bad start
# If using a good start, the M-estimate must iterate to
# the good solution: the largest eigenvalue is less then e.g. 5
#
gendata <- function(n=50, p=10, eps=0, d=5.0){
if(eps < 0 || eps > 0.5 && eps < 1.0 || eps > 0.5*n)
stop("eps is out of range")
library(MASS)
x <- mvrnorm(n, rep(0,p), diag(p))
bad <- vector("numeric")
nbad = if(eps < 1) eps*n else eps
if(nbad > 0){
bad <- sample(n, nbad)
x[bad,] <- x[bad,] + d
}
cov1 <- cov.wt(x)
cov2 <- if(nbad <= 0) cov1 else cov.wt(x[-bad,])
list(x=x, bad=sort(bad), tgood=cov2$center, sgood=cov2$cov, tbad=cov1$center, sbad=cov1$cov)
}
pad.right <- function(z, pads)
{
## Pads spaces to right of text
padding <- paste(rep(" ", pads), collapse = "")
paste(z, padding, sep = "")
}
## -- now do it:
dodata()
dogen()
#cat('Time elapsed: ', proc.time(),'\n') # for ``statistical reasons''
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.