Nothing
dodata <- function(nrep=1, time=FALSE, short=FALSE, full=TRUE,
method=c("hubert", "hubert.mcd", "locantore", "cov", "classic",
"grid", "proj"))
{
## Test the PcaXxx() functions on the literature datasets:
##
## Call PcaHubert() and the other functions for all regression
## data sets available in robustbase/rrcov and print:
## - execution time (if time == TRUE)
## - loadings
## - eigenvalues
## - scores
##
dopca <- function(x, xname, nrep=1){
n <- dim(x)[1]
p <- dim(x)[2]
if(method == "hubert.mcd")
pca <- PcaHubert(x, k=p)
else if(method == "hubert")
pca <- PcaHubert(x, mcd=FALSE)
else if(method == "locantore")
pca <- PcaLocantore(x)
else if(method == "cov")
pca <- PcaCov(x)
else if(method == "classic")
pca <- PcaClassic(x)
else if(method == "grid")
pca <- PcaGrid(x)
else if(method == "proj")
pca <- PcaProj(x)
else
stop("Undefined PCA method: ", method)
e1 <- getEigenvalues(pca)[1]
e2 <- getEigenvalues(pca)[2]
k <- pca@k
if(time){
xtime <- system.time(dorep(x, nrep, method))[1]/nrep
xres <- sprintf("%3d %3d %3d %12.6f %12.6f %10.3f\n", dim(x)[1], dim(x)[2], k, e1, e2, xtime)
}
else{
xres <- sprintf("%3d %3d %3d %12.6f %12.6f\n", dim(x)[1], dim(x)[2], k, e1, e2)
}
lpad<-lname-nchar(xname)
cat(pad.right(xname, lpad), xres)
if(!short){
cat("Scores: \n")
print(getScores(pca))
if(full){
cat("-------------\n")
show(pca)
}
cat("----------------------------------------------------------\n")
}
}
stopifnot(length(nrep) == 1, nrep >= 1)
method <- match.arg(method)
options(digits = 5)
set.seed(101) # <<-- sub-sampling algorithm now based on R's RNG and seed
lname <- 20
## VT::15.09.2013 - this will render the output independent
## from the version of the package
suppressPackageStartupMessages(library(rrcov))
data(Animals, package = "MASS")
brain <- Animals[c(1:24, 26:25, 27:28),]
tmp <- sys.call()
cat("\nCall: ", deparse(substitute(tmp)),"\n")
cat("Data Set n p k e1 e2\n")
cat("==========================================================\n")
dopca(heart[, 1:2], data(heart), nrep)
dopca(starsCYG, data(starsCYG), nrep)
dopca(data.matrix(subset(phosphor, select = -plant)), data(phosphor), nrep)
dopca(stack.x, data(stackloss), nrep)
## dopca(data.matrix(subset(coleman, select = -Y)), data(coleman), nrep) # differences between the architectures
dopca(data.matrix(subset(salinity, select = -Y)), data(salinity), nrep)
## dopca(data.matrix(subset(wood, select = -y)), data(wood), nrep) # differences between the architectures
dopca(data.matrix(subset(hbk, select = -Y)),data(hbk), nrep)
## dopca(brain, "Animals", nrep)
dopca(milk, data(milk), nrep)
dopca(bushfire, data(bushfire), nrep)
cat("==========================================================\n")
}
dogen <- function(nrep=1, eps=0.49, method=c("hubert", "hubert.mcd", "locantore", "cov")){
dopca <- function(x, nrep=1){
gc()
xtime <- system.time(dorep(x, nrep, method))[1]/nrep
cat(sprintf("%6d %3d %10.2f\n", dim(x)[1], dim(x)[2], xtime))
xtime
}
set.seed(1234)
## VT::15.09.2013 - this will render the output independent
## from the version of the package
suppressPackageStartupMessages(library(rrcov))
library(MASS)
method <- match.arg(method)
ap <- c(2, 5, 10, 20, 30)
an <- c(100, 500, 1000, 10000, 50000)
tottime <- 0
cat(" n p Time\n")
cat("=====================\n")
for(i in 1:length(an)) {
for(j in 1:length(ap)) {
n <- an[i]
p <- ap[j]
if(5*p <= n){
xx <- gendata(n, p, eps)
X <- xx$X
## print(dimnames(X))
tottime <- tottime + dopca(X, nrep)
}
}
}
cat("=====================\n")
cat("Total time: ", tottime*nrep, "\n")
}
dorep <- function(x, nrep=1, method=c("hubert", "hubert.mcd", "locantore", "cov")){
method <- match.arg(method)
for(i in 1:nrep)
if(method == "hubert.mcd")
PcaHubert(x)
else if(method == "hubert")
PcaHubert(x, mcd=FALSE)
else if(method == "locantore")
PcaLocantore(x)
else if(method == "cov")
PcaCov(x)
else
stop("Undefined PCA method: ", method)
}
#### gendata() ####
# Generates a location contaminated multivariate
# normal sample of n observations in p dimensions
# (1-eps)*Np(0,Ip) + eps*Np(m,Ip)
# where
# m = (b,b,...,b)
# Defaults: eps=0 and b=10
#
gendata <- function(n,p,eps=0,b=10){
if(missing(n) || missing(p))
stop("Please specify (n,p)")
if(eps < 0 || eps >= 0.5)
stop(message="eps must be in [0,0.5)")
X <- mvrnorm(n,rep(0,p),diag(1,nrow=p,ncol=p))
nbad <- as.integer(eps * n)
xind <- vector("numeric")
if(nbad > 0){
Xbad <- mvrnorm(nbad,rep(b,p),diag(1,nrow=p,ncol=p))
xind <- sample(n,nbad)
X[xind,] <- Xbad
}
list(X=X, xind=xind)
}
pad.right <- function(z, pads)
{
### Pads spaces to right of text
padding <- paste(rep(" ", pads), collapse = "")
paste(z, padding, sep = "")
}
whatis <- function(x){
if(is.data.frame(x))
cat("Type: data.frame\n")
else if(is.matrix(x))
cat("Type: matrix\n")
else if(is.vector(x))
cat("Type: vector\n")
else
cat("Type: don't know\n")
}
#################################################################
## VT::27.08.2010
## bug report from Stephen Milborrow
##
test.case.1 <- function()
{
X <- matrix(c(
-0.79984, -1.00103, 0.899794, 0.00000,
0.34279, 0.52832, -1.303783, -1.17670,
-0.79984, -1.00103, 0.899794, 0.00000,
0.34279, 0.52832, -1.303783, -1.17670,
0.34279, 0.52832, -1.303783, -1.17670,
1.48542, 0.66735, 0.716162, 1.17670,
-0.79984, -1.00103, 0.899794, 0.00000,
1.69317, 1.91864, -0.018363, 1.76505,
-1.00759, -0.16684, -0.385626, 0.58835,
-0.79984, -1.00103, 0.899794, 0.00000), ncol=4, byrow=TRUE)
cc1 <- PcaHubert(X, k=3)
cc2 <- PcaLocantore(X, k=3)
cc3 <- PcaCov(X, k=3, cov.control=CovControlSest())
cc4 <- PcaProj(X, k=2) # with k=3 will produce warnings in .distances - too small eignevalues
cc5 <- PcaGrid(X, k=2) # dito
list(cc1, cc2, cc3, cc4, cc5)
}
#################################################################
## VT::05.08.2016
## bug report from Matthieu Lesnoff <matthieu.lesnoff@gmail.com>
##
test.case.2 <- function()
{
do.test.case.2 <- function(z)
{
if(missing(z))
{
set.seed(12345678)
n <- 5
z <- data.frame(v1 = rnorm(n), v2 = rnorm(n), v3 = rnorm(n))
z
}
fm <- PcaLocantore(z, k = 2, scale = TRUE)
fm@scale
apply(z, MARGIN = 2, FUN = mad)
scale(z, center = fm@center, scale = fm@scale)
T <- fm@scores
P <- fm@loadings
E <- scale(z, center = fm@center, scale = fm@scale) - T %*% t(P)
d2 <- apply(E^2, MARGIN = 1, FUN = sum)
## print(sqrt(d2)); print(fm@od)
print(ret <- all.equal(sqrt(d2), fm@od))
ret
}
do.test.case.2()
do.test.case.2(phosphor)
do.test.case.2(stackloss)
do.test.case.2(salinity)
do.test.case.2(hbk)
do.test.case.2(milk)
do.test.case.2(bushfire)
data(rice); do.test.case.2(rice)
data(un86); do.test.case.2(un86)
}
## VT::15.09.2013 - this will render the output independent
## from the version of the package
suppressPackageStartupMessages(library(rrcov))
dodata(method="classic")
dodata(method="hubert.mcd")
dodata(method="hubert")
dodata(method="locantore")
dodata(method="cov")
dodata(method="grid")
## IGNORE_RDIFF_BEGIN
dodata(method="proj")
## IGNORE_RDIFF_END
## VT::14.11.2018 - commented out - on some platforms PcaHubert will choose only 1 PC
## and will show difference
## test.case.1()
test.case.2()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.