# R/rg-robmva.R In StatDA: Statistical Analysis for Environmental Data

```"rg.robmva" <-
function(x, proc = "mcd", wts = NULL, main = deparse(substitute(x)))
{
# Procedure to undertake robust multivariate analyses using the minimum
# volume ellipsoid (MVE), minimim covariance determinant (MCD) procedure,
# or a user supplied set of 0-1 weights, wts, to identify a set of outliers to omit
# from estimation of geochemical background parameters.  Note: The
# cov.mcd procedure is limited to a maximum of 50 variables.  Both of these
# procedures lead to a vector of 0-1 wts.  Of the two procedures, mcd is
# recommended and is the default.
#
# Note this procedure uses svd() rather than the classic solve().
#
# Provision is made for the user to provide a set of n weights via wts,
# e.g., wts = c(1,1,0,1,0,1,...........1,1).  Such a set of weights can be
# generated by using the Graphical Adaptive Interactive Trimming (GAIT)
# procedure available though rg.md.gait().
#
# Using 0-1 wts the parameters of the background distribution are estimated
# by cov.wt().  Following this a robust R-mode PCA is undertaken, and robust
# Mahalanobis distances are estimated for the total data set.  The estimation
# of Mahalanobis distances is only undertaken if x is non-singular, i.e., the
# lowest eigenvalue is > 10e-4.
#
# PCA output may be plotted with rg.rqpca.plot() and rg.rqpca.screeplot(), and
# Mahalanobis distances may be plotted with rg.md.plot().  The PCA solution
# may be rotated using rg.rotate().
#
#Determine the length of the vectors
if(!is.matrix(x)) stop("Not a Matrix")
n <- length(x[, 1])
p <- length(x[1,  ])
matnames <- dimnames(x)
if(n <= 3 * p)
cat("*** Proceed With Great Care, n < 3p ***\n")
if(is.null(wts)) {
#Use mve or mcd procedure to identify core background subset
if(p > 50) proc <- "mve"
if(proc == "mve") {
save <- cov.mve(x,cor=TRUE)
wts <- save\$mve.wt
}
else {
save <- covMcd(x,cor=TRUE)
wts <- save\$mcd.wt
}
}
else {
proc == "wts"
#Use cov.wt() to estimate parameters charcterizing the core (background)
save <- cov.wt(x, wt = wts, cor = TRUE)
}
nc <- sum(wts)
cat("  n = ", n, "\tnc = ", nc, "\tp = ", p, "\t\tnc/p = ", round(nc/p, 2), "\n")
if(nc <= 5 * p)
cat("*** Proceed with Care, nc is < 5p ***\n")
if(nc <= 3 * p)
cat("*** Proceed With Great Care, nc = ", nc, ", which is < 3p ***\n")

#Standardize whole data set with robust background estimators
temp <- sweep(x, 2, save\$center, "-")
sd <- sqrt(diag(save\$cov))
snd <- sweep(temp, 2, sd, "/")
#Standardize for RQ PCA and compute
#	w <- sweep(snd, 2, sqrt(n), "/")
#	wt <- t(as.matrix(w))
#	a <- wt %*% as.matrix(w)
b <- svd(save\$cor)
sumc <- sum(b\$d)
econtrib <- 100 * (b\$d/sumc)
rqscore <- snd %*% b\$v
###	vcontrib <- colVars(rqscore)
vcontrib <- apply(rqscore,2,var)
sumv <- sum(vcontrib)
pvcontrib <- (100 * vcontrib)/sumv
cpvcontrib <- cumsum(pvcontrib)
b1 <- b\$v * 0
diag(b1) <- sqrt(b\$d)
rcr <- rload[,  ] * 0
rcr <- 100 * sweep(rload^2, 1, rcr1, "/")
#test for non-singularity and compute robust Mahalanobis distances
if(b\$d[p] > 10^-4) {
md <- mahalanobis(x, save\$center, save\$cov)
temp <- (nc - p)/(p * (nc + 1))
ppm <- 1 - pf(temp * md, p, nc - p)
epm <- 1 - pchisq(md, p)
}
else {
cat("  Lowest eigenvalue > 10^-4, Mahalanobis distances not computed\n")
md <- NULL
ppm <- NULL
epm <- NULL
}
invisible(list(main = main, input = deparse(substitute(x)), proc = proc, n = n, nc = nc,
p = p, matnames = matnames, wts = wts, mean = save\$center, cov = save\$cov, sd =
sd, snd = snd, r = save\$cor, eigenvalues = b\$d, econtrib = econtrib,
eigenvectors = b\$v, rload = rload, rcr = rcr, rqscore = rqscore, vcontrib =
vcontrib, pvcontrib = pvcontrib, cpvcontrib = cpvcontrib, md = md, ppm = ppm,
epm = epm, nr = NULL))
}
```

## Try the StatDA package in your browser

Any scripts or data that you put into this service are public.

StatDA documentation built on March 13, 2020, 2:42 a.m.