R/rg-robmva.R

"rg.robmva" <- 
function(x, proc = "mcd", wts = NULL, main = deparse(substitute(x)))
{
	# Procedure to undertake robust multivariate analyses using the minimum
	# volume ellipsoid (MVE), minimim covariance determinant (MCD) procedure, 
	# or a user supplied set of 0-1 weights, wts, to identify a set of outliers to omit
	# from estimation of geochemical background parameters.  Note: The
	# cov.mcd procedure is limited to a maximum of 50 variables.  Both of these
	# procedures lead to a vector of 0-1 wts.  Of the two procedures, mcd is
	# recommended and is the default.
	#
	# Note this procedure uses svd() rather than the classic solve().
	#
	# Provision is made for the user to provide a set of n weights via wts, 
	# e.g., wts = c(1,1,0,1,0,1,...........1,1).  Such a set of weights can be
	# generated by using the Graphical Adaptive Interactive Trimming (GAIT)
	# procedure available though rg.md.gait().
	#
	# Using 0-1 wts the parameters of the background distribution are estimated
	# by cov.wt().  Following this a robust R-mode PCA is undertaken, and robust
	# Mahalanobis distances are estimated for the total data set.  The estimation
	# of Mahalanobis distances is only undertaken if x is non-singular, i.e., the
	# lowest eigenvalue is > 10e-4.
	#
	# PCA output may be plotted with rg.rqpca.plot() and rg.rqpca.screeplot(), and
	# Mahalanobis distances may be plotted with rg.md.plot().  The PCA solution
	# may be rotated using rg.rotate().
	#
	#Determine the length of the vectors
	if(!is.matrix(x)) stop("Not a Matrix")
	n <- length(x[, 1])
	p <- length(x[1,  ])
	matnames <- dimnames(x)
	if(n <= 3 * p)
		cat("*** Proceed With Great Care, n < 3p ***\n")
	if(is.null(wts)) {
		#Use mve or mcd procedure to identify core background subset
		if(p > 50) proc <- "mve"
		if(proc == "mve") {
			save <- cov.mve(x,cor=TRUE)
			wts <- save$mve.wt
		}
		else {
			save <- covMcd(x,cor=TRUE)
			wts <- save$mcd.wt
		}
	}
	else {
		proc == "wts"
		#Use cov.wt() to estimate parameters charcterizing the core (background)
		save <- cov.wt(x, wt = wts, cor = TRUE)
	}
	nc <- sum(wts)
	cat("  n = ", n, "\tnc = ", nc, "\tp = ", p, "\t\tnc/p = ", round(nc/p, 2), "\n")
        if(nc <= 5 * p)
                cat("*** Proceed with Care, nc is < 5p ***\n")
        if(nc <= 3 * p)
                cat("*** Proceed With Great Care, nc = ", nc, ", which is < 3p ***\n")

	#Standardize whole data set with robust background estimators
	temp <- sweep(x, 2, save$center, "-")
	sd <- sqrt(diag(save$cov))
	snd <- sweep(temp, 2, sd, "/")
	#Standardize for RQ PCA and compute
#	w <- sweep(snd, 2, sqrt(n), "/")
#	wt <- t(as.matrix(w))
#	a <- wt %*% as.matrix(w)
	b <- svd(save$cor)
	sumc <- sum(b$d)
	econtrib <- 100 * (b$d/sumc)
	rqscore <- snd %*% b$v
###	vcontrib <- colVars(rqscore)
        vcontrib <- apply(rqscore,2,var)
	sumv <- sum(vcontrib)
	pvcontrib <- (100 * vcontrib)/sumv
	cpvcontrib <- cumsum(pvcontrib)
	b1 <- b$v * 0
	diag(b1) <- sqrt(b$d)
	rload <- b$v %*% b1
	rcr <- rload[,  ] * 0
	rcr1 <- apply(rload^2, 1, sum)
	rcr <- 100 * sweep(rload^2, 1, rcr1, "/")
	#test for non-singularity and compute robust Mahalanobis distances
	if(b$d[p] > 10^-4) {
		md <- mahalanobis(x, save$center, save$cov)
		temp <- (nc - p)/(p * (nc + 1))
		ppm <- 1 - pf(temp * md, p, nc - p)
		epm <- 1 - pchisq(md, p)
	}
	else {
		cat("  Lowest eigenvalue > 10^-4, Mahalanobis distances not computed\n")
		md <- NULL
		ppm <- NULL
		epm <- NULL
	}
	invisible(list(main = main, input = deparse(substitute(x)), proc = proc, n = n, nc = nc,
		p = p, matnames = matnames, wts = wts, mean = save$center, cov = save$cov, sd = 
		sd, snd = snd, r = save$cor, eigenvalues = b$d, econtrib = econtrib, 
		eigenvectors = b$v, rload = rload, rcr = rcr, rqscore = rqscore, vcontrib = 
		vcontrib, pvcontrib = pvcontrib, cpvcontrib = cpvcontrib, md = md, ppm = ppm,
		epm = epm, nr = NULL))
}

Try the StatDA package in your browser

Any scripts or data that you put into this service are public.

StatDA documentation built on March 13, 2020, 2:42 a.m.