R/gx.robmva.R

Defines functions gx.robmva

Documented in gx.robmva

gx.robmva <-
function(xx, proc = "mcd", wts = NULL, main = deparse(substitute(xx)))
{
     # Procedure to undertake robust multivariate analyses using the minimum
     # volume ellipsoid (MVE), minimim covariance determinant (MCD) procedure, 
     # or a user supplied set of 0-1 weights, wts, to identify a set of
     # outliers to omit from estimation of geochemical background parameters. 
     # Note: The cov.mcd procedure is limited to a maximum of 50 variables. 
     # Both of these procedures lead to a vector of 0-1 wts.  Of the two
     # procedures, mcd is recommended and is the default.
     #
     # Note this procedure uses svd() rather than the classic solve().
     #
     # Provision is made for the user to provide a set of n weights via wts, 
     # e.g., wts = c(1,1,0,1,0,1,...........1,1).  Such a set of weights can
     # be generated by using the Graphical Adaptive Interactive Trimming
     # (GAIT) procedure available though gx.md.gait().
     #
     # Using 0-1 wts the parameters of the background distribution are
     # estimated by cov.wt().  Following this a robust R-mode PCA is undertaken,
     # and robust Mahalanobis distances are estimated for the total data set. 
     # The estimation of Mahalanobis distances is only undertaken if x is
     # non-singular, i.e., the lowest eigenvalue is > 10e-4.
     #
     # PCA output may be plotted with gx.rqpca.plot() and gx.rqpca.screeplot(),
     # and Mahalanobis distances may be plotted with gx.md.plot().  The PCA
     # solution may be rotated using gx.rotate().
     #
     if(!is.matrix(xx)) stop(deparse(substitute(xx)), " is not a Matrix")
     # Remove any rows containing NAs
     temp.x <- remove.na(xx)
     x <- temp.x$x; n <- temp.x$n; p <- temp.x$m
     matnames <- dimnames(xx)
     if(is.null(matnames[[1]])) matnames[[1]] <- c(1:n)
     if(is.null(wts)) {
         # Use mve or mcd procedure to identify core background subset
         if(p > 50) proc <- "mve"
         if(proc == "mve") {
             save <- cov.mve(x, cor = TRUE)
             wts <- array(0, n)
             wts[save$best] <- 1
             }
         else {
             save <- cov.mcd(x, cor = TRUE)
             wts <- array(0, n)
             wts[save$best] <- 1
             }
	}
     else {
         if(length(wts) != n) stop(paste("Length of vector wts, ", 
             length(wts), ", must be equal to the number of individuals, ", n,
             "\n  Were any individuals with NAs removed by the function?",
             sep = ""), call. = FALSE)
         proc <- "wts"
         # Use cov.wt() to estimate parameters charcterizing the core (background)
         save <- cov.wt(x, wt = wts, cor = TRUE)
     }
     nc <- sum(wts)
     cat("  n = ", n, "\tnc = ", nc, "\tp = ", p, "\t\tnc/p = ", round(nc/p, 2), "\n")
     if(nc < 5 * p)
         cat("  *** Proceed with Care, Core Size is < 5p ***\n")
     if(nc < 3 * p)
         cat("  *** Proceed With Great Care, nc = ", nc, ", which is < 3p ***\n")
     # Standardize whole data set with robust background estimators
     temp <- sweep(x, 2, save$center, "-")
     sd <- sqrt(diag(save$cov))
     snd <- sweep(temp, 2, sd, "/")
     b <- svd(save$cor)
     cat("  Eigenvalues:", signif(b$d, 4), "\n")
     sumc <- sum(b$d)
     econtrib <- 100 * (b$d/sumc)
     cat("     as %ages:", round(econtrib, 1), "\n")
     b1 <- b$v * 0
     diag(b1) <- sqrt(b$d)
     rload <- b$v %*% b1
     rqscore <- snd %*% rload
     vcontrib <- numeric(p)
     cpvcontrib <- pvcontrib <- vcontrib <- numeric(p)
     for (j in 1:p) vcontrib[j] <- var(rqscore[, j])
     sumv <- sum(vcontrib)
     pvcontrib <- (100 * vcontrib)/sumv
     cpvcontrib <- cumsum(pvcontrib)
     cat("  Score S^2s :", signif(vcontrib, 4), "\n")
     cat("     as %ages:", round(pvcontrib, 1), "\n")
     rcr <- rload[,  ] * 0
     rcr1 <- apply(rload^2, 1, sum)
     rcr <- 100 * sweep(rload^2, 1, rcr1, "/")
     dimnames(rload)[[1]] <- dimnames(rcr)[[1]] <- matnames[[2]]
     #
     # Test for non-singularity and compute robust Mahalanobis distances
     if(b$d[p] > 10^-4) {
         md <- mahalanobis(x, save$center, save$cov)
         temp <- (nc - p)/(p * (nc + 1))
         ppm <- 1 - pf(temp * md, p, nc - p)
         epm <- 1 - pchisq(md, p)
     }
     else {
         cat("  Lowest eigenvalue < 10^-4\n",
             " Trying Moore-Penrose Generalized Inverse\n")
         # Requires Library MASS for function ginv
         mpi <- ginv(save$cov)
         md <- mahalanobis(x, save$center, mpi, inverted = TRUE)
         temp <- (nc - p)/(p * (nc + 1))
         ppm <- 1 - pf(temp * md, p, nc - p)
         epm <- 1 - pchisq(md, p)
     }
     invisible(list(main = main, input = deparse(substitute(xx)), proc = proc,
         n = n, nc = nc, p = p, ifilr = FALSE, matnames = matnames, wts = wts,
         mean = save$center, cov = save$cov, sd = sd, snd = snd, r = save$cor,
         eigenvalues = b$d, econtrib = econtrib, eigenvectors = b$v,
         rload = rload, rcr = rcr, rqscore = rqscore, vcontrib = vcontrib, 
         pvcontrib = pvcontrib, cpvcontrib = cpvcontrib, md = md, ppm = ppm,
         epm = epm, nr = NULL))
}

Try the rgr package in your browser

Any scripts or data that you put into this service are public.

rgr documentation built on May 2, 2019, 6:09 a.m.