R/gx.robmva.closed.R

Defines functions gx.robmva.closed

Documented in gx.robmva.closed

gx.robmva.closed <-
function(xx, proc = "mcd", wts = NULL, main = deparse(substitute(xx)))
{
     # Special version of robmva for use with closed, compositional, data
     # sets, based on function gx.robmva.  An ilr transformation is always
     # undertaken, and the robust ilr covariance matrix is back transformed
     # to the clr space for the PCA.
     #
     # Procedure to undertake robust multivariate analyses using the minimum
     # volume ellipsoid (MVE), minimim covariance determinant (MCD) procedure, 
     # or a user supplied set of 0-1 weights, wts, to identify a set of
     # outliers to omit from estimation of geochemical background parameters. 
     # Note: The cov.mcd procedure is limited to a maximum of 50 variables. 
     # Both of these procedures lead to a vector of 0-1 wts.  Of the two
     # procedures, mcd is recommended and is the default.
     #
     # Note this procedure uses svd() rather than the classic solve().
     #
     # Provision is made for the user to provide a set of n weights via wts, 
     # e.g., wts = c(1,1,0,1,0,1,...........1,1).  Such a set of weights can
     # be generated by using the Graphical Adaptive Interactive Trimming
     # (GAIT) procedure available though gx.md.gait().
     #
     # Using 0-1 wts the parameters of the background distribution are
     # estimated by cov.wt().  Following this a robust R-mode PCA is undertaken,
     # and robust Mahalanobis distances are estimated for the total data set. 
     # The estimation of Mahalanobis distances is only undertaken if x is
     # non-singular, i.e., the lowest eigenvalue is > 10e-4.
     #
     # PCA output may be plotted with gx.rqpca.plot() and gx.rqpca.screeplot(),
     # and Mahalanobis distances may be plotted with gx.md.plot().  The PCA
     # solution may be rotated using gx.rotate().
     #
     if(!is.matrix(xx)) stop(deparse(substitute(xx)), " is not a Matrix")
     # Remove any rows containing NAs
     temp.x <- remove.na(xx)
     x <- temp.x$x; n <- temp.x$n; p <- temp.x$m
     # Save variable names and matrix row numbers
     matnames <- dimnames(xx)
     if(is.null(matnames[[1]])) matnames[[1]] <- c(1:n)
     # Perform ilr transformation
     x.ilr <- ilr(x)
     # Estimate robust ilr covariance matrix
     if(is.null(wts)) {
         # Use mve or mcd procedure to identify core background subset
         if(p > 50) proc <- "mve"
         if(proc == "mve") {
             save.ilr <- cov.mve(x.ilr, cor = TRUE)
             wts <- array(0, n)
             wts[save.ilr$best] <- 1
             }
         else {
             save.ilr <- cov.mcd(x.ilr, cor = TRUE)
             wts <- array(0, n)
             wts[save.ilr$best] <- 1
             }
	}
     else {
         if(length(wts) != n) stop(paste("Length of vector wts, ", 
             length(wts), ", must be equal to the number of individuals, ", n,
             "\n  Were any individuals with NAs removed by the function?",
             sep = ""), call. = FALSE)
         proc <- "wts"
         # Use cov.wt() to estimate parameters charcterizing the core (background)
         save.ilr <- cov.wt(x.ilr, wt = wts, cor = TRUE)
     }
     # Robust ilr covariance matrix, means, etc., are now in save.ilr
     #
     # Issue any warnings re small sample population size
     nc <- sum(wts)
     cat("  n = ", n, "\tnc = ", nc, "\tp = ", p, "\t\tnc/p = ", round(nc/p, 2), "\n")
     if(nc < 5 * p)
         cat("  *** Proceed with Care, Core Size is < 5p ***\n")
     if(nc < 3 * p)
         cat("  *** Proceed With Great Care, nc = ", nc, ", which is < 3p ***\n")
     #
     # Compute Mahalanobis distances with robust ilr covariance matrix
     md <- mahalanobis(x.ilr, save.ilr$center, save.ilr$cov)
     p.ilr <- p - 1
     temp <- (nc - p.ilr)/(p.ilr * (nc + 1))
     ppm <- 1 - pf(temp * md, p.ilr, nc - p.ilr)
     epm <- 1 - pchisq(md, p.ilr)
     #
     # Invert ilr covariance matrix for use in gx.mvalloc.closed
     inverted <- ginv(save.ilr$cov)
     # Back-transform ilr covariance matrix and means to clr form
     V <- orthonorm(p)
     cov.clr <- V %*% save.ilr$cov %*% t(V)
     dimnames(cov.clr)[[1]] <- dimnames(cov.clr)[[2]] <- matnames[[2]]
     inverted.clr <- V %*% inverted %*% t(V)
     dimnames(inverted.clr) <- dimnames(cov.clr)
     corr <- V %*% save.ilr$cor %*% t(V)
     dimnames(corr) <- dimnames(cov.clr)
     center <- as.vector(save.ilr$center %*% t(V))
     sd <- sqrt(diag(cov.clr))
     names(center) <- names(sd) <- matnames[[2]]
     #
     # Back-transform ilr transformed input data to clr form
     x <- x.ilr %*% t(V)
     dimnames(x) <- matnames
     #
     # Perform a robust clr PCA, first standardizing the whole data
     # set with the robust estimators
     temp <- sweep(x, 2, center, "-")
     snd <- sweep(temp, 2, sd, "/")
     b <- svd(corr)
     cat("  Eigenvalues:", signif(b$d, 4), "\n")
     sumc <- sum(b$d)
     econtrib <- 100 * (b$d/sumc)
     cat("     as %ages:", round(econtrib, 1), "\n")
     b1 <- b$v * 0
     diag(b1) <- sqrt(b$d)
     rload <- b$v %*% b1
     rqscore <- snd %*% rload
     vcontrib <- numeric(p)
     cpvcontrib <- pvcontrib <- vcontrib <- numeric(p)
     for (j in 1:p) vcontrib[j] <- var(rqscore[, j])
     sumv <- sum(vcontrib)
     pvcontrib <- (100 * vcontrib)/sumv
     cpvcontrib <- cumsum(pvcontrib)
     cat("  Score S^2s :", signif(vcontrib, 4), "\n")
     cat("     as %ages:", round(pvcontrib, 1), "\n")
     rcr <- rload[,  ] * 0
     rcr1 <- apply(rload^2, 1, sum)
     rcr <- 100 * sweep(rload^2, 1, rcr1, "/")
     dimnames(rload)[[1]] <- dimnames(rcr)[[1]] <- matnames[[2]]
     #
     invisible(list(main = main, input = deparse(substitute(xx)), proc = proc,
         n = n, nc = nc, p = p, ifilr = TRUE, matnames = matnames, wts = wts,
         mean = center, cov = cov.clr, cov.inv = inverted.clr, sd = sd,
         snd = snd, r = corr, eigenvalues = b$d, econtrib = econtrib,
         eigenvectors = b$v, rload = rload, rcr = rcr, rqscore = rqscore,
         vcontrib = vcontrib, pvcontrib = pvcontrib, cpvcontrib = cpvcontrib,
         md = md, ppm = ppm, epm = epm, nr = NULL))
}

Try the rgr package in your browser

Any scripts or data that you put into this service are public.

rgr documentation built on May 2, 2019, 6:09 a.m.