Nothing
#' Robust principal component analysis for compositional data
#'
#' This function applies robust principal component analysis for compositional
#' data.
#'
#' The compositional data set is expressed in isometric logratio coordinates.
#' Afterwards, robust principal component analysis is performed. Resulting
#' loadings and scores are back-transformed to the clr space where the
#' compositional biplot can be shown.
#'
#' \code{mult_comp} is used when there are more than one group of compositional
#' parts in the data. To give an illustrative example, lets assume that one
#' variable group measures angles of the inner ear-bones of animals which sum
#' up to 100 and another one having percentages of a whole on the thickness of
#' the inner ear-bones included. Then two groups of variables exists which are
#' both compositional parts. The isometric logratio coordinates are then internally applied
#' to each group independently whenever the \code{mult_comp} is set correctly.
#'
#' @aliases pcaCoDa print.pcaCoDa
#' @param x compositional data
#' @param method must be either \dQuote{robust} (default) or \dQuote{classical}
#' @param mult_comp a list of numeric vectors holding the indices of linked
#' compositions
#' @param external external non-compositional variables
#' @param solve eigen (as princomp does, i.e. eigenvalues of the covariance matrix) or svd (as prcomp does with single value decomposition instead of eigen). Only for method classical.
#' @return \item{scores }{scores in clr space} \item{loadings }{loadings in clr
#' space} \item{eigenvalues }{eigenvalues of the clr covariance matrix}
#' \item{method }{method} \item{princompOutputClr }{output of \code{princomp}
#' needed in \code{plot.pcaCoDa}}
#' @author Karel Hron, Peter Filzmoser, Matthias Templ and a contribution for dimnames in external variables by Amelia Landre.
#' @seealso \code{\link{print.pcaCoDa}}, \code{\link{summary.pcaCoDa}}, \code{\link{biplot.pcaCoDa}}, \code{\link{plot.pcaCoDa}}
#' @importFrom stats princomp
#' @importFrom stats prcomp
#' @references Filzmoser, P., Hron, K., Reimann, C. (2009) Principal component
#' analysis for compositional data with outliers. \emph{Environmetrics},
#' \bold{20}, 621-632.
#'
#' Kynclova, P., Filzmoser, P., Hron, K. (2016) Compositional biplots including external non-compositional variables.
#' \emph{Statistics: A Journal of Theoretical and Applied Statistics},
#' \bold{50}, 1132-1148.
#' @keywords multivariate
#' @export
#' @importFrom MASS cov.mve
#' @examples
#'
#' data(arcticLake)
#'
#' ## robust estimation (default):
#' res.rob <- pcaCoDa(arcticLake)
#' res.rob
#' summary(res.rob)
#' plot(res.rob)
#'
#' ## classical estimation:
#' res.cla <- pcaCoDa(arcticLake, method="classical", solve = "eigen")
#' biplot(res.cla)
#'
#' ## just for illustration how to set the mult_comp argument:
#' data(expenditures)
#' p1 <- pcaCoDa(expenditures, mult_comp=list(c(1,2,3),c(4,5)))
#' p1
#'
#' ## example with external variables:
#' data(election)
#' # transform external variables
#' election$unemployment <- log((election$unemployment/100)/(1-election$unemployment/100))
#' election$income <- scale(election$income)
#'
#' res <- pcaCoDa(election[,1:6], method="classical", external=election[,7:8])
#' res
#' biplot(res, scale=0)
pcaCoDa <- function (x, method = "robust", mult_comp = NULL, external = NULL, solve = "eigen")
{
if (is.vector(external) & length(external) != nrow(x)) {
stop("external and x must have the same number of observations")
}
if (!is.null(mult_comp) & !is.list(mult_comp))
stop("if specified, mult_comp must be a list")
ilrV <- function(x) {
x.ilr = matrix(NA, nrow = nrow(x), ncol = ncol(x) - 1)
for (i in 1:ncol(x.ilr)) {
x.ilr[, i] = sqrt((i)/(i + 1)) * log(((apply(as.matrix(x[,
1:i]), 1, prod))^(1/i))/(x[, i + 1]))
}
return(x.ilr)
}
if (is.null(mult_comp)) {
xilr <- ilrV(x)
}
else {
xilr <- do.call("cbind", lapply(mult_comp, function(xx) ilrV(x[,
xx])))
}
if (!is.null(external)) {
xilr <- cbind(xilr, external)
}
if (method == "robust") {
cv <- robustbase::covMcd(xilr, cor = FALSE)
pcaIlr <- suppressWarnings(princomp(xilr, covmat = cv,
cor = FALSE))
eigenvalues <- eigen(cv$cov)$values
}
else if (method == "mve") {
cv <- MASS::cov.mve(xilr)
pcaIlr <- suppressWarnings(princomp(xilr, covmat = cv,
cor = FALSE))
eigenvalues <- eigen(cv$cov)$values
}
else {
if(solve == "eigen"){
pcaIlr <- princomp(xilr, cor = FALSE)
} else{
pcaIlr <- prcomp(xilr, scale = FALSE, center = TRUE)
pcaIlr$loadings <- pcaIlr$rotation
}
eigenvalues <- eigen(cov(xilr))$values
}
if (is.null(mult_comp)) {
V <- matrix(0, nrow = ncol(x), ncol = ncol(x) - 1)
for (i in 1:ncol(V)) {
V[1:i, i] <- 1/i
V[i + 1, i] <- (-1)
V[, i] <- V[, i] * sqrt(i/(i + 1))
}
}
else {
V <- matrix(0, nrow = length(unlist(mult_comp)), ncol = length(unlist(mult_comp)) -
length(mult_comp))
l <- sapply(mult_comp, length)
start <- c(1, cumsum(l[-length(l)]))
cumsum(l[-length(l)])
end <- cumsum(l - 1)
start2 <- c(1, cumsum(l[-length(l)]) + 1)
end2 <- cumsum(l)
for (j in 1:length(mult_comp)) {
ind <- start[j]:end[j]
ind2 <- start2[j]:end2[j]
for (i in 1:length(ind)) {
V[ind2[1:i], ind[i]] <- 1/i
V[ind2[i] + 1, ind[i]] <- (-1)
V[, ind[i]] <- V[, ind[i]] * sqrt(i/(i + 1))
}
}
}
if (!is.null(external)) {
nload <- nrow(pcaIlr$loadings)
if (is.null(dim(external)[2])){
index <- 1
nam <- "ext"
}
else{
index <- ncol(external)
nam <- names(external)
}
loadings <- V %*% pcaIlr$loadings[-c((nload - index +
1):nload), ]
loadings <- rbind(loadings, pcaIlr$loadings[(nload -
index + 1):nload, ])
}
else {
loadings <- V %*% pcaIlr$loadings
}
if (is.null(mult_comp)) {
if (!is.null(names(x)) & !is.null(external))
dimnames(loadings)[[1]] <- c(names(x), nam)
else if (!is.null(names(x)))
dimnames(loadings)[[1]] <- names(x)
}
else {
## modifications added :
if (!is.null(names(x)) & !is.null(external))
dimnames(loadings)[[1]] <- c(colnames(x)[unlist(mult_comp)], names(external))
else if (!is.null(names(x)))
dimnames(loadings)[[1]] <- colnames(x)[unlist(mult_comp)]
}
pcaClr <- pcaIlr
if(solve == "eigen"){
pcaClr$scores <- pcaIlr$scores
} else {
pcaClr$scores <- pcaIlr$x
pcaClr$rotation <- loadings
pcaClr$loadings <- loadings
pcaClr$eigenvalues <- eigenvalues
}
if(is.null(external)){
rownames(loadings) <- colnames(x)
} else {
rownames(loadings) <- c(colnames(x), colnames(external))
}
pcaClr$loadings <- loadings
res <- list(scores = pcaClr$scores, loadings = loadings,
eigenvalues = eigenvalues, method = method, princompOutputClr = pcaClr,
mult_comp = mult_comp)
class(res) <- "pcaCoDa"
invisible(res)
}
#' @rdname pcaCoDa
#' @export
#' @method print pcaCoDa
#' @param ... additional parameters for print method passed through
print.pcaCoDa <- function(x, ...){
## percentage of explained variability for clr transformed data
eV <- x$eigenvalues / sum(x$eigenvalues)
eVcum <- cumsum(x$eigenvalues) / sum(x$eigenvalues)
cat("\n-------------------")
cat("\n Percentages of explained variability for compositional data \n after clr transformation \n")
print(eVcum)
cat("\n-------------------\n\n")
}
#' @rdname pcaCoDa
#' @method summary pcaCoDa
#' @param object object of class pcaCoDa
#' @export
summary.pcaCoDa <- function(object, ...){
stopifnot(inherits(object, "pcaCoDa"))
summary(object$princompOutputClr)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.