# R/pfa.R In robCompositions: Compositional Data Analysis

# PF, 2008-09-18
# Computes principal factor analysis for compositional data
# Uniquenesses are nor longer of diagonal form

#' Factor analysis for compositional data
#'
#' Computes the principal factor analysis of the input data which are
#' transformed and centered first.
#'
#' The main difference to usual implementations is that uniquenesses are nor
#' longer of diagonal form. This kind of factor analysis is designed for
#' centered log-ratio transformed compositional data. However, if the
#' covariance is not specified, the covariance is estimated from isometric
#' log-ratio transformed data internally, but the data used for factor analysis
#' are backtransformed to the clr space (see Filzmoser et al., 2009).
#'
#' @param x (robustly) scaled input data
#' @param factors number of factors
#' @param robust default value is TRUE
#' @param data default value is NULL
#' @param covmat (robustly) computed covariance or correlation matrix
#' @param n.obs number of observations
#' @param subset if a subset is used
#' @param na.action what to do with NA values
#' @param start starting values
#' @param scores which method should be used to calculate the scores
#' @param rotation if a rotation should be made
#' @param maxiter maximum number of iterations
#' @param control default value is NULL
#' @param \dots arguments for creating a list
#' The factors are ordered in decreasing order of sums of squares of loadings.}
#' \item{uniqueness }{uniqueness} \item{correlation }{correlation matrix}
#' \item{criteria}{The results of the optimization: the value of the negativ
#' log-likelihood and information of the iterations used.} \item{factors }{the
#' factors } \item{dof }{degrees of freedom} \item{method }{\dQuote{principal}}
#' \item{n.obs }{number of observations if available, or NA} \item{call }{The
#' matched call.} \item{STATISTIC, PVAL }{The significance-test statistic and
#' p-value, if they can be computed}
#' @author Peter Filzmoser, Karel Hron, Matthias Templ
#' @references C. Reimann, P. Filzmoser, R.G. Garrett, and R. Dutter (2008):
#' Statistical Data Analysis Explained.  \emph{Applied Environmental Statistics
#' with R}.  John Wiley and Sons, Chichester, 2008.
#'
#' P. Filzmoser, K. Hron, C. Reimann, R. Garrett (2009): Robust Factor Analysis
#' for Compositional Data.  \emph{Computers and Geosciences}, \bold{35} (9),
#' 1854--1861.
#' @keywords multivariate
#' @export
#' @importFrom MASS ginv
#' @importFrom robustbase covMcd
#' @examples
#'
#' data(expenditures)
#' x <- expenditures
#' res.rob <- pfa(x, factors=1)
#' res.cla <- pfa(x, factors=1, robust=FALSE)
#'
#'
#' ## the following produce always the same result:
#' res1 <- pfa(x, factors=1, covmat="covMcd")
#' res2 <- pfa(x, factors=1, covmat=robustbase::covMcd(pivotCoord(x))$cov) #' res3 <- pfa(x, factors=1, covmat=robustbase::covMcd(pivotCoord(x))) #' pfa <- function (x, factors, robust=TRUE, data = NULL, covmat = NULL, n.obs = NA, subset, na.action, start = NULL, scores = c("none", "regression", "Bartlett"), rotation = "varimax", maxiter = 5, control = NULL, ...) { z <- -pivotCoord(x) #ilr transformed data ## orthonormal basis: V <- matrix(0,nrow=ncol(x),ncol=ncol(x)-1) for (i in 1:ncol(V)){ V[1:i,i] <- 1/i V[i+1,i] <- (-1) V[,i] <- V[,i]*sqrt(i/(i+1)) } y <- as.matrix(z)%*%t(V) #clr transformed data # if(transformation=="ilr") x <- ilr(x) else if(transformation=="clr") x <- clr(x)$x.clr else stop("This transformation is not supported by pfa().")
x <- scale(x, scale=FALSE)  ## TODO: check this line if needed! (not in Peters code)
cn <- colnames(Lambda)
Phi <- attr(Lambda, "covariance")
ssq <- apply(Lambda, 2, function(x) -sum(x^2))
Lambda <- Lambda[, order(ssq), drop = FALSE]
colnames(Lambda) <- cn
neg <- colSums(Lambda) < 0
Lambda[, neg] <- -Lambda[, neg]
if (!is.null(Phi)) {
unit <- ifelse(neg, -1, 1)
attr(Lambda, "covariance") <- unit %*% Phi[order(ssq),
order(ssq)] %*% unit
}
Lambda
}
cl <- match.call()
na.act <- NULL
if (is.character(covmat) && length(covmat)==1){
if(covmat == "cov"){
cv <- cov(z)
n.obs <- nrow(x)
} else{
cv <- get(covmat)(z)$cov n.obs <- nrow(x) } } else if (is.list(covmat)) { if (any(is.na(match(c("cov", "n.obs"), names(covmat))))) stop("covmat is not a valid covariance list") cv <- covmat$cov
n.obs <- covmat$n.obs have.x <- FALSE } else if (is.matrix(covmat)) { cv <- covmat have.x <- FALSE } else if (is.null(covmat)) { if (missing(x)) stop("neither x nor covmat supplied") have.x <- TRUE if (inherits(x, "formula")) { mt <- terms(x, data = data) if (attr(mt, "response") > 0) stop("response not allowed in formula") attr(mt, "intercept") <- 0 mf <- match.call(expand.dots = FALSE) names(mf)[names(mf) == "x"] <- "formula" mf$factors <- mf$covmat <- mf$scores <- mf$start <- mf$rotation <- mf$control <- mf$... <- NULL
mf[[1]] <- as.name("model.frame")
mf <- eval(mf, parent.frame())
na.act <- attr(mf, "na.action")
z <- model.matrix(mt, mf)
}
else {
z <- as.matrix(x)
if (!missing(subset))
z <- z[subset, , drop = FALSE]
}
covmat <- cov.wt(z)
cv <- covmat$cov n.obs <- covmat$n.obs
}
else stop("covmat is of unknown type")
scores <- match.arg(scores)
if (scores != "none" && !have.x)
z <- x
sds <- sqrt(diag(cv))
cv <- cv/(sds %o% sds)
p <- ncol(cv)
dof <- 0.5 * ((p - factors)^2 - p - factors)
cn <- list(nstart = 1, trace = FALSE, lower = 0.005)
cn[names(control)] <- control
more <- list(...)[c("nstart", "trace", "lower", "opt", "rotate")]
if (length(more))
cn[names(more)] <- more
if (is.null(start)) {
start <- (1 - 0.5 * factors/p)/diag(solve(cv))
}
start <- as.matrix(start)
if (nrow(start) != p)
stop(paste("start must have", p, "rows"))
nc <- ncol(start)
if (nc < 1)
stop("no starting values supplied")
fit <- factanal.fit.principal1(cv, factors, p = p, start = start[,
1], iter.max = maxiter)
load <- fit$loadings if (rotation != "none") { rot <- do.call(rotation, c(list(load), cn$rotate))
rot$loadings else rot } fit$loadings <- sortLoadings(load)
class(fit$loadings) <- "loadings" fit$na.action <- na.act
if (scores != "none") {
Lambda <- fit$loadings zz <- z switch(scores, regression = { sc <- as.matrix(zz) %*% solve(cv, Lambda) if (!is.null(Phi <- attr(Lambda, "covariance"))) sc <- sc %*% Phi }, Bartlett = { psiinv <- ginv(fit$psi)
sc <- t(ginv(t(Lambda)%*%psiinv%*%Lambda)%*%t(Lambda)%*%psiinv%*%t(zz))
###            d <- 1/fit$uniquenesses ### tmp <- t(Lambda * d) ### sc <- t(solve(tmp %*% Lambda, tmp %*% t(zz))) }) rownames(sc) <- rownames(z) colnames(sc) <- colnames(Lambda) if (!is.null(na.act)) sc <- napredict(na.act, sc) fit$scores <- sc
}
if (!is.na(n.obs) && dof > 0) {
fit$STATISTIC <- (n.obs - 1 - (2 * p + 5)/6 - (2 * factors)/3) * fit$criteria["objective"]
fit$PVAL <- pchisq(fit$STATISTIC, dof, lower.tail = FALSE)
}
fit$n.obs <- n.obs fit$robust <- robust
fit\$call <- cl
fit
}


## Try the robCompositions package in your browser

Any scripts or data that you put into this service are public.

robCompositions documentation built on Jan. 13, 2021, 10:07 p.m.