#' End-member modelling analysis algorithm.
#'
#' A multivariate data set (m samples composed of n variables) is decomposed by
#' eigenspace analysis and modelled with a given number of end-members (q).
#' Several steps of scaling, transformation, normalisation, eigenspace
#' decomposition, factor rotation, data modelling and evaluation are performed.
#'
#' The parameter \code{Vqn} is useful when \code{EMMA} shall be performed with
#' a set of prior unscaled end-members, e.g. from other data sets that are to
#' be used as reference or when modelling a data set with mean end-members, as
#' in the output of \code{robust.loadings}.\cr
#' The rotation type \code{Varimax} was used by Dietze et al. (2012). In this
#' R package, one out of the rotations provided by the package GPArotation
#' is possible, as well. However, tests showed that the rotation type has no
#' dramatic consequences for the result.\cr
#' The function values \code{$loadings} and \code{$scores} are redundant. They
#' are essentially the same as \code{$Vqsn} and \code{$Mqs}. However, they are
#' included for user convenience.
#'
#' @param X \code{Numeric} matrix, input data set with m samples (rows)
#' and n variables (columns).
#'
#' @param q \code{Numeric} scalar, number of end-members to be modelled.
#'
#' @param l \code{Numeric} scalar or vector, weight transformation
#' limit, i.e. quantile. Set to zero if omitted.
#'
#' @param c \code{Numeric} scalar, constant sum scaling parameter, e.g.
#' 1, 100, 1000. Set to 100 if omitted.
#'
#' @param Vqn \code{Numeric} matrix, optional unscaled user-defined
#' end-member loadings. If provided, these are used instead of model-derived
#' ones. See details.
#'
#' @param classunits \code{Numeric} vector, optional class units
#' (e.g. micrometers or phi-units) of the same length as columns of \code{X}.
#'
#' @param ID \code{Numeric} or character vector, optional sample IDs of the
#' same length as rows of X.
#'
#' @param EM.ID \code{Character} vector, end-member names. If present,
#' these will be set as row-names of the output data set and used in the
#' legend text.
#'
#' @param rotation \code{Character} scalar, rotation type, default is
#' "Varimax". See details.
#'
#' @param plot \code{Logical} scalar, optional graphical output of the results,
#' default is FALSE. If set to TRUE, end-member loadings and end-member scores
#' are plotted.
#'
#' @param \dots Additional arguments passed to the plot function. Since the
#' function returns two plots some additional graphical parameters must be
#' specified as vector with the first element for the first plot and the second
#' element for the second plot.
#'
#' @return A list with numeric matrix objects. \item{loadings}{Normalised
#' rescaled end-member loadings.} \item{scores}{Rescaled end-member scores.}
#' \item{Vqn}{Normalised end-member loadings.} \item{Vqsn}{Normalised rescaled
#' end-member loadings.} \item{Mqs}{Rescaled end-member scores.}
#' \item{Xm}{Modelled data.} \item{modes}{Mode class of end-member loadings.}
#' \item{Mqs.var}{Explained variance of end-members} \item{Em}{Absolute
#' row-wise model error.} \item{En}{Absolute column-wise model error.}
#' \item{RMSEm}{row-wise root mean square erroe} \item{RMSEn}{column-wise root
#' mean square erroe} \item{Rm}{Row-wise (sample-wise) explained variance.}
#' \item{Rn}{Column-wise (variable-wise) explained variance.} \item{ol}{Number
#' of overlapping end-members.}
#'
#' @author Michael Dietze, Elisabeth Dietze
#' @seealso \code{\link{test.parameters}}, \code{\link{rotations}},
#' \code{\link{eigen}}, \code{\link{nnls}}
#' @references Dietze E, Hartmann K, Diekmann B, IJmker J, Lehmkuhl F, Opitz S,
#' Stauch G, Wuennemann B, Borchers A. 2012. An end-member algorithm for
#' deciphering modern detrital processes from lake sediments of Lake Donggi
#' Cona, NE Tibetan Plateau, China. Sedimentary Geology 243-244: 169-180.\cr
#' Klovan JE, Imbrie J. 1971. An Algorithm and FORTRAN-IV Program for
#' Large-Scale Q-Mode Factor Analysis and Calculation of Factor Scores.
#' Mathematical Geology 3: 61-77. Miesch AT. 1976. Q-Mode factor analysis of
#' geochemical and petrologic data matrices with constant row sums. U.S.
#' Geological Survey Professsional Papers 574.
#' @keywords EMMA
#' @examples
#'
#' ## load example data and set phi-vector
#' data(example_X)
#' phi <- seq(from = 1, to = 10, length.out = ncol(X))
#'
#' ## perform EMMA with 5 end-members
#' EM <- EMMA(X = X, q = 5, l = 0.05, c = 100, plot = TRUE)
#'
#' ## perform EMMA with 4 end-members and more graphical settings
#' EM <- EMMA(X = X, q = 4, l = 0.05, c = 100,
#' plot = TRUE,
#' EM.ID = c("EM 1", "EM 2", "EM 3", "EM 4"),
#' classunits = phi,
#' xlab = c(expression(paste("Class [", phi, "]")), "Sample ID"),
#' cex = 0.7,
#' col = rainbow(n = 4))
#'
#' @export EMMA
EMMA <- function(
X,
q,
l,
c,
Vqn,
classunits,
ID,
EM.ID,
rotation = "Varimax",
plot = FALSE,
...
) {
## check for l vs. lw
if("lw" %in% names(list(...))) {
stop('Parameter "lw" is depreciated. Use "l" instead.')
}
## check/set default values
if(missing(l) == TRUE) {
l <- 0
}
if(missing(c) == TRUE) {
c <- 100
}
## check/set class units vector and test for consistency
if(missing(classunits) == TRUE) {
classunits <- 1:ncol(X)
}
if(ncol(X) != length(classunits)) {
stop("Units vector is not of same length as variables.")
}
## check/set ID vector and test for consistency
if(missing(ID) == TRUE) {
ID <- as.character(1:nrow(X))
}
if(nrow(X) != length(ID)) {
stop("ID vector is not of same length as variables.")
}
## check that no zero-only columns exist
if(any(colSums(X) == 0)) {
stop("X contains columns with only zeros.")
}
## End-member modelling
## rescale X to constant sum c
X <- X / rowSums(X) * c
## calculate weight limit quantiles column-wise
ls <- apply(X <- X,
MARGIN = 2,
FUN = quantile,
probs = c(l, 1 - l),
type = 5)
## perform weight-transformation
W <- t((t(X) - ls[1,]) / (ls[2,] - ls[1,]))
## create similarity matrix as outer product
A <- t(W) %*% W
## perform eigenspace decomposition
EIG <- try(eigen(A), silent = TRUE)
if(class(EIG)[1] == "try-error") {
stop("Cannot compute eigen space! Consider decreasing l.")
}
## assign raw eigenvectors V
V <- EIG$vectors[,order(seq(ncol(A), 1, -1))]
Vf <- V[,order(seq(ncol(A), 1, -1))]
## rotate eigenvectors and assign factor loadings
Vr <- do.call(what = rotation,
args = list(Vf[,1:q]))
Vq <- Vr$loadings[,seq(q, 1, -1)]
## rescale factor loadings and transpose matrix
Vqr <- t(Vq) / colSums(Vq) * c
## optionally calculate normalised factor loadings
if(missing(Vqn) == TRUE) {
## calculate row-wise min and max values
Vqr.min <- matrixStats::rowMins(x = Vqr)
Vqr.max <- matrixStats::rowMaxs(x = Vqr)
## calculate Vqn
Vqn <- (Vqr - Vqr.min) / (Vqr.max - Vqr.min)
} else {
## or use respective values from input data
Vqn <- Vqn
}
## get eigenvector scores as non-negative least squares estimate
Mq <- t(apply(W, MARGIN = 1, FUN = function(row) {
limSolve::nnls(t(Vqn), as.vector(t(row)))$X
}))
## modelled values matrix
Wm <- Mq %*% Vqn
## calculate scaling vector after Miesch (1976)
ls <- apply(X <- X,
MARGIN = 2,
FUN = quantile,
probs = c(l, 1 - l),
type = 5)
s <- c - sum(ls[1,]) / rowSums(Vqn * (ls[2,] - ls[1,]))
## rescale end-member loadings after Miesch (1976)
Vqs <- t(apply(X = cbind(t(t(s)), Vqn),
MARGIN = 1,
FUN = function(row) {
row[1] * row[2:length(row)] * (ls[2, ] - ls[1, ]) + ls[1, ]
}))
## normalise end-member loadings
Vqsn <- Vqs / rowSums(Vqs) * c
## rescale end-member scores
Mqs <- t(t(Mq) / s) / rowSums(t(t(Mq) / s))
## Model evaluation
## get number of overlapping end-member loadings
ol <- sum(apply(X = Vqsn,
MARGIN = 1,
FUN = function(row) {
if (row[row == max(row)] < max(row[row == max(row)])) {
1
} else {
0
}
}))
## calculate modelled data set
Xm <- Mqs %*% Vqs
## normalise modelled data set
Xm <- Xm / rowSums(Xm) * c
## evaluate absolute error and explained variances
Mqs.var <- colVars(x = Mqs)
Mqs.var <- Mqs.var / sum(Mqs.var) * 100 # explained scores variance
Em <- rowMeans(x = abs(X - Xm)) # absolute row-wise model error
En <- rowMeans(x = abs(X - Xm)) # absolute column-wise model error
Rm <- diag(cor(t(X), t(Xm))^2) # row-wise explained variance
Rn <- diag(cor(X, Xm)^2) # column-wise explained variance
RMSEm <- sqrt(mean(x = Em^2, na.rm = TRUE))
RMSEn <- sqrt(mean(x = En^2, na.rm = TRUE))
mRm <- mean(Rm, na.rm = TRUE)
mRn <- mean(Rn, na.rm = TRUE)
mRt <- mean(c(mRm, mRn), na.rm = TRUE)
## Sort output by mode position for consistent output
## create auxiliary x-unit and index variable, calculate index
x.unit <- 1:ncol(X)
ind <- numeric(q)
for (i in 1:q) {
ind[i] <- x.unit[Vqsn[i,] == max(Vqsn[i,])]
}
ind.sort <- seq(1, nrow(Vqsn))[order(ind)]
Vqsn <- Vqsn[ind.sort,]
Vqn <- Vqn[ind.sort,]
Mqs <- Mqs[,ind.sort]
Mqs.var <- Mqs.var[ind.sort]
if(missing(EM.ID) == TRUE) {
EM.ID <- paste("EM", 1:q, sep = "")
}
## determine mode class for all end-member loadings
modes <- numeric(q)
for(i in 1:q) {modes[i] <- classunits[Vqsn[i,1:ncol(
Vqsn)] == max(Vqsn[i,1:ncol(Vqsn)])]}
## optionally, plot end-member loadings and scores
if(plot == TRUE) {
## read additional arguments list and check/set default values
extraArgs <- list(...)
if("main" %in% names(extraArgs)) {
main <- extraArgs$main
} else {
main <- c("End-member loadings",
"End-member scores")
}
if("xlab" %in% names(extraArgs)) {
xlab <- extraArgs$xlab
} else {
xlab <- c("Class",
"Samples")
}
if("ylab" %in% names(extraArgs)) {
ylab <- extraArgs$ylab
} else {
ylab <- c("Relative amounts",
"Relative amounts")
}
if("ylim" %in% names(extraArgs)) {
ylim <- extraArgs$ylim
} else {
ylim <- rbind(c(0, max(Vqsn, na.rm = TRUE)),
c(-0.04, 1.04))
}
if("log" %in% names(extraArgs)) {
log <- extraArgs$log
} else {
log <- ""
}
if("col" %in% names(extraArgs)) {
col <- extraArgs$col
} else {
col <- seq(1, q)
}
if("cex" %in% names(extraArgs)) {
cex <- extraArgs$cex
} else {
cex <- 1
}
if("lty" %in% names(extraArgs)) {
lty <- extraArgs$lty
} else {
lty <- rep(1, q)
}
if("lwd" %in% names(extraArgs)) {
lwd <- extraArgs$lwd
} else {
lwd <- rep(1, q)
}
## setup plot area
## save origial parameters
par.old <- op <- par(no.readonly = TRUE)
## adjust margins
par(mar = c(3.9, 4.5, 3.1, 1.1),
cex.axis = cex,
cex.lab = cex,
cex.main = 1.2 * cex,
cex.sub = cex)
## define layout
layout(rbind(c(1, 1, 2, 2),
c(1, 1, 2, 2),
c(3, 3, 4, 4),
c(3, 3, 4, 4),
c(3, 3, 4, 4),
c(3, 3, 4, 4),
c(5, 5, 5, 5))
)
## plot col-wise explained variance
plot(x = classunits,
y = Rn,
type = "b",
main = paste("Class-wise explained variance (mean = ",
signif(x = mean(Rn * 100), digits = 2), " %)", sep = ""),
xlab = xlab[1],
ylab = expression(R^2),
log = log,
cex = cex)
## plot row-wise explained variance
plot(x = 1:nrow(X),
y = Rm,
type = "b",
main = paste("Sample-wise explained variance (mean = ",
signif(x = mean(Rm * 100), digits = 2), " %)", sep = ""),
xlab = xlab[2],
ylab = expression(R^2),
cex = cex)
## plot end-member loadings
plot(classunits, Vqsn[1,], type = "l",
main = main[1],
xlab = xlab[1],
ylab = ylab[1],
ylim = as.vector(ylim[1,]),
log = log,
col = col[1],
lty = lty[1],
lwd = lwd[1],
cex = cex)
if(nrow(Vqsn) >= 2) {
for(i in 2:nrow(Vqsn)) {
lines(x = classunits,
y = Vqsn[i,],
col = col[i],
lty = lty[i],
lwd = lwd[i],
cex = cex)
}
}
## plot end-member scores
barplot(t(Mqs),
main = main[2],
xlab = xlab[2],
ylab = ylab[2],
ylim = as.vector(ylim[2,]),
col = col,
border = NA,
space = 0,
names.arg = ID,
horiz = FALSE)
box(which = "plot")
## plot legend-like information
par(mar = c(1, 1, 1, 1))
plot(NA,
xlim = c(0, 1),
ylim = c(0, 1),
axes = FALSE,
ann = FALSE,
frame.plot = TRUE)
mtext(line = -2,
text = "End-member ID (mode position | explained variance)",
cex = 0.8 * cex,
font = 2)
legend(x = "bottom",
legend = paste(EM.ID, " (", signif(x = modes, digits = 2), " | ",
signif(x = Mqs.var, digits = 2), " %)", sep = ""),
col = col,
lty = lty,
lwd = lwd,
horiz = TRUE,
box.lty = 0,
cex = cex)
## reset plot parameters
par(par.old)
}
## optionally, assign EM.IDs to Vqsn and Vqn matrices
if(missing(EM.ID) == FALSE) {
rownames(Vqn) <- EM.ID
rownames(Vqsn) <- EM.ID
}
## Output list with numeric matrix objects.
list(loadings = Vqsn, ## Normalised rescaled end-member loadings.
scores = Mqs, ## Rescaled end-member scores.
Vqn = Vqn, ## Normalised end-member loadings.
Vqsn = Vqsn, ## Normalised rescaled end-member loadings.
Mqs = Mqs, ## Rescaled end-member scores.
Xm = Xm, ## Modelled data.
modes = modes, ## Mode class of end-member loadings.
Mqs.var = Mqs.var, ## Explained variance of end-members
Em = Em, ## Absolute row-wise model error.
En = En, ## Absolute column-wise model error.
RMSEm = RMSEm, ## Sample-wise RMSE.
RMSEn = RMSEn, ## Class-wise RMSE.
Rm = Rm, ## Row-wise (sample-wise) explained variance.
Rn = Rn, ## Column-wise (variable-wise) explained variance.
mRm = mRm, ## Average class-wise explained variance.
mRn = mRn, ## Average sample-wise average explained variance.
mRt = mRt, ## Total average explained variance.
ol = ol) ## Number of overlapping end-members.
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.