B <- matrix(c(50, 67, 90, 98, 120,
55, 71, 93, 102, 129,
65, 76, 95, 105, 134,
50, 80, 102, 130, 138,
60, 82, 97, 135, 151,
65, 89, 106, 137, 153,
75, 95, 117, 133, 155), ncol=5, byrow=TRUE)
rownames(B) <- c("G1","G2","G3","G4","G5","G6","G7")
colnames(B) <- c("E1","E2","E3","E4","E5")
B2 = B
B2[1,1] = B2[2,1] = NA
m.pls <- plsdepot::nipals(B2, comps=5)
m2$values[,1]
# [1] 3.962960454 0.696489358 0.191825217 0.009365655 0.003420647
sum(m2$values[,1]^2) # 16.22705
# note, to match mixOmics eigenvalues, multiply by (nr-1)
# R> m2$values[,1] * 6
# [1] 23.77776272 4.17893615 1.15095130 0.05619393 0.02052388
# P loadings
m2$loadings
# T scores
m2$scores
# ----- timing example -----
# 100 x 100
set.seed(43)
Bbig <- matrix(rnorm(100*100), nrow=100)
Bbig[1,1] <- NA
system.time(nipals(Bbig, comps=1)) # Only 1 factor !
# user system elapsed
# 1.28 0.02 1.30
system.time(nipals(Bbig, comps=100)) # 100 factors
# user system elapsed
# 76.46 0.00 76.60
#'@title NIPALS: Non-linear Iterative Partial Least Squares
#'
#'@description
#'Principal Components Analysis with NIPALS algorithm
#'
#'@details
#'The function \code{nipals} performs Principal Components Analysis of a data
#'matrix that may contain missing values.
#'
#'@param Data A numeric matrix or data frame (which may contain
#'missing values).
#'@param comps Number of components to be calculated (by default 2)
#'@param scaled A logical value indicating whether to scale the data
#'(\code{TRUE} by default).
#'@return An object of class \code{"nipals"}, basically a list with the
#'following elements:
#'
#'When the analyzed data contain missing values, the help interpretation tools
#'(e.g. \code{cor.xt, disto, contrib, cos, dmod}) may not be meaningful, that
#'is to say, some of the results may not be coherent.
#'@return \item{values}{The pseudo eigenvalues}
#'@return \item{scores}{The extracted scores (i.e. components)}
#'@return \item{loadings}{The loadings}
#'@return \item{cor.xt}{Correlations between the variables and the scores}
#'@return \item{disto}{Squared distance of the observations to the origin}
#'@return \item{contrib}{Contributions of the observations (rows)}
#'@return \item{cos}{Squared cosinus}
#'@return \item{dmod}{Distance to the Model}
#'@author Gaston Sanchez
#'@seealso \code{\link{plot.nipals}}, \code{\link{plsreg1}}
#'@references Tenenhaus, M. (1998) \emph{La Regression PLS. Theorie et
#'Pratique.} Paris: Editions TECHNIP.
#'
#'Tenenhaus, M. (2007) \emph{Statistique. Methodes pour decrire, expliquer et
#'prevoir}. Paris: Dunod.
#'@export
#'@examples
#'
#' \dontrun{
#' # load datasets carscomplete and carsmissing
#' data(carscomplete) # complete data
#' data(carsmissing) # missing values
#'
#' # apply nipals
#' my_nipals1 = nipals(carscomplete)
#' my_nipals2 = nipals(carsmissing)
#'
#' # plot variables (circle of correlations)
#' plot(my_nipals1, what="variables", main="Complete data")
#' plot(my_nipals2, what="variables", main="Missing data")
#'
#' # plot observations with labels
#' plot(my_nipals1, what="observations", show.names=TRUE, main="Complete data")
#' plot(my_nipals2, what="observations", show.names=TRUE, main="Missing data")
#'
#' # compare results between my_nipals1 and my_nipals2
#' plot(my_nipals1$scores[,1], my_nipals2$scores[,1], type="n")
#' title("Scores comparison: my_nipals1 -vs- my_nipals2", cex.main=0.9)
#' abline(a=0, b=1, col="gray85", lwd=2)
#' points(my_nipals1$scores[,1], my_nipals2$scores[,1], pch=21,
#' col="#5592e3", bg = "#5b9cf277", lwd=1.5)
#' }
#'
nipals <- function(Data, comps = 2, scaled = TRUE, tol=1e-09) {
X = as.matrix(Data)
n = nrow(X)
p = ncol(X)
if (!n || !p) stop("dimension 0 in Data")
if (p==1) stop("\nData must be a numeric matrix or data frame")
if (is.null(colnames(X)))
colnames(X) = paste(rep("X",p), 1:p, sep="")
if (is.null(rownames(X)))
rownames(X) = rep(1:n)
if (!is.logical(scaled)) scaled = TRUE
if (scaled) X <- scale(X) else X <- scale(X, scale=FALSE)
nc = comps
if (mode(nc)!="numeric" || length(nc)!=1 ||
nc<=1 || (nc%%1)!=0 || nc>min(n,p))
nc <- 2
if (nc==n) nc = n - 1
if (any(is.na(X))) na.miss = TRUE else na.miss = FALSE
# =======================================================
# NIPALS algorithm
# =======================================================
# Prepare ingredients
X.old = X
Th = matrix(NA, n, nc) # scores
Ph = matrix(NA, p, nc) # loadings
eigvals = rep(NA, nc) # eigenvalues
# iterative process
for (h in 1:nc)
{
th.new = X.old[,1]
ph.old = rep(1, p)
ph.new = rep(1, p)
iter = 1
repeat
{
# missing data
if (na.miss)
{
for (j in 1:p)
{
i.exist = intersect(which(complete.cases(X[,j])), which(complete.cases(th.new)))
ph.new[j] = sum(X.old[i.exist,j] * th.new[i.exist]) / sum(th.new[i.exist]^2)
}
ph.new = ph.new / sqrt(sum(ph.new^2))
for (i in 1:n)
{
j.exist = which(complete.cases(X[i,]))
th.new[i] = sum(X.old[i,j.exist] * ph.new[j.exist]) / sum(ph.new[j.exist]^2)
}
} else {
# no missing data
ph.new = t(X.old) %*% th.new / sum(th.new^2)
ph.new = ph.new / sqrt(sum(ph.new^2))
th.new = X.old %*% ph.new
}
ph.aux = sum((ph.new - ph.old)^2)
ph.old = ph.new
# check convergence
# kw change 1e-06 to tol
if (ph.aux < tol || iter==100) break
iter = iter + 1
}
Th[,h] = th.new
Ph[,h] = ph.new
X.new = X.old - th.new %*% t(ph.new)
X.old = X.new
eigvals[h] = sum(th.new^2) / (n-1)
}
dimnames(Th) = list(rownames(X), paste(rep("t",nc), 1:nc, sep=""))
dimnames(Ph) = list(colnames(X), paste(rep("p",nc), 1:nc, sep=""))
# =======================================================
# eigenvalues
# =======================================================
# eigenvalues
eig.perc = 100 * eigvals / p
eigs = data.frame(values=eigvals, percentage=eig.perc, cumulative=cumsum(eig.perc))
rownames(eigs) = paste(rep("v",nc), 1:nc, sep="")
# =======================================================
# interpretation tools
# =======================================================
# correlation between components and variables
if (na.miss) {
cor.sco = matrix(NA, p, nc)
for (j in 1:p) {
i.exist = which(complete.cases(X[,j]))
cor.sco[j,] = round(cor(X[i.exist,j], Th[i.exist,]), 4)
}
dimnames(cor.sco) = list(colnames(X), colnames(Th))
} else
cor.sco = cor(X, Th)
# individuals contribution
ConInd = (100/n) * Th^2 %*% diag(1/eigvals)
dimnames(ConInd) = list(rownames(X), paste(rep("ctr",nc),1:nc,sep=".") )
# square distance of projections
D.proj = Th^2
D.orig = rowSums(X^2, na.rm=TRUE) # square distance of centered data
# square cosinus
Cos.inds = matrix(0, n, nc)
for (i in 1:n)
Cos.inds[i,] = D.proj[i,] / D.orig[i]
dimnames(Cos.inds) = list(rownames(X), paste(rep("cos2",nc),1:nc,sep=".") )
# Distance to the model: DModX
DMX2 = matrix(NA, n, nc)
DMX2[,1] = (D.orig * (1-Cos.inds[,1])) / (p-eigvals[1])
for (j in 2:nc)
DMX2[,j] = (D.orig * (1 - rowSums(Cos.inds[,1:j]))) / (p-sum(eigvals[1:j]))
# Distance to the Model X (normalized)
DMX = DMX2
dimnames(DMX) = list(rownames(X), colnames(Th))
# results
res = list(values = eigs,
scores = Th,
loadings = Ph,
cor.xt = cor.sco,
disto = D.orig,
contrib = ConInd,
cos = Cos.inds,
dmod = DMX)
class(res) = "nipals"
return(res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.