# R/sca.R In sca: Simple Component Analysis

#### Documented in agglomblockallcritcorcompcovcompfirstpcresmaxmatrixnextdiffprint.simpcompquickcritscasccritshrinkdiffsimpvectorsortmatrix

```## -*- mode: R; ess-indent-level: 2 -*-
## \$Id: sca.R,v 1.19 2015/09/01 17:12:53 maechler Exp \$

sca <- function(S, b = if(interactive) 5, d = 0,
qmin = if(interactive) 0 else 5,
corblocks = if(interactive) 0 else 0.3,
criterion = c("csv", "blp"),
cluster = c("median","single","complete"),
withinblock = TRUE, invertsigns = FALSE,
interactive = dev.interactive())
{
## Interactive program for Simple Component Analysis (SCA)

## S: correlation (or variance-covariance) matrix of data to be analyzed
##
## b: number of block-components initially proposed
## d: number of difference-components initially proposed
##
## qmin>0:  take over parameter d
##	      Select number of difference-components such that
##	      number of components at least equal to qmin
##
## corblocks>0: take over parameter b
##		 Select number of block-components such that
##		 "between-blocks" correlations smaller than corblocks
##
## criterion="csv" for "corrected sum of variances"
## criterion="blp" for "best linear predictor"
##
## withinblock=TRUE if difference-components necessarily "within-block"
## withinblock=FALSE if difference-components not necessarily "within-block"
##
## cluster="single" for "single linkage clustering"
## cluster="median" for "median linkage clustering"
## cluster="complete" for "complete linkage clustering"
##
## invertsigns=FALSE: the sign of the variables are not inverted
## invertsigns=TRUE: the sign of some variables may be inverted in order to
##		avoid negative correlations
## interactive = TRUE: interactive program active
## interactive =FALSE: computation without user interaction

S <- as.matrix(S)
dS <- dim(S)
p <- dS[2]
## check the correlation matrix
if(dS[1] != p) stop("First argument 'S' must be a square matrix")
if(!is.logical(tst <- all.equal.numeric(S, t(S), 100 * .Machine\$double.eps))
|| !tst)
stop("First argument 'S' must be a symmetric matrix")
eS <- eigen(S,symmetric = TRUE)
### MM: FIXME pos.def. test
if(eS\$value[p] < (-0.0001))
warning("argument \"S\" is not a definite positive matrix")
invertind <- NULL
if(invertsigns && isdiffcomp(eS\$vec[,1])) {
p.plus <- sum(iPos <- eS\$vec[,1] >= 0)
pminus <- sum(!iPos)
invertind <- (1:p)[if(p.plus >= pminus) !iPos else iPos]
S[,invertind] <- -S[,invertind]
S[invertind,] <- -S[invertind,]
warning("some rows and columns in 'S' have been inverted to avoid negative values")
}

S0 <- S

## check values of parameters

cluster <- match.arg(cluster)
criterion <- match.arg(criterion)

if(corblocks > 1) corblocks <- 1
qmin <- round(qmin)
if(qmin > p) qmin <- p

## create labels

labvar <- dimnames(S)[[1]]
if(is.null(labvar)) labvar <- dimnames(S)[[2]]
if(is.null(labvar)) labvar <- paste0("V",1:p)
if(!is.null(invertind))
labvar[invertind] <- paste0("-",labvar[invertind])

##---------------Calculate first b blocks and first d differences-------------

## We keep everything (and index by 'I') such that
##  quick "back" and "forth" can be used interactively.
I <- 1
phase <- as.integer(1)
P <- list(sortmatrix(S,diag(p)))
dimnames(P[[I]]) <- list(labvar, paste0("B",1:p))
crit <- list(NA)
nextpc <- list(NA)

if(corblocks > 0) { ## determine b := #{block comp.} from correlations:
maxcor <- maxmatrix(abs(corcomp(S,P[[I]])))
while(I < p && maxcor\$val > corblocks) {
phase[I+1] <- 1
P[[I+1]] <- agglomblock(S,P[[I]],cluster)
crit[[I+1]] <- NA
nextpc[[I+1]] <- NA
I <- I+1
maxcor <- maxmatrix(abs(corcomp(S,P[[I]])))
}
b <- p+1-I
}
else { ## check and use the argument 'b'

b <- round(b)
if(b < 1) {
warning("at least one block component required, setting b = 1");
b <- 1
}
else if(b > p) b <- p

while(I <= (p-b)) { ## ie. (b < p) & !(corblocks > 0)
phase[I+1] <- 1
P[[I+1]] <- agglomblock(S,P[[I]],cluster)
crit[[I+1]] <- NA
nextpc[[I+1]] <- NA
I <- I+1
}
}

if(qmin > 0)
d <- qmin-b
else { ## check and use the argument 'd' :
d <- round(d)
if(b+d > p) ## should give a warning - at least if corblocks = 0 (?)
d <- p-b
}

if(d > 0) {
phase[I+1] <- 2
P[[I+1]] <- P[[I]]
crit[[I+1]] <- NA
nextpc[[I+1]] <- NA
I <- I+1
for(i in 1:d) {
phase[I+1] <- 2
tmp <- nextdiff(S,P[[I]],withinblock,criterion)
P[[I+1]] <- tmp[[1]]
crit[[I+1]] <- NA
nextpc[[I+1]] <- tmp[[2]]
I <- I+1
}
}

if(!interactive) {
P[[I+1]] <- P[[I]]
crit[[I+1]] <- allcrit(S,P[[I]],criterion,sortP = TRUE)
I <- I+1
}
else { ##  if(interactive)

clickvalid <- 0.1
clickentries <- 0.025
Imax <- I
cat("Use your graphics window for interaction with sca() from now on!\n")
}

## phase=1: Definition of block-components
## phase=2: Definition of difference-components
## phase=3: Deletion of difference-components
## phase=4: Final system (non-interactive)

while(phase[I] < 4) { ##-------- interactive algorithm -----------------------

##---------------Initialize the present step--------------------------------

## calculate values of criteria if the system has not yet been printed:

if(is.na(crit[[I]][[1]][1]))
crit[[I]] <- allcrit(S,P[[I]],criterion,sortP = TRUE)

## initilaize the screen for this step

Ptmp <- P[[I]]
qI <- ncol(Ptmp)
crittmp <- crit[[I]]
okcomp <- rep(TRUE, qI)
messg <- ""
warn <- ""
if(I > 1 && phase[I] == (phase[I-1]+1) && !eqmatrix(P[[I]], P[[I-1]])) {
warn <- "Warning: components have been reordered !"
}
r0 <- redrawMatrix(phase[I],I,Imax,Ptmp,crittmp,nextpc[[I]],
okcomp,messg,warn)
warn <- ""

## new   = T : the bottom left  button on the screen has been activated
## valid = T : the bottom right button on the screen has been activated
## back  = T : the  top   left  button on the screen has been activated
## forth = T : the  top   right button on the screen has been activated
new <- valid <- back <- forth <- FALSE

##---------------Treat the present step-------------------------------------

while(!new && !valid && !back && !forth) {

messg <- ""
replot <- FALSE
r <- unlist(locator(1)) ## << User Input  c(x, y)

j <- which.min(abs(r[1]-r0[[1]]))
i <- which.min(abs(r[2]-r0[[2]]))

##---------------Change the blocks----------------------------------------

if(phase[I] == 1 && I > 1 && Ptmp[i,j] == 0 &&
abs(r[1]-r0[[1]][j]) <= clickentries &&
abs(r[2]-r0[[2]][i]) <= clickentries)
{
replot <- TRUE
Ptmp[i,j] <- 1
Ptmp[i,-j] <- 0
okcomp <- isblockempty(Ptmp)
if(all(okcomp)) {
messg <- ""
crittmp <- allcrit(S,Ptmp,criterion,sortP = TRUE)
} else messg <- "some block is empty !"
}

##---------------Change the differences-----------------------------------

if(phase[I] == 2 && j > b &&
abs(r[1]-r0[[1]][j]) <= clickentries &&
abs(r[2]-r0[[2]][i]) <= clickentries)
{
replot <- TRUE
Ptmp[i,j] <- sign(Ptmp[i,j])-1
if(Ptmp[i,j] == (-2)) Ptmp[i,j] <- 1
Ptmp[,j] <- simpvector(Ptmp[,j])
okcomp[j] <- isdiffcomp(Ptmp[,j]) && (b == 1 || !withinblock ||
iswithinblock(Ptmp[,j],Ptmp[,1:b]))
if(!withinblock && sum(okcomp) < qI)
messg <- "this is not a difference !"
if(withinblock && sum(okcomp) < qI)
messg <- "this is not a (within-block) difference !"
if(sum(okcomp) == qI) {
messg <- ""
crittmp <- allcrit(S,Ptmp,criterion,sortP = TRUE)
}
}

##---------------Delete difference-components-----------------------------

if(phase[I] == 3 && j > b &&
abs((r[1]-r0[[1]])[j]) <= clickentries &&
min(abs((r[2]-r0[[2]]))) <= clickentries)
{
new <- TRUE
Ptmp <- Ptmp[, -j , drop = FALSE]
qI <- ncol(Ptmp)
## not needed: if(qI == 1) dimnames(Ptmp)[[2]] <- "B1"
crittmp <- allcrit(S,Ptmp,criterion,sortP = TRUE)

## MM FIXME: shouldn't we break out in this case anyway?
##    -----  otherwise can happen that
}

##---------------Select the next step------------------------------------

if(sum(okcomp) == qI &&
((phase[I] == 1 && qI > 1) ||
(phase[I] == 2 && qI < p)))
{
new <- r["x"] < 0 && all(abs(c(0,0) - r) <= clickvalid)
}

if(sum(okcomp) == qI)
valid <- r["x"] > 1 && all(abs(c(1,0) - r) <= clickvalid)

if(I > 1)
back  <- r["x"] < 0 && all(abs(c(0,1) - r) <= clickvalid)

if(I < Imax)
forth <- r["x"] > 1 && all(abs(c(1,1) - r) <= clickvalid)

if(phase[I] < 3 && new) messg <- "calculation in progress ..."
if(messg != "") replot <- TRUE
if(replot)
r0 <- redrawMatrix(phase[I],I,Imax,Ptmp,crittmp,nextpc[[I]],
okcomp,messg,warn)
} ## while(.. present step ..)

##---------------Prepare the next step when new=TRUE------------------------

if(new) {
if(phase[I] == 1) {
if(eqmatrix(Ptmp,P[[I]])) {
J <- I+1
}
else {
phase[I+1] <- 1
P[[I+1]] <- Ptmp
crit[[I+1]] <- crittmp
nextpc[[I+1]] <- NA
J <- I+2
}
phase[J] <- 1
P[[J]] <- agglomblock(S,Ptmp,cluster)
crit[[J]] <- allcrit(S,P[[J]],criterion,sortP = TRUE)
nextpc[[J]] <- NA
}
else if(phase[I] == 2) {
if(eqmatrix(Ptmp,P[[I]])) {
J <- I+1
} else { ## !eqmatrix(Ptmp,P[[I]])
phase[I+1] <- 2
P[[I+1]] <- Ptmp
crit[[I+1]] <- crittmp
nextpc[[I+1]] <- nextpc[[I]]
J <- I+2
}
phase[J] <- 2
tmp <- nextdiff(S,Ptmp,withinblock,criterion)
P[[J]] <- tmp[[1]]
crit[[J]] <- allcrit(S,P[[J]],criterion,sortP = TRUE)
nextpc[[J]] <- tmp[[2]]
}
else if(phase[I] == 3) {
J <- I+1
phase[J] <- 3
P[[J]] <- Ptmp
crit[[J]] <- crittmp
nextpc[[J]] <- NA
}
} ## if(new)

##---------------Prepare the next step when valid=TRUE----------------------

if(valid) {
if(phase[I] < 3) {
if(eqmatrix(Ptmp,P[[I]])) {
J <- I+1
} else {
phase[I+1] <- phase[I]
P[[I+1]] <- Ptmp
crit[[I+1]] <- crittmp
nextpc[[I+1]] <- nextpc[[I]]
J <- I+2
}
phase[J] <- phase[I]+1
P[[J]] <- sortmatrix(S,Ptmp)
crit[[J]] <- allcrit(S,P[[J]], criterion, sortP = TRUE)
nextpc[[J]] <- NA
}
else if(phase[I] == 3) {
J <- I+1
phase[J] <- 4
P[[J]] <- Ptmp
crit[[J]] <- crittmp
nextpc[[J]] <- NA
}

}## if(valid)

##---------------Prepare the next step when back=TRUE or forth=TRUE---------

if(back)
J <- if(eqmatrix(Ptmp,P[[I]])) I - 1 else I
if(forth) J <- I+1

##---------------End of the step--------------------------------------------

if(valid && phase[I] == 1) b <- dim(P[[I]])[2]
if(new || valid) Imax <- J
I <- J

} ## while( phase[] < 4 )

##---------------End of the process-------------------------------------------

if(interactive) {
okcomp <- rep(TRUE,dim(P[[I]])[2])
messg <- ""
r0 <- redrawMatrix(phase[I],I,Imax,P[[I]],crit[[I]],nextpc[[I]],
okcomp,messg,warn)
}

dimnames(S0) <- list(labvar,labvar)
cNam <- dimnames(P[[I]])[[2]]
dimnames(crit[[I]]\$corsc) <- list(cNam,cNam)
crit[[I]]\$maxcor\$row <- cNam[crit[[I]]\$maxcor\$row]
crit[[I]]\$maxcor\$col <- cNam[crit[[I]]\$maxcor\$col]

r <- list(simplemat = P[[I]],
allcrit  = crit[[I]],
nblock   = b,
ndiff    = dim(P[[I]])[2] - b,
criterion = criterion,
cluster  = cluster,
withinblock = withinblock,
invertsigns = invertsigns,
vardata  = S0)
class(r) <- "simpcomp"
r
} ## sca()

##----------------------------------------------------------------------------
##-------------------PRINTING SCREEN PROCEDURE--------------------------------
##----------------------------------------------------------------------------

redrawMatrix <- function(phase, I, Imax, P, crit, nextpc,
okcomp, messg, warn)
{
## print the screen for a given system

if(0 > (phase <- as.integer(phase)) || phase > 4)
stop("'phase' must be in 1:4")
if(is.null(dP <- dim(P))) stop("'P' must be a component matrix")
p <- dP[1]
q <- dP[2]
if(q > p) stop("q > p : should never happen")
if(length(okcomp) != q)
warning("** okcomp[] has wrong length ", length(okcomp),

## determine  b := #{block components} <= q
if(phase == 1)
b <- q
else {
b <- 1
while(b < q && okcomp[b+1] && !isdiffcomp(P[,b+1]))
b <- b+1
}

dn <- dimnames(P)

maxvar <- 20
cex <- min(1, maxvar/p)
maxchar <- 8
cexlab <- cex* min(1, maxchar/ nchar(dn[[1]]))

## colors for block-, difference-, and error - components and commands
col <- c(b = 2, d = 4, e = 6, c = 4)

## create messg
if(messg == "")
messg <- paste(q,"components:    SCA =", percent(crit\$cumsc,1),
"    PCA =", percent(crit\$cumpc,1),
"    Opt =", percent(crit\$opt,1),
"    Max cor =", round(crit\$maxcor\$val,2),
"(", dn[[2]][crit\$maxcor\$row],
"-", dn[[2]][crit\$maxcor\$col],")")

## coordinates strictly inside (0,1) (will be returned)
xcord <- seq(0,1,len = q+2)[-c(1,q+2)] # length q
ycord <- seq(1,0,len = p+2)[-c(1,p+2)] # length p

## plot the labels

plot(0,1,xlim = c(-0.05,1.05), ylim = c(0,1),
type = "n", axes = FALSE, xlab = "", ylab = "")

text(rep(0,p),ycord,dn[[1]],cex = cexlab)
text(xcord,1,dn[[2]],cex = cex)

## plot the system

for(j in 1:q) {
if(okcomp[j]) {
text(rep(xcord[j],p), ycord, P[,j],
col = col[if(j <= b) "b" else "d"], cex = cex)
text(rep(xcord[j],p),rep(0,p), paste(percent(crit\$varsc[j],1)),
cex = cex)
}
else { ## !okcomp[j]
text(rep(xcord[j],p),ycord,P[,j],col = col["e"],cex = cex)
}
}
if(phase == 2 && !is.na(nextpc[1])) {
text(rep(1,p),ycord, round(100 * nextpc), cex = cex)
}

## plot the buttons

if(phase < 4 && I > 1)
text(0,1,"BACK",cex = 1,col = col["c"])
if(phase == 1 && sum(okcomp) == q && q > 1)
text(0,0,"GROUP",cex = 1,col = col["c"])
else if(phase == 2 && sum(okcomp) == q && q < p)
text(0,0,"NEXT",cex = 1,col = col["c"])

if(phase < 4 && I < Imax)
text(1,1,"FORW",cex = 1,col = col["c"])
if(phase < 4 && sum(okcomp) == q)
text(1,0,"VALID",cex = 1,col = col["c"])

## create titles

if(phase <= 3)
tit1 <- paste0("Stage ",phase," (Step ",I,"):")
tit1 <-
switch(phase,
paste(tit1,"Definition of block-components"),	## 1
paste(tit1,"Definition of difference-components"),	## 2
paste(tit1,"Deletion of difference-components"),	## 3
"Simple components - Final system")	## 4

cl1 <- "Click on 0s to modify blocks"
cl2 <- paste0("Click on entries of ",dn[[2]][b+1]," to change signs")
cl3 <- paste0("Click on entries of ",dn[[2]][b+1],"-",dn[[2]][q]," to change signs")
cl4 <- paste0("Click on entries of ",dn[[2]][b+1]," to delete component")
cl5 <- paste0("Click on entries of ",dn[[2]][b+1],"-",dn[[2]][q]," to delete component")
cl6 <- "Click on GROUP to agglomerate blocks"
cl7 <- paste0("Click on NEXT to calculate D", q+1)

cl11 <- "There is only one block"
cl12 <- "Maximum number of components attained"
cl13 <- "No difference-components to delete"

tit2 <-
switch(phase,
{ ## phase 1:
cl8 <- "Click on VALID to go to Stage 2"
if(q > 1 && q == p)	paste(cl6,"-",cl8)
else if(q > 1 && q < p)  paste(cl1,"-",cl6,"-",cl8)
else if(q == 1)  paste(cl11,"-",cl8)
else warning("q (= ncol(P)) is invalid in phase 1")
},

{ ## phase 2 :
cl9 <- "Click on VALID to go to Stage 3"
if(q > b+1) {
if(q < p)	paste(cl3,"-",cl7,"-",cl9)
else ## q == p
paste(cl12,"-",cl3,"-",cl9)
}
else if(q == b+1) {
if(q < p)	paste(cl2,"-",cl7,"-",cl9)
else ## q == p
paste(cl12,"-",cl2,"-",cl9)
}
else if(q == b) {
if(q < p)	paste(cl7,"-",cl9)
else ## q == p
paste(cl12,"-",cl9)
}
## else q < b : impossible
},

{ ## phase 3 :
cl10 <- "Click on VALID to end the program"
if(q >  b+1)	 paste(cl5,"-",cl10)
else if(q == b+1)  paste(cl4,"-",cl10)
else if(q == b)  paste(cl13,"-",cl10)
## else q < b : impossible
},
{ ## phase 4 :
""
})

## print titles and messgs

title(main = paste(tit1,"\n",tit2),	cex = 0.7)
title(sub = messg, mgp = c(1,1,0),  cex = 1)
title(sub = warn,  mgp = c(2.5,1,0), cex = 1)

list(x= xcord, y=ycord)
} ## redrawMatrix()

##----------------------------------------------------------------------------
##-------------------SCA PROCEDURES-------------------------------------------
##----------------------------------------------------------------------------

agglomblock <- function(S, P, cluster = c("median","single","complete"))
{
## agglomerate the two block-components according to cluster

S <- as.matrix(S)
P <- as.matrix(P)
p <- dim(P)[1]
b <- dim(P)[2]
if(b == 1) return(P)
cluster <- match.arg(cluster)

R <- cov2cor(S)
g <- vector("list", b)
for(i in 1:b) g[[i]] <- (1:p)[P[,i] == 1]
cormax <- -Inf
for(i in 1:(b-1)) {
for(j in (i+1):b) {
newcor <-
switch(cluster,
"single"  = max   (R[g[[i]],g[[j]]]),
"median"  = median(R[g[[i]],g[[j]]]),
"complete"= min   (R[g[[i]],g[[j]]]))
if(newcor >= cormax) {
cormax <- newcor
i0 <- i
j0 <- j
}
}
}
P[,i0] <- P[,i0]+P[,j0]
P <- sortmatrix(S, P[,-j0])
dimnames(P)[[2]] <- paste0("B",1:(b-1))
P
}

##----------------------------------------------------------------------------

nextdiff <- function(S,P, withinblock, criterion = c("csv", "blp"))
{
## calculate the next simple difference-component
S <- as.matrix(S)
P <- as.matrix(P)
criterion <- match.arg(criterion)
zcomp <- firstpcres(S,P)
## Eigen vectors are only unique up to  * (+- 1)
## make sure the component is "unique": first (non-zero) entry positive:
k <- 1; while(zcomp[k] == 0) k <- k + 1
if(zcomp[k] < 0) zcomp <- -zcomp

q <- ncol(P)

if(!withinblock) {
scompmax <- shrinkdiff(zcomp,S,P,criterion)\$scompmax
nextpc <- zcomp
}
else { ## withinblock :
k <- 1
critmax <- 0
while(k <= q && !isdiffcomp(P[,k])) {
not0 <- P[,k] != 0
if(sum(not0) >= 2) {
zc0 <- zcomp
zc0[!not0] <- 0
sd.z <- shrinkdiff(zc0, S,P, criterion)
if(sd.z\$critmax >= critmax) {
critmax <- sd.z\$critmax
scompmax <- sd.z\$scompmax
nextpc <- zc0
}
}
k <- k+1
}
}

P <- cbind(P,scompmax)
dimnames(P)[[2]][q+1] <- paste0("D",q+1)

list(P=P, nextpc=nextpc)
}

##----------------------------------------------------------------------------

shrinkdiff <- function(zcomp,S,P, criterion) {
## shrink a component towards a simple difference-component

S <- as.matrix(S)
P <- as.matrix(P)
scompmax <- simpvector(sign(zcomp))
critmax <- quickcrit(scompmax,S,P,criterion)
abs.z <- abs(zcomp)
cut <- sort(abs.z[abs.z > 0])
if(length(cut) > 2) {
for(i in 2:(length(cut)-1)) {
newscomp <- simpvector(sign(zcomp*(abs.z >= cut[i])))
newcrit <- quickcrit(newscomp,S,P,criterion)
if(isdiffcomp(newscomp) && newcrit >= critmax) {
critmax <- newcrit
scompmax <- newscomp
}
}
}
list(scompmax=scompmax, critmax=critmax)
}

##----------------------------------------------------------------------------
##-------------------MATRICES AND VECTORS PROCEDURES--------------------------
##----------------------------------------------------------------------------

simpvector <- function(x) {
## simplify the vector x

x <- sign(x)
npos <- sum(x > 0)
nneg <- sum(x < 0)
if(nneg > 0) x[x > 0] <- nneg
if(npos > 0) x[x < 0] <- -npos
## MM: ensure we use exact integer arithmetic
if(nneg > 0 || npos > 0) {
while(sum(x%%2) == 0) x <- x %/% 2
while(sum(x%%3) == 0) x <- x %/% 3
while(sum(x%%5) == 0) x <- x %/% 5
while(sum(x%%7) == 0) x <- x %/% 7
if(nneg > 1) while(sum(x%%nneg) == 0) x <- x %/% nneg
if(npos > 1) while(sum(x%%npos) == 0) x <- x %/% npos
}
x
}

##----------------------------------------------------------------------------

eqmatrix <- function(A,B) {
A <- as.matrix(A)
B <- as.matrix(B)
all(dim(A) == dim(B)) && all(A == B)
}

##----------------------------------------------------------------------------

isblockempty <- function(P)
apply(as.matrix(P), 2, function(v) max(v) > 0)

##----------------------------------------------------------------------------

isdiffcomp <- function(x) { prod(range(x)) < 0 }

##----------------------------------------------------------------------------

iswithinblock <- function(d,P) {
P <- as.matrix(P)
b <- ncol(P)
## FIXME: faster using partial sort() :
return(sort(crossprod(abs(d), abs(P)))[b-1] == 0)
}

##----------------------------------------------------------------------------
##-------------------COMPUTATIONAL PROCEDURES---------------------------------
##----------------------------------------------------------------------------

allcrit <- function(S,P, criterion, sortP = TRUE)
{
## calculate all kinds of criteria

S <- as.matrix(S)
P <- normmatrix(P)
q <- ncol(P)
tr.S <- sum(diag(S))
varpc <- eigen(S, symmetric = TRUE, only.values=TRUE)\$values[1:q] / tr.S
varsc <- diag(covcomp(S,P)) / tr.S
cumpc <- sum(varpc)## == cumsum(varpc)[q]
cumsc <- sccrit(S,P,criterion,sortP)[q]
opt <- cumsc/cumpc
corsc <- corcomp(S,P)
maxcor <- maxmatrix(abs(corsc))
list(varpc=varpc, varsc=varsc,
cumpc=cumpc, cumsc=cumsc,
opt=opt, corsc=corsc, maxcor=maxcor)
}

##----------------------------------------------------------------------------

sccrit <- function(S,P, criterion, sortP = TRUE) {
## return value of criterion for components P on S (cumulative)

S <- as.matrix(S)
if(sortP) P <- sortmatrix(S,P)
P <- normmatrix(P)
q <- ncol(P)
varsc <- numeric(q)
for(k in 1:q) {
Ak <- if(k > 1) {
Pk <- P[,1:(k-1)]
P.S <- crossprod(Pk, S)
S - crossprod(P.S, solve(P.S %*% Pk, P.S))
} else S
pk <- P[,k]
varsc[k] <-
switch(criterion,
"csv" = crossprod(pk,Ak) %*% pk,
"blp" = (crossprod(pk, Ak %*% Ak) %*% pk) / (crossprod(pk,Ak) %*% pk))
}

cumsum(varsc) / sum(diag(S))
}

##----------------------------------------------------------------------------

quickcrit <- function(newcomp,S,P, criterion) {
## return additional contribution of a new component to the system P on S

S <- as.matrix(S)
Pk <- normmatrix(P)
Ak <- crossprod(Pk, S)
Ak <- S - crossprod(Ak, solve(Ak %*% Pk, Ak))

pk <- normmatrix(newcomp)
switch(criterion,
"csv" = crossprod(pk, Ak)%*% pk,
"blp" = (crossprod(pk, Ak %*% Ak) %*% pk) / (crossprod(pk, Ak)%*% pk)
)
}

##----------------------------------------------------------------------------

firstpcres <- function(S,P) {
## return the first principal component of residuals of S given components P

S <- as.matrix(S)
P <- normmatrix(P)
A <- crossprod(P, S)
A <- S - crossprod(A, solve(A %*% P, A))

## Return first eigen vector :
eigen(A, symmetric = TRUE)\$vectors[,1]
}

corcomp <- function(S,P) {
## return the correlation matrix of the components P on S

P <- normmatrix(P)
A <- crossprod(P, as.matrix(S)) %*% P ## the covariance
cov2cor(A)
}

##----------------------------------------------------------------------------

## FIXME: efficiency : this is only used as  diag(covcomp(S,P)) !!!
## .../MATRIX/TODO:   if A %*% B is square,  diag(A %*% B) === colSums(t(A) * B)
covcomp <- function(S,P) {
## return the variance-covariance matrix of the components P on S

P <- normmatrix(P)
## return
crossprod(P, as.matrix(S)) %*% P
}

##----------------------------------------------------------------------------

normmatrix <- function(P) {
## normalize the matrix P

P <- as.matrix(P)
sweep(P,2,apply(P,2,function(v) { sqrt(sum(v^2)) }),"/")
}

##----------------------------------------------------------------------------
##-------------------SORTING PROCEDURES---------------------------------------
##----------------------------------------------------------------------------

maxmatrix <- function(R) {
## return position and value of the largest element of a correlation matrix R
## (without taking into account the diagonal elements)

R <- as.matrix(R)
p <- dim(R)[1]
if(p <= 1) {
row <- NA
col <- NA
val <- NA
} else { ## (p > 1)
i <- sort.list(R)[p*(p-1)]
row <- i%%p+p*(i%%p == 0)
col <- 1+i%/%(p+1/p)
val <- R[row,col]
if(row > col) { ## swap
tmp <- col; col <- row; row <- tmp
}
}
list(row=row, col=col, val=val)
}

##----------------------------------------------------------------------------

sortmatrix <- function(S,P) {
## sort a matrix P by decreasing variances of components
## (where the block-components are first)

S <- as.matrix(S)
P <- as.matrix(P)
q <- ncol(P)
diffcomp <- apply(P,2,isdiffcomp)
iblock <- (1:q)[!diffcomp]
i.diff <- (1:q)[diffcomp]
d <- diag(covcomp(S,P))
P[,] <- P[,c(iblock[sort.list(-d[iblock])],
i.diff[sort.list(-d[i.diff])])]
P
}

##----------------------------------------------------------------------------
##-------------------PRINTING CRITERIA PROCEDURE------------------------------
##----------------------------------------------------------------------------

percent <-  {
if(is.R())## work around R's format(*, nsmall = . ) bug:
function(p, d = 0, sep=" ")
paste(formatC(round(100 * p, d), format="f", digits= d), "%",
sep = sep)
else
function(p, d = 0, sep=" ")
paste(format(round(100 * p, d), nsmall= d), "%", sep = sep)
}

print.simpcomp <- function(x, ndec = 2, ...) {

## print method for the result list of the sca() function above

catn <- function(...) cat(..., "\n")

labcrit <-
switch(x\$criterion,
"csv" = "corrected sum of variances",
"blp" = "best linear predictor")

catn("------------------------------------------------------------")
catn("Simple Component Analysis")
catn("------------------------------------------------------------")
catn("Optimality criterion        :",labcrit)
catn("Within-block differences    :",x\$withinblock)
catn("Possible invertion of signs :",x\$invertsigns)
catn("Number of block-components  :",x\$nblock)
catn("Number of diff.-components  :",x\$ndiff)
catn("------------------------------------------------------------")
catn("Simple matrix:")
print(x\$simplemat)

AC <- x\$allcrit
catn("------------------------------------------------------------")
catn("Variance principal components:\n", percent(AC\$varpc,ndec))
catn("Variance simple components   :\n", percent(AC\$varsc,ndec))
catn("------------------------------------------------------------")
catn("Extracted variability PCA:", percent(AC\$cumpc,ndec))
catn("Extracted variability SCA:", percent(AC\$cumsc,ndec))
catn("Optimality SCA           :", percent(AC\$opt,ndec))
catn("------------------------------------------------------------")
catn("Correlations simple components:")
print(round(AC\$corsc,ndec))
catn("------------------------------------------------------------")
cat("Max (abs) correlation: ")
catn(round(AC\$maxcor\$val,ndec), "(",AC\$maxcor\$row,"-",AC\$maxcor\$col,")")
catn("------------------------------------------------------------")
invisible(x)
}
```

## Try the sca package in your browser

Any scripts or data that you put into this service are public.

sca documentation built on Jan. 14, 2023, 5:07 p.m.