mbpls <- function(dudiY, ktabX, scale = TRUE, option = c("uniform", "none"), scannf = TRUE, nf = 2) {
## -------------------------------------------------------------------------------
## Some tests
##--------------------------------------------------------------------------------
if (!inherits(dudiY, "dudi"))
stop("object 'dudi' expected")
if (!inherits(ktabX, "ktab"))
stop("object 'ktab' expected")
if (any(row.names(ktabX) != row.names(dudiY$tab)))
stop("ktabX and dudiY must have the same rows")
if (!isTRUE(all.equal(ktabX$lw/sum(ktabX$lw), dudiY$lw/sum(dudiY$lw))))
stop("ktabX and dudiY must have the same row weights")
if (nrow(dudiY$tab) < 6)
stop("Minimum six rows are required")
if (any(ktabX$blo < 2))
stop("Minimum two variables per explanatory block are required")
if (!(is.logical(scale)))
stop("Non convenient selection for scaling")
if (!(is.logical(scannf)))
stop("Non convenient selection for scannf")
if (nf < 0)
nf <- 2
## Only works with a centred PCA (dudi.pca with center = TRUE) and uniform row weights
#if (!any(dudi.type(dudiY$call) == c(3,4)))
# stop("Only implemented for centred pca")
# Check the formula / rounding
#if (any(dudiY$lw != 1/nrow(dudiY$tab)))
# stop("Only implemented for uniform row weights")
option <- match.arg(option)
## -------------------------------------------------------------------------------
## Arguments and data transformation
## -------------------------------------------------------------------------------
## Preparation of the data frames
Y <- as.matrix(dudiY$tab)
nblo <- length(ktabX$blo)
Xk <- lapply(unclass(ktabX)[1 : nblo], scalewt, wt = ktabX$lw, center = TRUE, scale = scale)
nr <- nrow(Y)
ncolY <- ncol(Y)
## Block weighting
if (option[1] == "uniform"){
Y <- Y / sqrt(sum(dudiY$eig)) ## Here we use biased variance. We should use Y <- Y / sqrt(nr/(nr-1)*sum(dudiY$eig)) for unbiased estimators
for (k in 1 : nblo){
Xk[[k]] <- Xk[[k]] / sqrt((nblo/nr) * sum(diag(crossprod(Xk[[k]])))) ## same : Xk[[k]] <- Xk[[k]] / sqrt((nblo/(nr-1)) * sum(diag(crossprod(Xk[[k]])))) for unbiased estimators
}
}
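## Note: with uniform row weights (lw = 1/nr) and the biased variance convention
## used above, this weighting gives Y a total inertia of 1 and each Xk a total
## inertia of 1/nblo, so the concatenated table X also has total inertia 1.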
X <- cbind.data.frame(Xk)
colnames(X) <- col.names(ktabX)
ncolX <- ncol(X)
maxdim <- qr(X)$rank
##-----------------------------------------------------------------------
## Prepare the outputs
##-----------------------------------------------------------------------
## Yc1 (V in Bougeard et al): was c1
## lY (U): was ls
## Added: Yco (cov(Y, lX)) -> total norm = eig
## lX (T): was li
## faX (W*): was Wstar
## TlX (Tk): was Tk
## Tc1 (Wk): was Wk
## Added: cov2 (cov^2(lY, TlX))
## XYcoef (Beta): was beta
## bip, bipc
## vip, vipc
## Removed: W
## Removed: l1
## Removed: C (replaced by Yco)
## Removed: Ak (replaced by cov2)
dimlab <- paste("Ax", 1:maxdim, sep = "")
res <- list(tabX = X, tabY = as.data.frame(Y), nf = nf, lw = ktabX$lw, X.cw = ktabX$cw, blo = ktabX$blo, rank = maxdim, eig = rep(0, maxdim), TL = ktabX$TL, TC = ktabX$TC)
res$Yc1 <- matrix(0, nrow = ncolY, ncol = maxdim, dimnames = list(colnames(dudiY$tab), dimlab))
res$lX <- res$lY <- matrix(0, nrow = nr, ncol = maxdim, dimnames = list(row.names(dudiY$tab), dimlab))
res$cov2 <- Ak <- matrix(0, nrow = nblo, ncol = maxdim, dimnames = list(names(ktabX$blo), dimlab))
res$Tc1 <- lapply(1:nblo, function(k) matrix(0, nrow = ncol(Xk[[k]]), ncol = maxdim, dimnames = list(colnames(Xk[[k]]), dimlab)))
res$TlX <- rep(list(matrix(0, nrow = nr, ncol = maxdim, dimnames = list(row.names(dudiY$tab), dimlab))), nblo)
res$faX <- matrix(0, nrow = ncolX, ncol = maxdim, dimnames = list(col.names(ktabX), dimlab))
lX1 <- res$lX
W <- res$faX
##-----------------------------------------------------------------------
## Compute components and loadings by an iterative algorithm
##-----------------------------------------------------------------------
Y <- as.matrix(Y)
X <- as.matrix(X)
f1 <- function(x) crossprod(x * res$lw, Y)
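## f1(Xk) computes t(Xk) %*% diag(lw) %*% Y, the weighted cross-covariance between
## the variables of block k and Y; M (below) is the sum over blocks of
## t(f1(Xk)) %*% f1(Xk), and its first eigenvector gives the Y loadings V.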
for(h in 1 : maxdim) {
## iterative algorithm
## Compute the matrix M for the eigenanalysis
M <- lapply(lapply(Xk, f1), crossprod)
M <- Reduce("+", M)
## Compute the loadings V and the components U (Y dataset)
eig.M <- eigen(M)
if (eig.M$values[1] < sqrt(.Machine$double.eps)) {
res$rank <- h-1 ## update the rank
break
}
res$eig[h] <- eig.M$values[1]
res$Yc1[, h] <- eig.M$vectors[, 1, drop = FALSE]
res$lY[, h] <- Y %*% res$Yc1[, h]
## Compute the loadings Wk and the components Tk (Xk datasets)
covutk <- rep(0, nblo)
for (k in 1 : nblo) {
res$Tc1[[k]][, h] <- crossprod(Xk[[k]] * res$lw, res$lY[, h])
res$Tc1[[k]][, h] <- res$Tc1[[k]][, h] / sqrt(sum(res$Tc1[[k]][, h]^2))
res$TlX[[k]][, h] <- Xk[[k]] %*% res$Tc1[[k]][, h]
covutk[k] <- crossprod(res$lY[, h] * res$lw, res$TlX[[k]][, h])
res$cov2[k, h] <- covutk[k]^2
}
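## The global component T (lX) is built as a weighted sum of the partial components
## Tk, with weights ak = cov(lY, Tk) / sqrt(sum_k cov^2(lY, Tk)).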
for(k in 1 : nblo) {
Ak[k, h] <- covutk[k] / sqrt(sum(res$cov2[,h]))
res$lX[, h] <- res$lX[, h] + Ak[k, h] * res$TlX[[k]][, h]
}
lX1[, h] <- res$lX[, h] / sqrt(sum(res$lX[, h]^2))
## use ginv to avoid NA in coefficients (collinear system)
W[, h] <- tcrossprod(MASS::ginv(crossprod(X)), X) %*% res$lX[, h]
## Deflation of the Xk datasets on the global components T
Xk <- lapply(Xk, function(y) lm.wfit(x = as.matrix(res$lX[, h]), y = y, w = res$lw)$residuals)
X <- as.matrix(cbind.data.frame(Xk))
}
##-----------------------------------------------------------------------
## Compute regressions coefficients
##-----------------------------------------------------------------------
## Use of the original (and not the deflated) datasets X and Y
X <- as.matrix(res$tabX)
Y <- as.matrix(res$tabY)
## Computing the regression coefficients of X onto the global components T (Wstar)
## res$faX <- lm.wfit(x = X, y = res$lX, w = res$lw)$coefficients ## lm is not used to avoid NA coefficients in the case of not full rank matrices
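## The loop below implements the usual PLS recursion for W*:
## faX[, h] = prod_{j < h} (I - W[, j] %*% p_j') %*% W[, h],
## where p_j' = t(lX[, j]) %*% X / ||lX[, j]||^2 is computed as 'a' from the
## normalised component lX1, and X is deflated accordingly at each step.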
res$faX[, 1] <- W[, 1, drop = FALSE]
A <- diag(ncolX)
if(maxdim >= 2){
for(h in 2:maxdim){
a <- crossprod(lX1[, h-1], X) / sqrt(sum(res$lX[, h-1]^2))
A <- A %*% (diag(ncolX) - W[, h-1] %*% a)
res$faX[, h] <- A %*% W[, h]
X <- X - tcrossprod(lX1[, h-1]) %*% X
}
}
## Computing the regression coefficients of X onto Y (Beta)
res$Yco <- t(Y) %*% diag(res$lw) %*% res$lX
norm.li <- diag(crossprod(res$lX * sqrt(res$lw)))
##res$C <- t(lm.wfit(x = res$lX, y = Y, w = res$lw)$coefficients)
##res$XYcoef <- lapply(1:ncolY, function(x) t(apply(sweep(res$faX, 2 , res$C[x,], "*"), 1, cumsum)))
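## The regression coefficients of Y on the global components T are Yco / norm.li
## (weighted cov(Y, T) divided by the weighted squared norm of T); XYcoef accumulates
## faX weighted by these coefficients over dimensions, giving, for each Y variable,
## the coefficients of the models using 1, 2, ..., nf components.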
res$XYcoef <- lapply(1:ncolY, function(x) t(apply(sweep(res$faX, 2 , res$Yco[x,] / norm.li, "*"), 1, cumsum)))
names(res$XYcoef) <- colnames(dudiY$tab)
##-----------------------------------------------------------------------
## Variable and block importances
##-----------------------------------------------------------------------
## Block importances
res$bip <- Ak^2
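## bip: block importances, i.e. the squared weights ak^2 of the partial components;
## they sum to 1 within each dimension. bipc (below) cumulates them over dimensions,
## weighting each dimension by its eigenvalue.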
if (nblo == 1 || res$rank == 1)
res$bipc <- res$bip
else
res$bipc <- t(sweep(apply(sweep(res$bip, 2, res$eig, "*"), 1, cumsum), 1, cumsum(res$eig), "/"))
## Variable importances
WcarreAk <- res$faX^2 * res$bip[rep(1:nblo, ktabX$blo),]
res$vip <- sweep(WcarreAk, 2, colSums(WcarreAk), "/")
if (nblo == 1 || res$rank == 1)
res$vipc <- res$vip
else
res$vipc <- t(sweep(apply(sweep(res$vip, 2, res$eig, "*"), 1, cumsum), 1, cumsum(res$eig), "/"))
##-----------------------------------------------------------------------
## Modify the outputs
##-----------------------------------------------------------------------
if (scannf == TRUE){
barplot(res$eig[1:res$rank])
cat("Select the number of global components: ")
res$nf <- as.integer(readLines(n = 1))
}
if(res$nf > res$rank)
res$nf <- res$rank
## keep results for the nf dimensions (except eigenvalues and lX)
res$eig <- res$eig[1:res$rank]
res$lX <- res$lX[, 1:res$rank]
res$Tc1 <- do.call("rbind", res$Tc1)
res$TlX <- do.call("rbind", res$TlX)
res <- modifyList(res, lapply(res[c("Yc1", "Yco", "lY", "Tc1", "TlX", "cov2", "faX", "vip", "vipc", "bip", "bipc")], function(x) x[, 1:res$nf, drop = FALSE]))
res$XYcoef <- lapply(res$XYcoef, function(x) x[, 1:res$nf, drop = FALSE])
res$call <- match.call()
class(res) <- c("multiblock", "mbpls")
return(res)
}
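## ------------------------------------------------------------------------
## Usage sketch (illustration only, not part of the function). It assumes the
## ade4 package and its 'chickenk' example dataset are available; the object
## names below are arbitrary. The example is wrapped in if (FALSE) so that
## sourcing this file does not run it.
## ------------------------------------------------------------------------
if (FALSE) {
library(ade4)
data(chickenk)
Mortality <- chickenk[[1]]
dudiY.chick <- dudi.pca(Mortality, center = TRUE, scale = TRUE, scannf = FALSE)
ktabX.chick <- ktab.list.df(chickenk[2:5])
res.chick <- mbpls(dudiY.chick, ktabX.chick, scale = TRUE, option = "uniform", scannf = FALSE, nf = 3)
res.chick$XYcoef  ## regression coefficients of X onto each Y variable
}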