#############################################################################################################
# Authors:
# Florian Rohart, The University of Queensland, The University of Queensland Diamantina Institute, Translational Research Institute, Brisbane, QLD
# Benoit Gautier, The University of Queensland, The University of Queensland Diamantina Institute, Translational Research Institute, Brisbane, QLD
# Amrit Singh, the University of British Columbia, Vancouver, Canada
# Kim-Anh Le Cao, The University of Queensland, The University of Queensland Diamantina Institute, Translational Research Institute, Brisbane, QLD
#
# created: 20-07-2014
# last modified: 04-10-2017
#
# Copyright (C) 2014
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#############################################################################################################
#############################################################################################################
# Functions modified from RGCCA R-library
# sgcca(), sgccak()
#
# Functions acquired from RGCCA R-library, see 'internal_mint.block_helpers.R'
# cov2(), initsvd(), crossprod(),
# defl.select(),
#############################################################################################################
internal_mint.block = function (A, indY = NULL, design = 1 - diag(length(A)), tau=NULL,#rep(1, length(A)),
ncomp = rep(1, length(A)), scheme = "horst", scale = TRUE,
init = "svd.single", tol = 1e-06,
mode = "canonical", max.iter = 100,study = NULL, keepA = NULL,
penalty = NULL, all.outputs = FALSE, misdata = NULL, is.na.A = NULL, ind.NA = NULL, ind.NA.col = NULL, remove.object=NULL)
{
# A: list of matrices
# indY: integer, pointer to one of the matrices of A
# design: design matrix, links between matrices. Diagonal must be 0
# tau: numeric vector of length the number of blocks in \code{X}. Each regularization parameter is applied to its block and takes a value between 0 (no regularisation) and 1. If tau = "optimal" the shrinkage parameters are estimated for each block and each dimension
# ncomp: vector of ncomp, per matrix
# scheme: the scheme function g, one of "horst", "factorial" or "centroid" (see the RGCCA article)
# scale: logical, whether to scale each block; mean-centering is always performed and cannot be changed (so far)
# init: one of "svd" or "random", initialisation of the algorithm
# tol: convergence tolerance of the iterative algorithm
# mode: one of "canonical", "classic", "invariant", "regression"
# max.iter: maximum number of iterations
# study: factor indicating the study membership of each sample; must be a vector
# keepA: keepX of (s)pls for each matrix of A; must be a list, each entry of the same length (max ncomp)
# penalty: numeric vector of length the number of blocks in \code{X}. Each penalty parameter will be applied on each block and takes the value between 0 (no variable selected) and 1 (all variables included).
# all.outputs: calculation of non-essential outputs (e.g. explained variance, loadings.Astar, etc)
# misdata: optional. any missing values in the data? list, misdata[[q]] for each data set
# is.na.A: optional. where are the missing values? list, is.na.A[[q]] for each data set (if misdata[[q]] == TRUE)
# ind.NA: optional. which rows have missing values? list, ind.NA[[q]] for each data set.
# ind.NA.col: optional. which col have missing values? list, ind.NA.col[[q]] for each data set.
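# For illustration (hypothetical values): with J = 2 blocks and max(ncomp) = 2, keepA could be
# list(matrix(c(10, 5), nrow = 1), matrix(c(10, 5), nrow = 1)): one matrix per component,
# one row per model to fit, one column per block (see the keepA comment further below)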
names(ncomp) = names(A)
time = FALSE
if(time) time1=proc.time()
# center the data per study, per matrix of A; scale if scale = TRUE (optional)
mean_centered = lapply(A, function(x){mean_centering_per_study(x, study, scale)})
if(time) time1bis=proc.time()
if(time) print("scaling part1")
if(time) print(time1bis-time1)
A = lapply(mean_centered, function(x){as.matrix(x$concat.data)})
#save rownames study
mean_centered.rownames.study = vector("list", nlevels(study))
for (m in 1:nlevels(study))
mean_centered.rownames.study[[m]] = mean_centered[[1]]$rownames.study[[m]]
rm(mean_centered) #free memory
ni = table(study) #number of samples per study
### Start: Initialization parameters
pjs = sapply(A, NCOL)
nb_ind = NROW(A[[1]])
J = length(A)
R = A # R: residuals matrices, will be a list of length ncomp
N = max(ncomp)
AVE_inner = AVE_outer = rep(NA, max(ncomp))
# keepA[[comp]] is a matrix where each row is one set of keepX values to test (each block is a column)
# number of models to be tested: either one keepA row per component, or several (e.g. in tune functions)
number.models.per.comp = sapply(keepA, nrow)
one.model = !any( number.models.per.comp !=1)
if(time) print("one.model")
if(time) print(one.model)
AVE_X = crit = loadings.partial.A = variates.partial.A = tau.rgcca = list()
P = loadings.A = loadings.Astar = variates.A = vector("list", J)
if(one.model) # more outputs than what is needed for the tune functions
{
for (k in 1:J)
variates.A[[k]] = matrix(NA_real_, nb_ind, N)
for (k in 1:J)
{
loadings.A[[k]] = matrix(NA_real_, pjs[[k]], N)
if(all.outputs)
P[[k]] = loadings.Astar[[k]]= matrix(NA_real_, pjs[[k]], N)
}
for (k in 1:J)
{
loadings.partial.A[[k]] = variates.partial.A[[k]] = vector("list", length = nlevels(study))
for(m in 1:nlevels(study))
{
loadings.partial.A[[k]][[m]] = matrix(NA_real_,nrow = NCOL(A[[k]]), ncol = N)
variates.partial.A[[k]][[m]] = matrix(NA_real_,nrow = ni[m], ncol = N)
}
}
} else {
for (k in 1:J)
{
variates.A[[k]] = matrix(NA_real_, nb_ind, sum(number.models.per.comp))
loadings.A[[k]] = matrix(NA_real_, pjs[[k]], sum(number.models.per.comp))
}
loadings.partial.A = variates.partial.A = NULL # not needed for tune functions
}
ndefl = ncomp - 1
J2 = J-1
if (is.vector(tau))
tau = matrix(rep(tau, N), nrow = N, ncol = length(tau), byrow = TRUE)
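# e.g. (hypothetical values) tau = c(0.5, 1) with N = 3 gives a 3 x 2 matrix whose
# three rows are each c(0.5, 1): one row of regularization parameters per component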
#save(list=ls(),file="temp.Rdata")
# if missing values are not given as input (only on a direct call to a (mint).(block).(s)pls(da) function), we search for them here (takes time)
if(is.null(misdata) & is.null(is.na.A) & is.null(ind.NA) & is.null(ind.NA.col))
{
misdata = sapply(A, anyNA) # Detection of missing data per block
misdata.all = any(misdata) # is there any missing data overall
if(time) print(misdata.all)
#save(list=ls(),file="temp.Rdata")
if (misdata.all)
{
is.na.A = temp = vector("list",length=length(A))
is.na.A[misdata] = lapply(A[misdata], is.na) # n*p logical: which entries are NA; each flagged block contains at least one TRUE
temp[misdata] = lapply(is.na.A[misdata], function(x){which(x,arr.ind=TRUE)})
ind.NA = lapply(temp, function(x){unique(x[,1])})
ind.NA.col = lapply(temp, function(x){unique(x[,2])})
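# e.g. if A[[q]] has a single NA at entry (3, 7): is.na.A[[q]] is the n x p logical
# matrix with one TRUE, ind.NA[[q]] is 3 (rows with NAs) and ind.NA.col[[q]] is 7 (columns with NAs)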
#ind.NA = ind.NA.col = list()
#for(q in 1:J)
#{
# ind.NA[[q]] = which(apply(is.na.A[[q]], 1, sum) >0) # indice of the row that have missing values. used in the calculation of loadings
# ind.NA.col[[q]] = which(apply(is.na.A[[q]], 2, sum) >0) # indice of the col that have missing values. used in the deflation
#}
}else {
is.na.A = NULL
ind.NA = ind.NA.col = NULL
}
} else{
misdata.all = any(misdata)
}
if(time) print(misdata)
if(all.outputs & J==2 & nlevels(study) == 1 & one.model) #(s)pls(da) models, we calculate mat.c
{
if(misdata.all)
{
p.ones = rep(1, ncol(A[[1]]))
is.na.X = is.na.A[[1]]
}
mat.c = matrix(0, nrow = ncol(A[[1]]), ncol = N, dimnames = list(colnames(A[[1]]), paste0("comp ", 1:N)))
} else {mat.c = NULL}
### End: Initialization parameters
iter=NULL
compteur = 0
for (comp in 1 : N)
{
if(time) cat("====================================================================================================\n")
if(time) print(paste0("component ", comp))
if(time) cat("====================================================================================================\n")
#save(list=ls(),file="temp.Rdata")
if(misdata.all)# replace NA in A[[q]] by 0
for(j in c(1:J)[misdata])
R[[j]][is.na.A[[j]]]=0 # faster than using replace
#R = lapply(1:J, function(q){if(misdata[q]) {replace(R[[q]], is.na.A[[q]], 0)}else{R[[q]]}}) # if missing data, R is the one replace by 0 where NA are supposed to be
# initialisation_by_svd, get the loadings.A
loadings.A.init = initialisation_by_svd(R, indY, misdata, is.na.A, init = init)
# loop over the rows of keepA[[comp]]: several keepX values can be tested per block; each block must have the same number of values
# we assume keepA[[comp]] is a grid here: columns are the blocks, rows are the different keepX
for(ijk.keepA in 1:nrow(keepA[[comp]]))
{
compteur = compteur +1
if(time) cat("---------------------------------------------------------\n")
if(time) print(paste0("keepA ", ijk.keepA))
if(time) cat("---------------------------------------------------------\n")
keepA.ijk = keepA[[comp]][ijk.keepA,]
### start - repeat/convergence
if (is.null(tau))
{
mint.block.result = sparse.mint.block_iteration(R, design, study = study, loadings.A = loadings.A.init,
keepA = keepA.ijk, #keepA is one value per block
scheme = scheme, max.iter = max.iter, tol = tol, penalty = penalty,
misdata=misdata, is.na.A=is.na.A, ind.NA = ind.NA,
all.outputs = all.outputs)
} else {
mint.block.result = sparse.rgcca_iteration(R, design, tau = if (is.matrix(tau)){tau[comp, ]} else {"optimal"},
scheme = scheme, init = init, tol = tol,
max.iter = max.iter, penalty = penalty,
keepA = keepA.ijk, all.outputs = all.outputs)
}
### end - repeat/convergence
if(time) time3 = proc.time()
if(time) time4 = proc.time()
if(time) print("one comp")
if(time) print(time4-time3)
if(time) time4 = proc.time()
if(one.model)
{
# reshape outputs
for (k in 1 : J)
{
loadings.A[[k]][, comp] = mint.block.result$loadings.A[[k]]
variates.A[[k]][, comp] = mint.block.result$variates.A[, k]
if(is.null(tau))
{
# recording loadings.partials, $Ai$study[,ncomp]
# recording variates.partials, $Ai[,ncomp]
for(k in 1:J)
{
for(m in 1:nlevels(study))
{
loadings.partial.A[[k]][[m]][, comp] = matrix(mint.block.result$loadings.partial.A.comp[[k]][[m]], ncol=1)
variates.partial.A[[k]][[m]][, comp] = matrix(mint.block.result$variates.partial.A.comp[[k]][[m]], ncol=1)
}
}
}
}
} else {
# no record of partial components for multiple models, to save memory
for (k in 1 : J)
{
loadings.A[[k]][, compteur] = mint.block.result$loadings.A[[k]]
variates.A[[k]][, compteur] = mint.block.result$variates.A[, k]
}
}
crit[[comp]] = mint.block.result$crit
tau.rgcca[[comp]] = mint.block.result$tau
if(all.outputs)
AVE_inner[comp] = mint.block.result$AVE_inner
if(all.outputs & J==2 & nlevels(study) == 1 & one.model)# mat.c, (s)pls(da)
{
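# mat.c[, comp] stores the regression coefficients c = X't / (t't) of the variables of
# X on the variate t; with missing data, NAs are treated as 0 in the numerator and
# t't is recomputed per column over the observed entries only (see below)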
if(misdata.all) #only one model, so misdata[1]=TRUE
{
R.temp = R[[1]]
R.temp[is.na.X] = 0
c = crossprod(R.temp, variates.A[[1]][,comp])
rm(R.temp) #free memory
#save(list=ls(),file="temp.Rdata")
t.norm = rep(crossprod(variates.A[[1]][,comp]), length(c))
if(length(ind.NA.col[[1]])>0) # should always be true
{
temp = drop(variates.A[[1]][,comp]) %o% rep(1, length(ind.NA.col[[1]])) # n x (number of columns with NAs): one copy of t per such column
temp[is.na.X[,ind.NA.col[[1]],drop=FALSE]] = 0
t.norm[ind.NA.col[[1]]] = apply(temp,2, crossprod)
}
c = c / t.norm
mat.c[,comp] = c
} else {
mat.c[,comp] <- t(crossprod(variates.A[[1]][,comp], R[[1]])) / drop(crossprod (variates.A[[1]][,comp]))
}
} else {
mat.c = NULL
}
# deflation if there is more than one component and we have not reached the maximum number of components (N)
if (N != 1 & comp != N)
{
if(time) time4 = proc.time()
defla.result = defl.select(yy=mint.block.result$variates.A, rr=R, nncomp=ndefl, nn=comp, nbloc = J, indY = indY, mode = mode, aa = mint.block.result$loadings.A,
misdata=misdata, is.na.A=is.na.A, ind.NA = ind.NA.col)
R = defla.result$resdefl
if(!(all.outputs & one.model)) #rm only if not used in the loop below
defla.result$resdefl=NULL #free memory
if(time) time5 = proc.time()
if(time) print("deflation")
if(time) print(time5-time4)
}
if(all.outputs & one.model) #loadings.Astar
{
for (k in 1 : J)
{
if (N != 1)
P[[k]][, comp - 1] = defla.result$pdefl[[k]]
}
if (comp == 1)
{
for (k in 1 : J)
loadings.Astar[[k]][, comp] = mint.block.result$loadings.A[[k]]
} else {
for (k in 1 : J)
loadings.Astar[[k]][, comp] = mint.block.result$loadings.A[[k]] - loadings.Astar[[k]][, 1 : (comp - 1), drop = F] %*% drop(t(loadings.A[[k]][, comp]) %*% P[[k]][, 1 : (comp - 1), drop = F])
}
} else {
loadings.Astar = NULL
}
iter = c(iter, mint.block.result$iter)
} ### End loop on keepA
} ### End loop on ncomp
#### any model
# loadings.A[[block]][1:p, all.keepA.tested]
# variates.A[[block]][1:n, all.keepA.tested]
#### a single model
# loadings.partial.A[[block]][[study]][, 1:ncomp]
# variates.partial.A[[block]][[study]][, 1:ncomp]
# loadings.Astar[[block]][, 1:ncomp]
if(one.model)
{
# only one model
shave.matlist = function(mat_list, nb_cols) mapply(function(m, nbcomp) m[, 1:nbcomp, drop = FALSE], mat_list, nb_cols, SIMPLIFY = FALSE)
shave.veclist = function(vec_list, nb_elts) mapply(function(m, nbcomp) m[1:nbcomp], vec_list, nb_elts, SIMPLIFY = FALSE)
for (k in 1:J)
{
rownames(loadings.A[[k]]) = colnames(A[[k]])
if(all.outputs)
rownames(loadings.Astar[[k]]) = colnames(A[[k]])
rownames(variates.A[[k]]) = rownames(A[[k]])
colnames(variates.A[[k]]) = colnames(loadings.A[[k]]) = paste0("comp ", 1:max(ncomp))
if(all.outputs)
AVE_X[[k]] = apply(cor(A[[k]], variates.A[[k]])^2, 2, mean)
if (is.null(tau))
{
names(loadings.partial.A[[k]]) = names(variates.partial.A[[k]]) = levels(study)
for (m in 1:nlevels(study))
{
rownames(loadings.partial.A[[k]][[m]]) = colnames(A[[k]])
colnames(loadings.partial.A[[k]][[m]]) = paste0("comp ", 1:max(ncomp))
rownames(variates.partial.A[[k]][[m]]) = mean_centered.rownames.study[[m]]
colnames(variates.partial.A[[k]][[m]]) = paste0("comp ", 1:max(ncomp))
}
}
}
variates.A = shave.matlist(variates.A, ncomp)
if(all.outputs)
{
# AVE
outer = matrix(unlist(AVE_X), nrow = max(ncomp))
for (j in 1 : max(ncomp))
AVE_outer[j] = sum(pjs * outer[j, ])/sum(pjs)
AVE_X = shave.veclist(AVE_X, ncomp)
AVE = list(AVE_X = AVE_X, AVE_outer = AVE_outer, AVE_inner = AVE_inner)
names(AVE$AVE_X) = names(A)
loadings.Astar = shave.matlist(loadings.Astar, ncomp)
# calculate the explained variance
A_split=lapply(A, study_split, study) #split the data per study
expl.A=lapply(1:length(A),function(x){
if (nlevels(study) == 1)
{
temp = suppressWarnings(explained_variance(A[[x]], variates = variates.A[[x]], ncomp = ncomp[[x]]))
}else{
temp = lapply(1:nlevels(study), function(y){
suppressWarnings(explained_variance(A_split[[x]][[y]], variates = variates.partial.A[[x]][[y]], ncomp = ncomp[[x]]))})
temp[[length(temp)+1]] = explained_variance(A[[x]], variates = variates.A[[x]], ncomp = ncomp[[x]])
names(temp) = c(levels(study), "all data")
}
temp
})
names(expl.A) = names(A)
} else {
expl.A = NULL
AVE = NULL
}
### Start: output
names(loadings.A) = names(variates.A) = names(A)
if (is.null(tau))
names(loadings.partial.A) = names(variates.partial.A) = names(A)
names = lapply(1:J, function(x) {colnames(A[[x]])})
names(names) = names(A)
names[[length(names) + 1]] = row.names(A[[1]])
names(names)[length(names)] = "indiv"
} else {
# multiple models (tune)
#### any model
# loadings.A[[block]][1:p, all.keepA.tested]
# variates.A[[block]][1:n, all.keepA.tested]
#### a single model
# loadings.partial.A[[block]][[study]][, 1:ncomp]
# variates.partial.A[[block]][[study]][, 1:ncomp]
# loadings.Astar[[block]][, 1:ncomp]
keepA.names = unlist(lapply(1:N, function(x){
paste(paste0("comp",x),apply(keepA[[x]],1,function(x) paste(x,collapse="_")), sep=":")
}))
for(k in 1:J)
colnames(loadings.A[[k]]) = colnames(variates.A[[k]]) = keepA.names
names(loadings.A) = names(variates.A) = names(iter) = names(A)
expl.A = NULL
AVE = NULL
}
out = list(A = A, indY = indY, ncomp = ncomp, mode = mode,
keepA = keepA,
variates = variates.A, loadings = loadings.A,#shave.matlist(loadings.A, ncomp),
variates.partial= if(is.null(tau)) {variates.partial.A} ,loadings.partial= if(is.null(tau)) {loadings.partial.A},
loadings.star = loadings.Astar,
names = list(sample = row.names(A[[1]]), colnames = lapply(A, colnames), blocks = names(A)),
tol = tol, iter=iter, max.iter=max.iter,
design = design,
scheme = scheme, crit = crit, AVE = AVE, mat.c = mat.c, #defl.matrix = defl.matrix,
init = init,
scale = scale, tau = if(!is.null(tau)) tau.rgcca, study = study,
explained_variance = expl.A)
### End: Output
return(out)
}
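# A minimal usage sketch (not run). This assumes the internal helpers from this file and
# 'internal_mint.block_helpers.R' (mean_centering_per_study, study_split, sparsity,
# l2.norm, ...) are loaded; the data and keepA values are hypothetical.
if (FALSE)
{
    set.seed(42)
    n = 20
    A = list(X = matrix(rnorm(n * 10), n, 10),  # block 1: 10 variables
             Y = matrix(rnorm(n * 5), n, 5))    # block 2: 5 variables
    study = factor(rep(1, n))                   # a single study
    # one keepA matrix per component: one row (one model), one column per block
    keepA = list(matrix(c(5, 3), nrow = 1), matrix(c(5, 3), nrow = 1))
    res = internal_mint.block(A, indY = 2, ncomp = c(2, 2), study = study,
                              keepA = keepA, mode = "regression",
                              all.outputs = TRUE)
    res$loadings$X[, "comp 1"]  # sparse loadings of block X on component 1
}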
# ----------------------------------------------------------------------------------------------------------
# sparse.mint.block_iteration - runs one iteration of the sparse MINT block algorithm, modified from sgccak() in RGCCA
# inputs: A - list of datasets each with the same number of rows (samples)
# design - design matrix
# ncomp - vector specifying the number of components to keep per dataset
# outputs:
# ----------------------------------------------------------------------------------------------------------
sparse.mint.block_iteration = function (A, design, study = NULL, loadings.A, keepA = NULL,
scheme = "horst", max.iter = 100, tol = 1e-06,
misdata = NULL, is.na.A = NULL, ind.NA = NULL,
penalty=NULL, all.outputs = FALSE)
{
# keepA is a list of length the number of blocks. Each entry gives the number of variables to select for that block (the component is fixed)
# study is a vector
# == == == == no check needed as this function is only used in internal_mint.block, in which the checks are conducted
time=FALSE
### Start: Initialization parameters
J = length(A)
J2 = J-1
pjs = sapply(A, NCOL)
AVE_X = rep(0, J)
if (!is.null(penalty))
penalty = penalty * sqrt(pjs)
iter = 1
converg = crit = numeric()
variates.A = Z = matrix(0, NROW(A[[1]]), J)
g = function(x) switch(scheme, horst = x, factorial = x^2, centroid = abs(x))
# study split
A_split = lapply(A, study_split, study)
n = lapply(A_split, function(x){lapply(x,nrow)})
p = lapply(A,ncol)
nlevels_study = nlevels(study)
### End: Initialization parameters
if(time) time2 = proc.time()
### End: Initialisation "a" vector
variates.partial.A.comp = NULL
loadings.partial.A.comp = list()
for (q in 1:J)
{
if(misdata[q])
{
loadings.temp = loadings.A[[q]]
variates.A.temp = A[[q]] %*% loadings.temp
# we only want the diagonal, which is the norm of each column of temp
# loadings.A.norm = crossprod(temp)
# variates.A[, q] = variates.A.temp / diag(loadings.A.norm)
#only calculating the ones where there's a NA
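# for samples with missing values, the denominator t(a) %*% a is replaced by the
# squared norm of the loading restricted to the variables observed in that sample
# (the NAs were set to 0 in A upstream, so the numerator already ignores them)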
d.variates.A.norm = rep(crossprod(loadings.temp), length(variates.A.temp))
if(length(ind.NA[[q]])>0) # should always be true
{
temp = drop(loadings.temp) %o% rep(1, length(ind.NA[[q]])) # p x (number of samples with NAs): one copy of the loading vector per such sample
temp[t(is.na.A[[q]][ind.NA[[q]],,drop=FALSE])] = 0
d.variates.A.norm[ind.NA[[q]]] = apply(temp,2, crossprod)
}
variates.A[, q] = variates.A.temp / d.variates.A.norm
# we can have 0/0, so we put 0
a = is.na(variates.A[, q])
if (any(a))
variates.A[a, q] = 0
}else{
variates.A[, q] = A[[q]]%*%loadings.A[[q]]
}
loadings.A[[q]] = l2.norm(as.vector(loadings.A[[q]]))
loadings.partial.A.comp[[q]] = list()
}
loadings.A_old = loadings.A
if(time) time3 = proc.time()
if(time) print("loadings")
if(time) print(time3-time2)
if(time) print("repeat")
### Start Algorithm 1 Sparse generalized canonical analysis (See Variable selection for generalized canonical correlation analysis (Tenenhaus))
repeat {
#variates.Aold could be used as a convergence criterion for the algorithm; it is not used at the moment
variates.Aold = variates.A
for (q in 1:J)
{
if(time) print(paste("repeat, block",q))
### Start : !!! Impact of the diag of the design matrix !!! ###
if (scheme == "horst")
CbyCovq = design[q, ]
if (scheme == "factorial")
CbyCovq = design[q, ] * cov2(variates.A, variates.A[, q])
if (scheme == "centroid")
CbyCovq = design[q, ] * sign(cov2(variates.A, variates.A[, q]))
### End : !!! Impact of the diag of the design matrix !!! ###
### Step A start: Compute the inner components
Z[, q] = rowSums(mapply("*", CbyCovq, as.data.frame(variates.A)))
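# i.e. Z[, q] = sum over the blocks j of CbyCovq[j] * variates.A[, j]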
Z_split = study_split(Z[,q,drop=FALSE],study) # split Z by the study factor
### Step A end: Compute the inner components
if(time) time5 = proc.time()
### Step B start: Compute the outer weight ###
# possible further development: remove NAs (replace by 0) and use crossprod
#time5 = proc.time()
temp=0
for (m in 1:nlevels_study)
{
loadings.partial.A.comp[[q]][[m]] = crossprod(A_split[[q]][[m]],Z_split[[m]])
temp=temp+loadings.partial.A.comp[[q]][[m]]
}
loadings.A[[q]] = temp
if(time) time6 = proc.time()
if(time) print("loadings")
if(time) print(time6-time5)
if(time) time6 = proc.time()
# sparse using keepA / penalty
if (!is.null(penalty))
{
loadings.A[[q]] = sparsity(loadings.A[[q]], keepA = NULL, penalty = penalty[q])
}else{
loadings.A[[q]] = sparsity(loadings.A[[q]], keepA[[q]], penalty = NULL)
}
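# sparsity() (from internal_mint.block_helpers.R) shrinks the loading vector so that
# only the keepA[[q]] variables with largest absolute weight remain non-zero
# (soft-thresholding), or applies the l1 penalty when 'penalty' is supplied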
loadings.A[[q]]=l2.norm(as.vector(loadings.A[[q]]))
if(time) time7 = proc.time()
### Step B end: Compute the outer weight ###
if(misdata[q])
{
variates.A.temp = A[[q]] %*% loadings.A[[q]]
d.variates.A.norm = rep(crossprod(loadings.A[[q]]), length(variates.A.temp))
if(length(ind.NA[[q]])>0)
{
temp = drop(loadings.A[[q]]) %o% rep(1, length(ind.NA[[q]]))
temp[t(is.na.A[[q]][ind.NA[[q]],,drop=FALSE])] = 0
d.variates.A.norm[ind.NA[[q]]] = apply(temp,2, crossprod)
}
variates.A[, q] = variates.A.temp / d.variates.A.norm
# we can have 0/0, so we put 0
a = is.na(variates.A[, q])
if (any(a))
variates.A[a, q] = 0
}else{
variates.A[, q] = A[[q]]%*%loadings.A[[q]]
}
if(time) time8 = proc.time()
if(time) print("variates")
if(time) print(time8-time7)
}
crit[iter] = sum(design * g(cov2(variates.A)))
if (iter > max.iter)
warning("The SGCCA algorithm did not converge", call. = FALSE)#cat("The SGCCA algorithm did not converge after", max.iter ,"iterations."))
### Start: Match algorithm with mixOmics algo (stopping point)
### if ((converg[iter] < tol & sum(stationnary_point) == J) | iter > max.iter)
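### current criterion: stop when max_j || a_j - a_j_old ||^2 < tol, or when iter > max.iter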
if (max(sapply(1:J, function(x){crossprod(loadings.A[[x]] - loadings.A_old[[x]])})) < tol | iter > max.iter)
break
### End: Match algorithm with mixOmics algo (stopping point)
loadings.A_old = loadings.A
iter = iter + 1
}
### End Algorithm 1 (See Variable selection for generalized canonical correlation analysis (Tenenhaus))
# calculate variates.partial.A.comp
variates.partial.A.comp = apply(variates.A, 2, study_split, study)
if(all.outputs){
AVE_inner = sum(design * cor(variates.A)^2/2)/(sum(design)/2)
} else{
AVE_inner = NULL
}
names(loadings.A) = colnames(variates.A) = names(variates.partial.A.comp) = names(A)
result = list(variates.A = variates.A, loadings.A = loadings.A, crit = crit[which(crit != 0)],
AVE_inner = AVE_inner, loadings.partial.A.comp = loadings.partial.A.comp, variates.partial.A.comp = variates.partial.A.comp, iter = iter)
return(result)
}
# ----------------------------------------------------------------------------------------------------------
# sparse.rgcca_iteration - runs one iteration of sparse RGCCA, modified from rgccak() in RGCCA
# inputs: A - list of datasets each with the same number of rows (samples)
# design - design matrix
# ncomp - vector specifying the number of components to keep per dataset
# outputs:
# ----------------------------------------------------------------------------------------------------------
sparse.rgcca_iteration = function (A, design, tau = "optimal", scheme = "horst", scale = FALSE, max.iter = 100,
init = "svd.single", tol = .Machine$double.eps, keepA = NULL, penalty = NULL, all.outputs = FALSE)
{
### Start: Initialisation parameters
A = lapply(A, as.matrix)
J = length(A)
n = NROW(A[[1]])
pjs = sapply(A, NCOL)
variates.A = matrix(0, n, J)
if (!is.null(penalty))
penalty = penalty * sqrt(pjs)
### End: Initialisation parameters
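# tau = "optimal": the shrinkage parameter of each block is estimated analytically by
# tau.estimate() (Schafer & Strimmer 2005 formula, as in the RGCCA package)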
if (!is.numeric(tau))
tau = sapply(A, tau.estimate)
loadings.A = alpha = M = Minv = K = list()
which.primal = which(n >= pjs)
which.dual = which(n < pjs)
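# primal formulation when n >= p_j (work with p_j x p_j matrices), dual formulation
# when p_j > n (work with the n x n kernel K[[j]] = A[[j]] %*% t(A[[j]]))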
if (init == "svd.single")
{
for (j in which.primal)
loadings.A[[j]] = initsvd(lapply(j, function(x) {replace(A[[x]], is.na(A[[x]]), 0)})[[1]])
for (j in which.dual)
{
alpha[[j]] = initsvd(lapply(j, function(x) {replace(A[[x]], is.na(A[[x]]), 0)})[[1]])
K[[j]] = A[[j]] %*% t(A[[j]])
}
} else {
stop("init should be 'svd.single'.")
}
N = n
for (j in 1 : J)
{
if (j %in% which.primal)
{
M[[j]] = ginv(tau[j] * diag(pjs[j]) + (1 - tau[j]) * cov2(A[[j]]))
loadings.A[[j]] = drop(1/sqrt(t(loadings.A[[j]]) %*% M[[j]] %*% loadings.A[[j]])) * M[[j]] %*% loadings.A[[j]]
}
if (j %in% which.dual)
{
M[[j]] = tau[j] * diag(n) + (1 - tau[j])/(N) * K[[j]]
Minv[[j]] = ginv(M[[j]])
alpha[[j]] = drop(1/sqrt(t(alpha[[j]]) %*% M[[j]] %*% K[[j]] %*% alpha[[j]])) * alpha[[j]]
loadings.A[[j]] = t(A[[j]]) %*% alpha[[j]]
}
variates.A[, j] = A[[j]] %*% loadings.A[[j]]
}
iter = 1
converg = crit = numeric()
Z = matrix(0, NROW(A[[1]]), J)
loadings.A_old = loadings.A
g = function(x) switch(scheme, horst = x, factorial = x^2, centroid = abs(x))
repeat {
variates.Aold = variates.A
for (j in c(which.primal, which.dual))
{
if (scheme == "horst")
CbyCovq = design[j, ]
if (scheme == "factorial")
CbyCovq = design[j, ] * cov2(variates.A, variates.A[, j])
if (scheme == "centroid")
CbyCovq = design[j, ] * sign(cov2(variates.A, variates.A[, j]))
# Compute the inner components
Z[, j] = rowSums(mapply("*", CbyCovq, as.data.frame(variates.A)))
# Compute the outer weight
if (j %in% which.primal)
loadings.A[[j]] = drop(1/sqrt(t(Z[, j]) %*% A[[j]] %*% M[[j]] %*% t(A[[j]]) %*% Z[, j])) * (M[[j]] %*% t(A[[j]]) %*% Z[, j])
# Compute the outer weight
if (j %in% which.dual)
{
alpha[[j]] = drop(1/sqrt(t(Z[, j]) %*% K[[j]] %*% Minv[[j]] %*% Z[, j])) * (Minv[[j]] %*% Z[, j])
loadings.A[[j]] = t(A[[j]]) %*% alpha[[j]]
}
# sparse using keepA / penalty
if (!is.null(keepA) || !is.null(penalty))
{
temp.norm = norm2(loadings.A[[j]])
if (!is.null(keepA))
{
loadings.A[[j]] = sparsity(loadings.A = loadings.A[[j]], keepA = keepA[[j]], penalty = NULL)
} else if (!is.null(penalty)) {
loadings.A[[j]] = sparsity(loadings.A = loadings.A[[j]], keepA = NULL, penalty = penalty[j])
}
loadings.A[[j]] = (loadings.A[[j]]/norm2(loadings.A[[j]]))*temp.norm
}
# Update variate
variates.A[, j] = A[[j]] %*% loadings.A[[j]]
}
crit[iter] = sum(design * g(cov2(variates.A)))
if (iter > max.iter)
warning("The RGCCA algorithm did not converge")#"cat("The RGCCA algorithm did not converge after", max.iter ,"iterations."))
### Start: Match algorithm with mixOmics algo (stopping point)
if (max(sapply(1:J, function(x){crossprod(loadings.A[[x]] - loadings.A_old[[x]])})) < tol | iter > max.iter)
break
### End: Match algorithm with mixOmics algo (stopping point)
loadings.A_old = loadings.A
iter = iter + 1
}
#if (verbose)
#plot(crit, xlab = "iteration", ylab = "criteria")
if(all.outputs){
AVE_inner = sum(design * cor(variates.A)^2/2)/(sum(design)/2)
} else{
AVE_inner = NULL
}
result = list(variates.A = variates.A, loadings.A = loadings.A, crit = crit[which(crit != 0)],
AVE_inner = AVE_inner, design = design, tau = tau, scheme = scheme,iter=iter, keepA = keepA)
return(result)
}
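# A minimal usage sketch (not run), assuming the helpers cov2(), initsvd(), norm2(),
# sparsity() and tau.estimate() from 'internal_mint.block_helpers.R' and MASS::ginv()
# are available; the data and keepA values are hypothetical.
if (FALSE)
{
    set.seed(1)
    A = list(matrix(rnorm(20 * 10), 20, 10),   # block 1: primal case (n >= p)
             matrix(rnorm(20 * 50), 20, 50))   # block 2: dual case (p > n)
    res = sparse.rgcca_iteration(A, design = 1 - diag(2), tau = c(1, 1),
                                 scheme = "horst", keepA = c(5, 10))
    sapply(res$loadings.A, function(a) sum(a != 0))  # number of selected variables per block
}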