library(tensor)
#' Bayesian Factorization of a Tensor
#'
#' \code{tensorBF} implements the Bayesian factorization of a tensor.
#'
#'
#' Bayesian Tensor Factorization performs tri-linear (CP) factorization of a tensor.
#' The method automatically identifies the number of components,
#' provided K is initialized to a large enough value; see the arguments below.
#' Missing values are supported and should be set as NA's in the data.
#' They will not affect the model parameters, and can be predicted
#' with function \code{\link{predictTensorBF}}, based on the observed values.
#'
#' @param Y is a three-mode tensor to be factorized.
#' @param method the factorization method. Currently only "CP" (default) is supported.
#' @param opts List of model options; see function \code{\link{getDefaultOpts}} for details and defaults.
#' @param K The number of components (i.e. latent variables or factors). Recommended to be
#' set somewhat higher than the expected number of components, so that the method
#' can determine the model complexity by pruning excessive components
#' (default: 20\% of the sum of the two smallest dimensions).
#' Higher values of K increase CPU time.
#'
#' NOTE: Adjust the parameter noiseProp if sufficiently
#' large values of K do not lead to a model with pruned components.
#' @param fiberCentering the mode for which fibers are to be centered at zero (default = NULL).
#' A fiber is the tensor analogue of a vector in a particular mode.
#' Fiber centering and slab scaling are the recommended normalizations for a tensor.
#' For details, see the provided normalization functions and the references therein.
#' @param slabScaling the mode for which slabs are to be scaled to unit variance (default = NULL).
#' A slab is the tensor analogue of a matrix in a particular mode.
#' Alternatively, you can preprocess the data using the provided normalization functions.
#' @param noiseProp c(prop,conf); sets an informative noise prior for tensorBF.
#' The model sets the noise prior such that the expected proportion of
#' variance explained by noise matches this parameter. It is recommended when
#' the standard prior from \code{\link{getDefaultOpts}} seems to overfit the model
#' by not pruning any components even with a high initial K. Use NULL to switch off the
#' informative noise prior.
#'
#' - prop defines the proportion of total variance to be explained by noise (between 0.1 and 0.9),
#'
#' - conf defines the confidence in the prior (between 0.1 and 10).
#'
#'
#' We suggest a default value of c(0.5,0.5) for real data sets.
#' @return A list containing model parameters.
#' For key parameters, the final posterior sample ordered w.r.t. component variance is provided to aid in
#' initial checks; all the posterior samples should be used for model
#' analysis. The list elements are:
#' \item{K}{The number of learned components. If this value is not less than the input argument K, the model should be rerun with a larger K or with the noiseProp parameter set.}
#' \item{X}{a matrix of \eqn{N \times K} dimensions, containing the last Gibbs sample of the first-mode latent variables.}
#' \item{W}{a matrix of \eqn{D \times K} dimensions, containing the last Gibbs sample of the second-mode latent variables.}
#' \item{U}{a matrix of \eqn{L \times K} dimensions, containing the last Gibbs sample of the third-mode latent variables.}
#' \item{tau}{The last sample of noise precision.}
#' and the following elements:
#' \item{posterior}{the posterior samples of model parameters (X,U,W,Z,tau).}
#' \item{cost}{The log-likelihood of the model at each Gibbs iteration.}
#' \item{opts}{The options used to run the model.}
#' \item{conv}{An estimate of the convergence of the model's data reconstruction,
#' based on the Geweke diagnostic. Values significantly above 0.05 indicate that the
#' model has not converged; it should then be rerun with a higher value of iter.burnin in \code{\link{getDefaultOpts}}.}
#' \item{pre}{A list of centering and scaling values used to transform the data, if any. Else an empty list.}
#' @export
#' @examples
#' #Data generation
#' K <- 2
#' X <- matrix(rnorm(20*K),20,K)
#' W <- matrix(rnorm(25*K),25,K)
#' U <- matrix(rnorm(3*K),3,K)
#' Y = 0
#' for(k in 1:K) Y <- Y + outer(outer(X[,k],W[,k]),U[,k])
#' Y <- Y + array(rnorm(20*25*3,0,0.25),dim=c(20,25,3))
#'
#' #Run the method with default options
#' \dontrun{res2 <- tensorBF(Y=Y)}
#'
#' #Run the method with K=3 and iterations=1000
#' \dontrun{opts <- getDefaultOpts(); opts$iter.burnin = 1000}
#' \dontrun{res1 <- tensorBF(Y=Y,K=3,opts=opts)}
#'
#' #Vary the user defined expected proportion of noise variance
#' #explained. c(0.2,1) represents 0.2 as the noise proportion
#' #and confidence of 1
#' \dontrun{res3 <- tensorBF(Y=Y,noiseProp=c(0.2,1))}
#'
tensorBF <- function(Y,method="CP",K=NULL,opts=NULL,fiberCentering=NULL,slabScaling=NULL,noiseProp=c(0.5,0.5)){
if(is.null(Y)) stop("Please specify the input tensor Y to factorize")
if(length(dim(Y))!=3) stop(paste0("Y should be a 3-mode tensor. Current Y is a ",length(dim(Y)),"-mode tensor."))
if(mean(apply(Y,1:length(dim(Y)),is.numeric))!=1) warning("Y contains non-numeric values.")
if("CP"!=method) {
print("Only CP factorization supported currently. Running with CP factorization."); method = "CP"
}
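# Default K: 20% of the sum of the two smallest tensor dimensions (see the documentation of K above).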
if(is.null(K))
K <- ceiling(0.2*sum(sort(dim(Y))[1:2]))
if(is.null(opts) || is(opts)[1] != "list"){
print("Initializing with default options.")
opts <- getDefaultOpts()
}
pre <- list()
if(!is.null(fiberCentering)){
tmp <- normFiberCentering(Y,fiberCentering);
Y <- tmp$data; pre$centers <- tmp$pre$centers;
pre$centeringMode <- tmp$pre$centeringMode;
}
if(!is.null(slabScaling)) {
tmp <- normSlabScaling(Y,slabScaling);
Y <- tmp$data; pre$scales <- tmp$pre$scales;
pre$scalingMode <- tmp$pre$scalingMode;
}
if(!is.null(noiseProp)){
if(length(noiseProp)==2)
opts <- noiseProportion(Y,opts,prop=noiseProp[1],conf=noiseProp[2],fiberCentering=NULL,slabScaling=NULL)
else
stop(paste0("noiseProp should contain prop and conf in the format c(prop,conf). User specified ",length(noiseProp)," values."))
}
res <- tensorBF.compute(Y=list(Y),0,K,opts);
res$pre <- pre
return(res)
}
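## A minimal usage sketch (not executed on source) of the manual-preprocessing
## alternative mentioned in the slabScaling documentation above; it assumes Y
## is a 3-mode numeric array and uses the package's normalization functions
## referenced there. Note that manual preprocessing leaves res$pre empty, so
## predictTensorBF would return predictions in the normalized space.
if (FALSE) {
Yn <- normFiberCentering(Y, 1)$data # center mode-1 fibers at zero
Yn <- normSlabScaling(Yn, 2)$data   # scale mode-2 slabs to unit variance
res <- tensorBF(Y = Yn)             # no fiberCentering/slabScaling arguments needed
}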
#' Predict Missing Values using the Bayesian tensor factorization model
#'
#' \code{predictTensorBF} predicts the missing values in the data \code{Y} using the learned model \code{res}.
#'
#' If the original data \code{Y} contained missing values (NA's),
#' this function predicts them using the model. The predictions are
#' returned in the un-normalized space if \code{res$pre} contains appropriate
#' preprocessing information.
#'
#' @param Y is a 3-mode tensor containing missing values as NA's. See function \code{\link{tensorBF}} for details.
#' @param res the model object returned by the function \code{\link{tensorBF}}.
#' @return A tensor of the same size as \code{Y} containing predicted values in place of NA's.
#' @export
#' @examples
#' #Data generation
#' \dontrun{K <- 2}
#' \dontrun{X <- matrix(rnorm(20*K),20,K)}
#' \dontrun{W <- matrix(rnorm(30*K),30,K)}
#' \dontrun{U <- matrix(rnorm(3*K),3,K)}
#' \dontrun{Y = 0}
#' \dontrun{for(k in 1:K) Y <- Y + outer(outer(X[,k],W[,k]),U[,k])}
#' \dontrun{Y <- Y + array(rnorm(20*30*3,0,0.25),dim=c(20,30,3))}
#'
#' #insert missing values
#' \dontrun{m.inds = sample(prod(dim(Y)),100)}
#' \dontrun{Yobs = Y[m.inds]}
#' \dontrun{Y[m.inds] = NA}
#'
#' #Run the method with default options and predict missing values
#' \dontrun{res <- tensorBF(Y)}
#' \dontrun{pred = predictTensorBF(Y=Y,res=res)}
#' \dontrun{plot(Yobs,pred[m.inds],xlab="obs",ylab="pred",main=round(cor(Yobs,pred[m.inds]),2))}
predictTensorBF <- function(Y,res){
recon <- reconstructTensorBF(res)
recon[!is.na(Y)] = Y[!is.na(Y)]
return(recon)
}
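## For reference, a hedged sketch of the CP reconstruction behind the
## prediction above. reconstructTensorBF() is part of the package; this
## hypothetical helper (not part of the package API) instead reconstructs the
## tensor from only the last Gibbs sample stored in res:
reconstructLastSample <- function(res) {
recon <- 0
for (k in 1:res$K) # sum of rank-1 terms X[,k] o W[,k] o U[,k]
recon <- recon + outer(outer(res$X[, k], res$W[, k]), res$U[, k])
recon
}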
tensorBF.compute <- function(Y,IsMat,K,opts){
if(!is.numeric(K)) stop("K must be a numeric value.")
if(K<1) stop(paste0("K = ",K," is too small. Increase K and rerun."))
M <- length(Y)
N <- dim(Y[[1]])[1]
if(sum(IsMat==0) == 0)
L <- 1
else
L <- dim(Y[[which(!IsMat)[1]]])[3]
D <- unlist(lapply(Y,function(x){dim(x)[2]}))
Ls <- unlist(lapply(Y,function(x){dim(x)[3]})); Ls[is.na(Ls)] = 1
const <- - N*sum(Ls*D)*log(2*pi)/2 # constant term of the Gaussian log-likelihood
id <- rep(1,K)
X <- matrix(rnorm(N*K),N,K)
covX <- diag(K)
XX <- crossprod(X)
XXtmp <- XX
XXdiag <- rep(1,K)
if(sum(IsMat==0) == 0)
U <- matrix(1,L,K)
else
U <- matrix(rnorm(L*K),L,K)
covU <- diag(K)
UU <- crossprod(U)
UUdiag <- rep(1,K)
alpha_0 <- opts$prior.alpha_0 # ARD hyperparameters
beta_0 <- opts$prior.beta_0
Z <- matrix(1,M,K) # binary mask
alpha_0t <- opts$prior.alpha_0t
beta_0t <- opts$prior.beta_0t
if(length(alpha_0t)<M) alpha_0t = rep(alpha_0t,M)
if(length(beta_0t)<M) beta_0t = rep(beta_0t,M)
prior.betaW1 <- opts$prior.betaW1
prior.betaW2 <- opts$prior.betaW2
ARDW <- opts$ARDW
ARDU <- opts$ARDU
ARDX <- opts$ARDX
TAU_SETTING <- "S" # "S": a single shared noise precision per view ("D" and "LD" variants appear in the cost computation below)
DEBUGMODE <- FALSE
VERBOSE <- opts$verbose
alphaU <- matrix(K*ARDU+1,L,K)
alphaX <- matrix(K*ARDX+1,N,K)
alpha <- vector("list")
for(m in 1:M)
alpha[[m]] <- matrix(K*ARDW+1,D[m],K)
tau <- vector("list")
b_tau <- vector("list")
for(m in 1:M)
{
tau[[m]] <- matrix(rep(opts$init.tau,Ls[m]*D[m]),Ls[m],D[m],byrow=TRUE)
b_tau[[m]] <- matrix(rep(1,Ls[m]*D[m]),Ls[m],D[m],byrow=TRUE) # initialization
}
a_tau <- N/2 # overwritten per view in the noise-sampling step below
W <- vector("list",length=M)
for(m in 1:M)
W[[m]] <- matrix(0,D[m],K)
r <- rep(0.5,K)
iter.burnin <- opts$iter.burnin
iter.sampling <- opts$iter.sampling
iter.thinning <- opts$iter.thinning
maxiter <- iter.burnin + iter.sampling*iter.thinning
if(VERBOSE >= 0) print(paste0("Initializing the model with ",K," components."))
posterior <- list();
if(iter.sampling>0)
{
posterior$tau <- list(); length(posterior$tau) <- ceiling(iter.sampling/iter.thinning)
posterior$U <- list(); length(posterior$U) <- ceiling(iter.sampling/iter.thinning)
posterior$W <- list(); length(posterior$W) <- ceiling(iter.sampling/iter.thinning)
posterior$X <- list(); length(posterior$X) <- ceiling(iter.sampling/iter.thinning)
posterior$Z <- list(); length(posterior$Z) <- ceiling(iter.sampling/iter.thinning)
#posterior$cost <- rep(NA,ceiling(iter.sampling/iter.thinning))
}
# the component numbers and loglikelihood
cost <- rep(0,maxiter)
if(K>300 && VERBOSE>0) {
print("Computational cost may be high for K>300. Consider using the noiseProp parameter to set an informative noise prior together with a lower value of K.")
}
##Missing Values
missingValues = FALSE
na.inds = list()
for(m in 1:M)
{
inds = which(is.na(Y[[m]]))
if(length(inds)>0)
{
na.inds[[m]] = inds
missingValues = TRUE
}
}
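# Pad the list to length M+1 so that views without missing values keep NULL entries.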
na.inds[M+1] = 0
missingValues.InitIter = round(iter.burnin/2,0) # iteration up to which missing entries are zero-filled; afterwards they are imputed with model predictions (EM-type)
if(missingValues && VERBOSE>0){
str="Missing Values Detected"
if(missingValues.InitIter<=iter.burnin)
str = paste0(str,", Learning using EM type approximation after ",missingValues.InitIter," iterations.")
print(str)
}
## Missing Values end
for(iter in 1:maxiter){
if(VERBOSE==2)
print(paste("iter: ",iter,sep=""))
#
# sample Z,U and W
#
if(iter > 1){
XXUU <- XX*UU
XXUUdiag <- diag(XXUU)
diag(XXUU) <- 0 #for j!=k matrix summations
XXtmp <- XX
XXdiag <- diag(XXtmp)
diag(XXtmp) <- 0 #for j!=k matrix summations
for(m in 1:M){
for(k in 1:K){
if(IsMat[m]) lambda <- tau[[m]][1,]*XXdiag[k] + alpha[[m]][,k]
else lambda <- tau[[m]][1,]*XXUUdiag[k] + alpha[[m]][,k]
if(missingValues && (missingValues.InitIter >= iter) )
{
mu_sub <- 0
if(IsMat[m])
{
UX = X; UXK = UX[,k]; UX[,k] = 0
ss <- tcrossprod(UX,W[[m]])
tmp = Y[[m]]-ss; tmp[is.na(tmp)] = 0;
mu_sub <- mu_sub + crossprod(tmp,UXK)
}
else{
for(l in 1:L)
{
UX = X*matrix(rep(U[l,],N),N,K,byrow=T); UXK = UX[,k]; UX[,k] = 0
ss <- tcrossprod(UX,W[[m]])
tmp = Y[[m]][,,l]-ss; tmp[is.na(tmp)] = 0;
mu_sub <- mu_sub + crossprod(tmp,UXK)
}}
mu = mu_sub*tau[[m]][1,1]/lambda
}
else
{
if(IsMat[m]) mu <- (tau[[m]][1,]/lambda)*(crossprod(Y[[m]],X[,k]) - W[[m]]%*%(XXtmp[k,]))
else mu <- (tau[[m]][1,]/lambda)*(crossprod(tensor::tensor(Y[[m]],U[,k],3,1),X[,k]) - W[[m]]%*%(XXUU[k,]))
}
#logpr of activating the component
logpr <- 0.5*( sum(log(alpha[[m]][,k]) - log(lambda)) + crossprod(mu*sqrt(lambda)) ) + log(r[k]) - log(1-r[k])
zone <- 1/(1+exp(-logpr))
if(DEBUGMODE)
debug$zone[m,k] = logpr
if(iter > 200){ # keep all components active during the first 200 iterations
Z[m,k] <- as.double((runif(1) < zone))
}
if(Z[m,k]==1){
W[[m]][,k] <- mu + 1/sqrt(lambda)*rnorm(D[m])
}else{
W[[m]][,k] <- 0
}
}
}
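# Update the shared activation probability r from its Beta-Bernoulli
# posterior mean; prior.betaW1/prior.betaW2 act as pseudo-counts.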
r <- rep( (prior.betaW1+sum(Z))/(M*K+prior.betaW1+prior.betaW2), K)
}else{
# alternative is to use method similar to VB
# note now this is more connected to initialization
for(m in 1:M){
if(missingValues && (missingValues.InitIter < iter))
{
##Mean imputation here could give a bad initialization when many values are missing, so it is skipped until the EM-type phase.
if(length(na.inds[[m]])>0)
Y[[m]][na.inds[[m]]] = mean(Y[[m]][-na.inds[[m]]])
}
if(IsMat[m]) covW <- diag(1,K) + opts$init.tau*XX
else covW <- diag(1,K) + opts$init.tau*XX*UU
eSW <- eigen(covW)
covW <- tcrossprod(eSW$vectors*outer(id,1/eSW$values),eSW$vectors)
estimW = matrix(0,D[m],K) #equivalent of crossprod(Y[[m]],X)
for(k in 1:K){
if(missingValues && (missingValues.InitIter >= iter))
{
tmp = Y[[m]]; tmp[is.na(tmp)] = 0;
if(IsMat[m]) estimW[,k] = crossprod(tmp,X[,k])
else estimW[,k] = crossprod(tensor::tensor(tmp,U[,k],3,1),X[,k])
}
else{
if(IsMat[m]) estimW[,k] = crossprod(Y[[m]],X[,k])
else estimW[,k] = crossprod(tensor::tensor(Y[[m]],U[,k],3,1),X[,k])
}
}
W[[m]] <- estimW%*%covW*opts$init.tau + matrix(rnorm(D[m]*K),D[m],K)%*%t( eSW$vectors*outer(id,1/sqrt(eSW$values)) )
}
#Initialize alphaU here
if(TRUE == ARDU)
{
##Elementwise Alpha
alphaU <- apply((U^2)/2,c(1,2),function(x){rgamma(1,shape=alpha_0+1/2,rate=beta_0+x)})
}
#else
#{
##Viewwise Alpha
#tmp <- apply(U^2,2,function(x){rgamma(1,shape=alpha_0+1/2,rate=beta_0+sum(x)/2)})
#alphaU <- matrix(rep(tmp,L),L,K,byrow=TRUE)
#}
if(TRUE == ARDX)
{
alphaX <- apply((X^2)/2,c(1,2),function(x){rgamma(1,shape=alpha_0+1/2,rate=beta_0+x)})
}
#else
#{
#tmp <- apply(X^2,2,function(x){rgamma(1,shape=alpha_0+1/2,rate=beta_0+sum(x)/2)})
#alphaX <- matrix(rep(tmp,N),N,K,byrow=TRUE)
#}
}# end initialization else
################################################################################
#
# sample X (latent variables)
#
################################################################################
#Sample X_n Matrix Version
X <- 0
if(missingValues && (missingValues.InitIter >= iter) )
{
for(m in 1:M){
tmp = Y[[m]]; tmp[is.na(tmp)] = 0;
if(IsMat[m]) X <- X + tmp %*% (W[[m]]*tau[[m]][1,1])
else{
for(l in 1:L) X <- X + ((tmp[,,l]*matrix(rep(tau[[m]][1,],N),N,D[m],byrow=TRUE)) %*% (W[[m]]*matrix(rep(U[l,],D[m]),D[m],ncol(U),byrow=TRUE)))
}
}
}
else
{
for(m in 1:M){
if(IsMat[m]) X <- X + Y[[m]] %*% (W[[m]]*tau[[m]][1,1])
else{
for(l in 1:L) X <- X + ((Y[[m]][,,l]*matrix(rep(tau[[m]][1,],N),N,D[m],byrow=TRUE)) %*% (W[[m]]*matrix(rep(U[l,],D[m]),D[m],ncol(U),byrow=TRUE)))
}
}
}
covX.bk <- 0
for(m in 1:M){
if(IsMat[m]) covX.bk <- covX.bk + crossprod(W[[m]]*sqrt(tau[[m]][1,]))
else covX.bk <- covX.bk + crossprod(W[[m]]*sqrt(tau[[m]][1,]))*UU
}
if(TRUE == ARDX)
{
for(n in 1:N)
{
covX <- covX.bk + diag(alphaX[n,]) #ARD Prior
eS <- eigen(covX)
covX <- tcrossprod( eS$vectors*outer(id,1/sqrt(eS$values)) )
X[n,] <- X[n,]%*%covX + matrix(rnorm(K),1,K)%*%t( eS$vectors*outer(id,1/sqrt(eS$values)) )
}
} else {
#Gaussian Prior with variance I on X
covX <- covX.bk + diag(K)
eS <- eigen(covX)
covX <- tcrossprod( eS$vectors*outer(id,1/sqrt(eS$values)) )
X <- X%*%covX + matrix(rnorm(N*K),N,K)%*%t( eS$vectors*outer(id,1/sqrt(eS$values)) )
}
# Fix the scale ambiguity: rescale X columns to unit sd and compensate in W
Rx <- apply(X,2,sd)
X <- X*outer(rep(1,N),1/Rx)
for(m in 1:M)
W[[m]] <- W[[m]]*outer(rep(1,D[m]),Rx)
XX <- crossprod(X)
################################################################################
#
# sample U
#
################################################################################
if(sum(IsMat==0) != 0) #Update if there is at least one tensor view, else ignore U
{
U <- 0
if(missingValues && (missingValues.InitIter >= iter) )
{
for(m in 1:M){
if(!IsMat[m]){
tmp = Y[[m]]; tmp[is.na(tmp)] = 0;
for(n in 1:N){
U <- U + ((t(tmp[n,,])*matrix(rep(tau[[m]][1,],L),L,D[m],byrow=TRUE)) %*% (W[[m]]*matrix(rep(X[n,],D[m]),D[m],ncol(X),byrow=TRUE)))
}
}}
}
else
{
for(m in 1:M){
if(!IsMat[m]) {
for(n in 1:N) U <- U + ((t(Y[[m]][n,,])*matrix(rep(tau[[m]][1,],L),L,D[m],byrow=TRUE)) %*% (W[[m]]*matrix(rep(X[n,],D[m]),D[m],ncol(X),byrow=TRUE))) }
}
}
covU.bk <- 0
for(m in 1:M){
if(!IsMat[m]) covU.bk <- covU.bk + crossprod(W[[m]]*sqrt(tau[[m]][1,]))*XX
}
if(TRUE == ARDU)
{
for(l in 1:L)
{
covU <- covU.bk + diag(alphaU[l,]) #ARD Prior
eS <- eigen(covU)
covU <- tcrossprod( eS$vectors*outer(id,1/sqrt(eS$values)) )
U[l,] <- U[l,]%*%covU + matrix(rnorm(K),1,K)%*%t( eS$vectors*outer(id,1/sqrt(eS$values)) )
}
} else {
#Gaussian Prior with variance I on U
covU <- covU.bk + diag(K)
eS <- eigen(covU)
covU <- tcrossprod( eS$vectors*outer(id,1/sqrt(eS$values)) )
U <- U%*%covU + matrix(rnorm(L*K),L,K)%*%t( eS$vectors*outer(id,1/sqrt(eS$values)) )
}
# Fix the scale ambiguity: rescale U columns and compensate in the W of tensor views
if(L == 1)
{
Ru <- as.vector(U)
U <- U*outer(rep(1,L),1/Ru)
U[is.na(U)] = 0
for(m in 1:M) # only tensor (non-matrix) views carry U's scale
if(!IsMat[m]) W[[m]] <- W[[m]]*outer(rep(1,D[m]),Ru)
}
else
{
Ru <- apply(U,2,sd)
U <- U*outer(rep(1,L),1/Ru)
U[is.na(U)] = 0
for(m in 1:M)
if(!IsMat[m]) W[[m]] <- W[[m]]*outer(rep(1,D[m]),Ru)
}
UU <- crossprod(U)
}
################################################################################
#
# Sample Hyperparameters
#
################################################################################
if(TRUE == ARDU)
{
#Elementwise alpha
alphaU <- U^2/2
keep <- which(alphaU!=0)
for(n in keep) alphaU[n] <- rgamma(1,shape=alpha_0+0.5,rate=beta_0+alphaU[n])+ 1e-7 #+ 1e-7 to prevent bad values of gamma
alphaU[-keep] <- rgamma( length(alphaU[-keep]),shape=alpha_0,rate=beta_0)+ 1e-7
}
#else
#{
#View-Wise alpha
#tmp <- apply(U^2,2,function(x){rgamma(1,shape=alpha_0+L/2,rate=beta_0+sum(x)/2)})
#alphaU <- matrix(rep(tmp,L),L,K,byrow=TRUE)+ 1e-7
#}
if(TRUE == ARDX)
{
alphaX <- X^2/2
keep <- which(alphaX!=0)
for(n in keep) alphaX[n] <- rgamma(1,shape=alpha_0+0.5,rate=beta_0+alphaX[n])+ 1e-7 #+ 1e-7 to prevent bad values of gamma
alphaX[-keep] <- rgamma( length(alphaX[-keep]),shape=alpha_0,rate=beta_0)+ 1e-7
}
for(m in 1:M)
{
if(TRUE == ARDW)
{
#Elementwise alpha
alpha[[m]] <- W[[m]]^2/2
keep <- which(alpha[[m]]!=0)
for(n in keep) alpha[[m]][n] <- rgamma(1,shape=alpha_0+0.5,rate=beta_0+alpha[[m]][n])+ 1e-7
alpha[[m]][-keep] <- rgamma( length(alpha[[m]][-keep]),shape=alpha_0,rate=beta_0)+ 1e-7
}
#else
#{
#View-Wise alpha
#tmp <- apply(W[[m]]^2,2,function(x){rgamma(1,shape=alpha_0+(as.double(sum(x)!=0)*D[m])/2,rate=beta_0+sum(x)/2)})
#alpha[[m]] <- matrix(rep(tmp,D[m]),D[m],K,byrow=TRUE)+ 1e-7
#}
}
#
# sample noise
#
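# XU is assumed to be the reshaped (N x L x K) Khatri-Rao product of X and U,
# pairing column k of X with column k of U for the rank-1 reconstruction terms.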
XU <- KhatriRao.reshaped(X,U)
for(m in 1:M){
if(IsMat[m]){
XW <- tcrossprod(X,W[[m]])
if(missingValues && (missingValues.InitIter > iter) )
{
XW <- Y[[m]] - XW
XW[is.na(XW)] = 0;
b_tau[[m]][1,] <- 0.5*crossprod((XW)^2,rep(1,dim(Y[[m]])[1]))
}
else{
if(missingValues)
{
if(length(na.inds[[m]])>0)
Y[[m]][na.inds[[m]]] = XW[na.inds[[m]]]
}
b_tau[[m]][1,] <- 0.5*crossprod((Y[[m]] - XW)^2,rep(1,dim(Y[[m]])[1]))
}
#Vector b_tau
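# "S" setting: draw one shared precision per view from its conjugate Gamma posterior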
if("S" == TAU_SETTING)
{
a_tau <- (N*Ls[m]*D[m])/2
b_tau2 <- sum(b_tau[[m]][1,])
b_tau[[m]] <- matrix(b_tau2,Ls[m],D[m])
tau[[m]] <- matrix(rgamma(1,shape= alpha_0t[m]+ a_tau,rate= beta_0t[m] + b_tau2),Ls[m],D[m])
}
}
else {
#single valued tau
#b_tau[m] <- ( tau_tmp[m] - 2*sum( W[[m]]*crossprod(Y[[m]],X) ) +
# sum((W[[m]]%*%XX)*W[[m]]) )/2
#Matrix b_tau generation of D values (not LD) is correct, verified computationally
XWU <- aperm(tensor::tensor(XU,W[[m]],3,2),c(1,3,2))
if(missingValues && (missingValues.InitIter > iter) ) # strict '>': at iter == missingValues.InitIter the else branch below imputes Y[[m]] with the model predictions
{
XWU <- Y[[m]] - XWU
XWU[is.na(XWU)] = 0;
b_tau[[m]][1,] <- 0.5*tensor::tensor( tensor::tensor((XWU)^2,rep(1,dim(Y[[m]])[3]),3,1) ,rep(1,dim(Y[[m]])[1]),1,1)
}
else{
if(missingValues)
{
if(length(na.inds[[m]])>0)
Y[[m]][na.inds[[m]]] = XWU[na.inds[[m]]]
}
b_tau[[m]][1,] <- 0.5*tensor::tensor( tensor::tensor((Y[[m]] - XWU)^2,rep(1,dim(Y[[m]])[3]),3,1) ,rep(1,dim(Y[[m]])[1]),1,1)
}
#Vector b_tau
if("S" == TAU_SETTING)
{
a_tau <- (N*Ls[m]*D[m])/2
b_tau2 <- sum(b_tau[[m]][1,])
b_tau[[m]] <- matrix(b_tau2,Ls[m],D[m])
tau[[m]] <- matrix(rgamma(1,shape= alpha_0t[m]+ a_tau,rate= beta_0t[m] + b_tau2),Ls[m],D[m])
}
}
}
# calculate log likelihood
cost[iter] <- const
for(m in 1:M)
{
if("S" == TAU_SETTING)
cost[iter] <- cost[iter] + N*L*D[m]*0.5*log(tau[[m]][1,1]) - b_tau[[m]][1,1]*tau[[m]][1,1] # Will change when tau reduced to D vector
if("D" == TAU_SETTING)
cost[iter] <- cost[iter] + N*L*0.5*sum(log(tau[[m]][1,])) - sum(b_tau[[m]][1,]*tau[[m]][1,])
if("LD" == TAU_SETTING)
cost[iter] <- cost[iter] + N*0.5*sum(log(tau[[m]])) - sum(b_tau[[m]]*tau[[m]])
}
# print component number and collect numbers
if(VERBOSE==2 || ((VERBOSE==1) && ((iter %% 500) == 0)))
{
str = paste0("Iter: ",iter," Components: ",rowSums(Z)," Empty: ",sum(colSums(Z) == 0)," Cost: ",round(cost[iter],0))
print(str)
}
if(iter > iter.burnin && (((iter - iter.burnin) %% iter.thinning) == 0))
{
ind <- (iter - iter.burnin)/iter.thinning
posterior$tau[[ind]] <- list(tau[[1]][1,1]) #for Bayesian CP
posterior$U[[ind]] <- U
posterior$W[[ind]] <- W
posterior$X[[ind]] <- X
posterior$Z[[ind]] <- Z
if(ind==1 && (sum(colSums(Z) == 0) == 0) )
{
warning("Burn-in period finished, but no components have been shut down, preventing automatic model complexity selection. Consider higher inital K, if computational resources permit. Alternativly, refer to noiseProp parameter of the tensorBF function for defining an informative prior for residual noise.")
}
if(ind==1 && (sum(Z) == 0))
{
warning("Burn-in period finished, and all components have been shut down. Returning a NULL model.")
}
if(ind==1 && (sum(colSums(Z) == 0) > 0.7*K))
{
warning("Burn-in period finished, and more than 70% components have been shut down. Very high values of K could result in a poorly initialized model, therefore, it is recommended to re-run the model around K=", ceiling((sum(colSums(Z) != 0)+2)*1.25 ) )
}
}
}
if(opts$checkConvergence && length(posterior$X)>=8) {
if((sum(Z) == 0)){
conv <- NA
if(VERBOSE>0) print("No active components. Can not estimate convergence.")
} else {
#Estimate the convergence of the data reconstruction, based on the Geweke diagnostic
ps <- floor(length(posterior$X)/4)
start <- 1:ps # first quarter of the posterior samples
end <- (-ps+1):0+length(posterior$X) # last quarter of the posterior samples
Df <- N*D*L
y <- matrix(NA,0,Df)
for(ps in c(start,end)) {
tmp <- rep(0,N*D*L)
for(k in 1:K)
tmp <- tmp + c(outer(outer(posterior$X[[ps]][,k],posterior$W[[ps]][[1]][,k]),posterior$U[[ps]][,k]))
y <- rbind(y, tmp)
}
foo <- rep(NA,Df)
for(j in 1:Df)
foo[j] <- t.test(y[start,j],y[-start,j])$p.value
conv <- mean(foo<=0.05)
if(VERBOSE>0) {
print(paste0("Convergence diagnostic: ",round(conv,4),". Values significantly greater than 0.05 imply a non-converged model."))
}
}
} else {
conv <- NA
}
emptyK = which(colSums(Z) == 0)
if(length(emptyK)>0 && length(emptyK)<K){
if(nrow(Z)==1)
Z = matrix(Z[,-emptyK],1,K-length(emptyK))
else
Z = Z[,-emptyK]
U = U[,-emptyK]
X = X[,-emptyK]
for(m in 1:M)
W[[m]] = W[[m]][,-emptyK]
K = ncol(X)
}
if(K>1){
sdv = apply(X,2,sd)*apply(U,2,sd)
for(m in 1:M)
sdv = sdv*apply(W[[m]],2,sd)
sdv = order(sdv,decreasing=T)
if(nrow(Z)==1)
Z = matrix(Z[,sdv],1,K)
else
Z = Z[,sdv]
U = U[,sdv]
X = X[,sdv]
for(m in 1:M)
W[[m]] = W[[m]][,sdv]
}
if(VERBOSE>=0) {
print(paste0("Run completed with ",K," active components."))
}
#Specialize the output for Bayesian CP
tau = tau[[1]][1,1]
W = W[[1]]
if(length(posterior)>0)
for(i in 1:length(posterior$W))
{
posterior$W[[i]] = posterior$W[[i]][[1]]
}
return(list(X=X,W=W,U=U,K=K,tau=tau,posterior=posterior,cost=cost,opts=opts,conv=conv))
}
#' A function for generating a default set of parameters for Bayesian Tensor Factorization methods
#'
#' \code{getDefaultOpts} returns the default choices for model parameters.
#'
#' This function returns options for defining the model's high-level structure
#' (sparsity priors), the hyperparameters, and the uninformative
#' priors. We recommend keeping these as provided.
#'
#' @param method the factorization method for which options are required.
#' Currently only "CP" (default) is supported.
#' @return A list with the following model options:
#' \item{ARDX}{TRUE: use elementwise ARD prior for X, resulting in sparse X's.
#' FALSE: use Gaussian prior for a dense X (default).}
#' \item{ARDW}{TRUE: use elementwise ARD prior for W, resulting in sparse W's (default).
#' FALSE: use Gaussian prior for a dense W.}
#' \item{ARDU}{TRUE: use elementwise ARD prior for U, resulting in sparse U's.
#' FALSE: use Gaussian prior for a dense U (default).}
#' \item{iter.burnin}{The number of burn-in samples (default 5000).}
#' \item{iter.sampling}{The number of saved posterior samples (default 50).}
#' \item{iter.thinning}{The thinning factor to use in saving posterior samples (default 10).}
#' \item{prior.alpha_0t}{The shape parameter for residual noise (tau's) prior (default 1).}
#' \item{prior.beta_0t}{The rate parameter for residual noise (tau's) prior (default 1).}
#' \item{prior.alpha_0}{The shape parameter for the ARD precisions (default 1e-3).}
#' \item{prior.beta_0}{The rate parameter for the ARD precisions (default 1e-3).}
#' \item{prior.betaW1}{Prior for the component activation probability; prior.betaW1 < prior.betaW2 is sparsity inducing (default: 1).}
#' \item{prior.betaW2}{Prior for the component activation probability (default: 1).}
#' \item{init.tau}{The initial value for noise precision (default 1e3).}
#' \item{verbose}{The verbosity level. 0=no printing, 1=moderate printing,
#' 2=maximal printing (default 1).}
#' \item{checkConvergence}{Check for the convergence of the data reconstruction,
#' based on the Geweke diagnostic (default TRUE).}
#' @export
#' @examples
#' #To run the algorithm with other values:
#' opts <- getDefaultOpts()
#' opts$ARDW <- FALSE #Switch off Feature-level Sparsity on W's
#' \dontrun{res <- tensorBF(Y=Y,opts=opts)}
getDefaultOpts <- function(method = "CP")
{
if("CP" != method)
print("Only CP factorization supported in this version.")
return(list(prior.alpha_0=1e-3,prior.beta_0=1e-3,
prior.alpha_0t=1,prior.beta_0t=1,
verbose=1, init.tau=10^3,
prior.betaW1=1,prior.betaW2=1,
iter.burnin=5000,iter.sampling=50,iter.thinning=10,
checkConvergence=TRUE,
ARDW = TRUE, ARDU = FALSE, ARDX = FALSE))
}
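## A hedged sketch (not executed on source) combining modified options with the
## informative noise prior; it assumes Y is a 3-mode numeric array, mirroring
## the tensorBF examples above:
if (FALSE) {
opts <- getDefaultOpts()
opts$iter.burnin <- 1000 # shorter burn-in for a quick trial run
res <- tensorBF(Y = Y, K = 30, opts = opts, noiseProp = c(0.5, 0.5))
}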