# R/rgeode_root_m.R In RGeode: Geometric Density Estimation

#### Defines functions rgeode_root_m

```# The core of the Gibbs sampling with missing data.
# Perform GEODE with missing data.

rgeode_root_m<- function(Y, d, burn, its, tol, atau, asigma,
bsigma, starttime, stoptime, fast, c0, c1)
{
#*************************************************************************
#***        Author:      L. Rimella <lorenzo.rimella@hotmail.it>       ***
#***                                                                   ***
#***        Supervisors: M. Beccuti                                    ***
#***                     A. Canale                                     ***
#***                                                                   ***
#*************************************************************************

# Fit GEODE on dataset with no missing data

##########################################################################
# First step:
# Computation of the mean and the principal axes and creation
# of the sufficient statistics A,Z
##########################################################################

#Mean and Principal axes
N= dim(Y)[1]
D= dim(Y)[2]

#Extract missing values
id_m= seq(1,N)[which(apply(Y,1,function(x)return(NaN %in% x)))]
pos_m= apply(Y[id_m,],1,function(x)return(which(is.nan(x))))

if(!is.list(pos_m))
{
if(is.matrix(pos_m))
{
pos_m2= pos_m
pos_m=  list()
for(i in 1:length(id_m))
{
pos_m[[i]]= pos_m2[,i]
}
}

else if(is.vector(pos_m))
{
pos_m2= pos_m
pos_m=  list()
for(i in 1:length(id_m))
{
pos_m[[i]]= pos_m2[i]
}
}

else
{
stop("Some problems with pos_m.")
}
}

Y_complete= Y[-id_m,]

mu =  matrix(apply(Y_complete,2,mean), D, 1)
Y_c=  t(apply(Y_complete,1,function(x)return(x-mu)))

#Perform a fast rank-d SVD or a simple SVD
if(fast)
{
#print('fast')
W= randSVD(t(Y_c), k= d )\$u
}

if(!fast)
{ print('not fast')
W= svd(t(Y_c), nu= d, nv= d)\$u
}

#Sufficient Statistics
A = matrix(apply(Y, 1, function(x)
return(sum((x[which(!is.nan(x))]-
mu[which(!is.nan(x))])^2))), N, 1)

Z = t(apply(Y,1,function(x)
return(t(W[which(!is.nan(x)),])%*%
(x[which(!is.nan(x))]- mu[which(!is.nan(x))]))))

AA= lapply(pos_m, function(x) return(t(W[-x,])%*% W[-x,]) )

##########################################################################
# Second step:
# Preparation for the Gibbs sampler:
# nc=its nb=burn tol=tol a=atau a_sigma = asigma; b_sigma= bsigma
# step=step stoptime = ; starttime = ; T= Time
##########################################################################

Time = burn + its

InD=   rep(list(c(1:d)), stoptime)

yms= rep(list(), Time)

u=      matrix(0.5, d, Time)

tau=       matrix(1, d, Time)
tau[,1]= rexptr(d, atau, range= c(1,Inf))

sigmaS= matrix(1, Time, 1)
sigmaS[1]= 1/rgamma(1,shape= asigma, rate= bsigma)

##########################################################################
# Third step:
# Gibbs sampler:
##########################################################################

InDtmp = seq(1,d)

summ=0

for(iter in 2:Time)
{
#Impute Missing Data
#tic(func.tic = NULL)

y_m = GEODE_impute(u[,iter-1],sigmaS[iter-1],
W,mu,AA,Z,id_m,pos_m,InDtmp)

#    Sig       = (1/u[,iter-1]-1)*sigmaS[iter-1]
#    if(length(InDtmp)< length(Sig) )
#    {
#      Sig[-InDtmp] = 0
#    }

#    helpU=u[,iter-1]
#    helpSIGMAS=sigmaS[iter-1]

#    y_m = CGEODE_impute(helpU, helpSIGMAS,
#                     W, mu, AA, Z, id_m, pos_m, Sig,
#                     d)

#c=toc(func.toc = NULL)
#cc= c\$toc- c\$tic
#summ=summ+cc

yms[[iter]] = y_m

#Update Sufficient Statistics
A_temp = A;
Z_temp = Z;
for(i in 1:length(id_m))
{
A_temp[id_m[i]] = A_temp[id_m[i]] +
sum((t(y_m[[i]])-mu[pos_m[[i]] ])^2)
Z_temp[id_m[i],] = Z_temp[id_m[i],] + y_m[[i]]%*%W[pos_m[[i]],]
}

#Update u
#u[,iter] = generateU_root(Z_temp, sigmaS[iter-1], N,
#                         tau[,iter-1], InDtmp, u[,iter-1])
u[,iter]= u[,iter-1]
u[InDtmp,iter] = CgenerateU_root(tau[InDtmp,iter-1], N, sigmaS[iter-1],
Z_temp[,InDtmp])

#tau[,iter] = generateTau_root(u[,iter], atau, InDtmp, tau[,iter-1])
tau[,iter]= tau[,iter-1]
tau[InDtmp,iter]= CgenerateTau_root(u[InDtmp,iter],tau[InDtmp,iter],
atau, max(InDtmp))

sigmaS[iter] = generateSigmaS_root(A_temp, Z_temp, u[,iter], N,
asigma, bsigma, D, InDtmp)

# Adaptively prune the intrinsic dimension
# Wang version:
#if (iter <= stoptime & iter > starttime)
#{
#  u_accum = u_accum + ( u[,iter]==1 )
#  {
#    ind = InD[[iter-1]]
#    tmp = u_accum[ind]
#    vec = (1/u[InD[[iter-1]],iter]-1)*sigmaS[iter]
#    d1 = ind

#    if ( sum(tmp > (stoptime-starttime)*tol) + sum(vec/max(vec) < tol) )>0
#    {
#      ind = ind[ ind %in% ind[ d1 >= min(d1( (tmp > (stoptime-starttime)*tol) |
#                                                (vec/max(vec) < tol) )) ] = [];
#    }
#    InD[[iter]] = ind

#    else InD[[iter]]= InD[[iter-1]]
#

#  }
#  InDtmp = InD[[iter]]

#}

if(iter <= stoptime & iter > starttime)
{
ind= InD[[iter-1]]
#find our alphas
vec_alpha= (1/u[InD[[iter-1]],iter]-1)*sigmaS[iter]
max_alpha= max(vec_alpha)
if(iter== stoptime)
{
if( sum(ind[vec_alpha/max_alpha < tol])>0 )
{
ind= ind[ ind < min( ind[vec_alpha/max_alpha < tol] ) ]
}

}

else if( runif(1, 0, 1)< p(iter, c0, c1) )
{
if( sum(ind[vec_alpha/max_alpha < tol])>0 )
{
ind= ind[ ind < min( ind[vec_alpha/max_alpha < tol] ) ]
}

else
{
if(ind[length(ind)]+1<d)
{
ind= c(ind, ind[length(ind)]+1)
}
}
}

InD[[iter]]= ind

InDtmp = InD[[iter]]

}

}

return( list(InD, u, tau, sigmaS, W, mu, id_m, pos_m, yms) )
}
```

## Try the RGeode package in your browser

Any scripts or data that you put into this service are public.

RGeode documentation built on May 2, 2019, 1:09 p.m.