#' Modified EM algorithm for hub set selection in hub models with the null component
#'
#' @param G observed group data
#' @param A initial estimate of adjacent matrix, the first row contains the probabilities in non-hub group
#' @param rho a vector of hub weight
#' @param lam tuning parameter for component selection, degenrate to a standard EM without penalty if lam=0
#' @param pen.type type of penalty, including 'log': penalization for logarithm of all components;
#' 'plog': penalization for logarithm of partial components except the null component;
#' 'plasso': penalization for lasso form of partial components
#' @param iter.max maximum iteration steps
#' @param tol threshold for shrinking rho to 0
#' @return a list of components
#' \item{A}{a matrix containg estimated correlation among nodes}
#' \item{rho}{a vector containing estimated component weight}
#' \item{l}{log-likelihood}
#' \item{iteration}{number of iterations used to converge}
#' @export
#' @import Rsolnp
#' @examples
#' set.seed(2020)
#' n0 = 5; n=100; T=1000
#' A0 = GenA(n,n0,0.4,0.1,rep(0.05,n))
#' G0 = GenG(A0,T,c(0.2,rep(0.8/n0,n0)))
#' M = 10
#' A = matrix(runif((M+1)*n),nrow=(M+1)); diag(A[-1,]) = 1
#' rho = runif(M+1); rho = rho/sum(rho)
#' phub(G0,A,rho,0.030,pen.type="log")$rho
#' phub(G0,A,rho,0.030,pen.type="plog")$rho
#' phub(G0,A,rho,0.8,pen.type="plasso")$rho
#'
phub = function(G,A,rho,lam,pen.type=c("plog","plasso","log"),iter.max=1000,tol=1e-4){
## f(G(t)|Z(t)=k,A)
pen.type = match.arg(pen.type)
posterior = function(G,A,rho){
T = dim(G)[1]; n = dim(G)[2]; q = dim(A)[1]
H = Pr_cond = matrix(NA,T,q)
for(t in 1:T) Pr_cond[t,] = apply(t(t(A)^G[t,])*t(t(1-A)^(1-G[t,])),1,prod)
## posterior probability H and log-likelihood
Pr_G = t(t(Pr_cond)*rho)
# conditional probability is extremely small for some groups
# divide the sample into 2 parts: computable and numerical underflow
id = which(rowSums(Pr_G)==0) # abnormal sample index
Pr_G1 = Pr_G[setdiff(1:T,id),] # normal group
H[setdiff(1:T,id),] = Pr_G1/rowSums(Pr_G1)
l1 = sum(log(rowSums(Pr_G1))); l2 = 0
btm = matrix(NA,length(id),q)
if(length(id)!=0){
for(ii in 1:length(id)){
pid = which(G[ii,]==1)
if(length(pid)!=0) t1 = apply(A,1,function(x)sum(x[pid]))
else t1 = rep(0,q)
t2 = rowSums(log(1-A)[,setdiff(1:n,pid)])
btm[ii,] = t1+t2+log(rho)
}
ct = apply(btm,1,max)
l2 = sum(ct+log(rowSums(exp(btm-ct))))
H[id,] = t(apply(exp(btm-ct),1,function(x) x/sum(x)))
}
return(list(H=H,l=l1+l2))
}
## update rho: some rho will shrink to 0
if(pen.type=="log"){
hatrho = function(rho,H){
Mi = sum(rho!=0)
if(Mi*lam==1) stop("\nPlease input a new lam avoiding 1-M*lam is 0\n",call.=FALSE)
pars = sapply((colMeans(H) - lam)/(1-Mi*lam),function(x) max(0,x))
pars[pars<tol] = 0
return(pars)
}
}
else{
hatrho = function(rho,H){
T = dim(G)[1]
Hx = colSums(H)
ind = which(rho!=0); len = length(ind)
eqft = function(t) sum(t)
if(pen.type=="plasso"){
# convex function
ft = function(t) -sum(Hx[ind]*log(t)) + T*lam*(1-t[1])
pars = Rsolnp::solnp(pars=rho[ind],fun=ft,eqfun=eqft,eqB=1,LB=rep(0,len),
UB=rep(1,len),control=list(trace=0))$pars
}
else{
epsi = 1e-8
ans = list()
ft = function(t) -sum(Hx[ind]*log(t)) + T*lam*sum(log(epsi+t[-1]))
fun = function(i){
if(i==1) par = rho[ind]
else {par = runif(len); par = par/sum(par)} # normalized
sol = Rsolnp::solnp(pars=par,fun=ft,eqfun=eqft,eqB=1,LB=rep(0,len),
UB=rep(1,len),control=list(trace=0))
ans$pars = sol$pars
ans$values = tail(sol$values,1)
return(ans)
}
# adding one initial values rho from the last step.
fval = lapply(as.list(1:3),fun) # repetition
best = sapply(fval,function(x) x$values)
pars = fval[[which.min(best)]]$pars
}
pars[pars<tol] = 0
pars[1] = 1 - sum(pars[-1]) # sum is 1
rho[ind] = pars
return(rho)
}
}
## update A: Am. will be 0 if rho_m=0
hatA = function(G,H){
q = dim(H)[2]; n = dim(G)[2]
HatA = matrix(NA,q,n)
for(m in 1:q){
if (all(H[,m]==0)) HatA[m,] = 0
else HatA[m,] = colSums(H[,m]*G)/sum(H[,m])
}
diag(HatA[-1,]) = 1 # Amm must be 1
return(HatA)
}
## EM Iteration
count = 0;diff = 1
L = vector();L[1] = 1
while(count < iter.max & diff > 1e-6){
count = count + 1
# E Step
post = posterior(G,A,rho)
H = post$H
L[count+1] = post$l
# cat("count and likelihood:",c(count,tail(L,1)),sep="\n")
if(is.infinite(L[count+1])) break
if(is.na(tail(L,1))) break
# M Step
rho = hatrho(rho,H)
A = hatA(G,H)
diff = abs((L[count+1]-L[count])/L[count+1])
}
return(list(A=A,rho=rho,l=L[count+1],iter=count))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.