R/HMMmain.R

Defines functions HMMmain

################################################################
## Copyright 2014 Tracy Holsclaw.

## This file is part of NHMM.

## NHMM is free software: you can redistribute it and/or modify it under
## the terms of the GNU General Public License as published by the Free Software
## Foundation, either version 3 of the License, or any later version.

## NHMM is distributed in the hope that it will be useful, but WITHOUT ANY
## WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
## A PARTICULAR PURPOSE.  See the GNU General Public License for more details.

## You should have received a copy of the GNU General Public License along with
## NHMM.  If not, see <http://www.gnu.org/licenses/>.
#############################################################


HMMmain=function(Rgettheta, z, theta, y, yboo, subseqy, subboo, dirprior, K, iters, emdist, burnin, nmix,  W, psipriorm, psipriorp,  priors, outdir,outboo, delta, yrep, Wp, ypred,pT, yhold, ymiss)
{

T=dim(y)[1]
J=dim(y)[2]
if(!is.null(W)){A=dim(W)[1]}else{A=0}
LL=A+K



Q=iters+burnin
pb = txtProgressBar(min = 0, max = Q, style = 3)
if(delta==FALSE){  mixes=nmix;  delt=0}
if(delta==TRUE) {  mixes=nmix+1; delt=1}
if(emdist=="gamma"){fam=1}
if(emdist=="normal"){fam=2}
if(emdist=="poisson"){fam=3}


#### BICp
BICp=(K-1)*(K)      #dirprior
if(A>0){BICp=BICp+(A+K)*J+(K-2)*J}    #psi, gamma (have W)
if(A==0){BICp=BICp+(mixes-1)*K*J}   #ppp (no W)

if(fam==1 || fam==2){ BICp=BICp+K*nmix*J+K*nmix*J }   ## gamma or normal
if(fam==3){ BICp=BICp+K*nmix*J} ## poisson





### alocate and intialize
denzity=matrix(0,K,T)
QQ=array(1/K,dim=c(K,K,T))
psi=matrix(0,K+A,J)  #W - A,T,J
mus=matrix(0,T,J)
M=matrix(0,T,J)
gams=c(-Inf,seq(0,1,length=mixes-1),Inf)
gamy=matrix(rep(gams,each=J),mixes+1,J, byrow=TRUE)
gamboo=c(0,rep(1,mixes-1),0)



#########  Set up input variables W for the emissions 
zbin=matrix(0,T,K)
for(k in 1:K) {  zbin[z==k,k]=1 }
Wbin=array(0,dim=c(K+A,T,J))
if(A>0){ Wbin[(K+1):(K+A),,]=W }
Wbin[1:K,,]=getWbin(z,K,J)

ppp= array(0,dim=c(T,J,mixes))
for(j in 1:J){  mus[,j]=matrix(psi[,j],1,LL) %*% Wbin[,,j] }  ### need for rcpp_getppp
ppp=array(rcpp_getppp(gamy, mus),dim=c(T,J,mixes))



##############  Set up the mixture component latent variable vvv
vvv=matrix(0,T,J)
if(delta==TRUE)  ### assign vvv to non-zero y's
{  v1=rep(1:(mixes-1),each=sum(y>0)/(mixes-1))
   v1=c(rep(1,sum(y>0)-length(v1)),v1)  #makes sum(y>0) and v1 the same length
   v2=v1
   v2[order(y[y>0])]=v1
   vvv[y>0]=v2
}
if(delta==FALSE)
{  v1=rep(0:(mixes-1),each=length(y)/mixes)
   v1=c(rep(0,length(y)-length(v1)),v1)  #makes sum(y>0) and v1 the same length
   v2=v1
   v2[order(y)]=v1
   vvv=matrix(v2,T,J)
}  



############# Saves   #############################
meanQQ=array(0,dim=c(K,K,T))
loglik=numeric(iters)

if(outboo==FALSE)   #theta, z get save to output directory
{  zsave=matrix(0,T,iters)
   thetasave=array(0,dim=c(2,nmix,K,J,iters)) 
   psisave=array(0,dim=c(K+A,J,iters))
}else{  
   thetasave=NULL
}


yfull=y
ccc=1
ddd=1


if(ypred>0 || !is.null(yhold))
{  zp=rep(1,pT)
   ypred1=matrix(0,pT,J)
   musp=matrix(0,pT,J)
   
   zbinp=matrix(0,pT,K)
   for(k in 1:K) {  zbinp[zp==k,k]=1 }
   Wbinp=array(0,dim=c(K+A,pT,J))
   if(A>0){ Wbinp[(K+1):(K+A),,]=Wp }
   Wbinp[1:K,,]=getWbin(zp,K,J)
   
   pppp= array(0,dim=c(pT,J,mixes))
   for(j in 1:J){  musp[,j]=matrix(psi[,j],1,LL) %*% Wbinp[,,j] }  ### need for rcpp_getppp
   pppp=array(rcpp_getppp(gamy, musp),dim=c(pT,J,mixes))
   
   ####################  transitions 
   QQp=array(1/K,dim=c(K,K,pT))
   pls1=matrix(0,pT,J)
   pls2=numeric(pT)
   pls3=numeric(pT)
}


countJ=apply(yboo,2,sum)  #how many missing for each sequence




theK=K

#### MCMC iterations
for(q in 1:Q) 
{  
    theta=Rgettheta(y,z, priors, theta, nmix, vvv, delt)
    
    vvv=rcpp_getvvv(fam, K, mixes, delt, y,c(ppp), c(theta[1,,,]),  c(theta[2,,,]),  z)   
    M=RgetM(A,K, psi, gamy, Wbin, gamboo, vvv )
    if(nmix>1){ gamy=Rgetgams(gamy, vvv,M,nmix,mixes) }
    psi=Rgetpsi(M, psi, Wbin, psipriorm, psipriorp, A, K)
               
    if(theK>1){QQ=array(rcpp_getQQ(K, z, dirprior,  subseqy) ,dim=c(K,K,T))}
                
    denzity=array(rcpp_getdenzity(A, c(Wbin), psi, gamy, fam, K, mixes, delt, y,c(ppp), c(theta[1,,,]),  c(theta[2,,,])) ,dim=c(K,T,J))
          
          z=Cgetz( z, QQ, denzity, subseqy) #slow 
    #rcpp_getz( z, c(QQ), denzity, subseqy)  #BROKEN 
    for(j in 1:J){  mus[,j]=matrix(psi[,j],1,LL) %*% Wbin[,,j]  }  ### need for rcpp_getppp
    ppp=array(rcpp_getppp(gamy, mus),dim=c(T,J,mixes))
    
    yfull=rcpp_getymiss(fam, K, z, c(ppp), c(theta[1,,,]),  c(theta[2,,,]),mixes, delt, J)
    y[yboo]=yfull[yboo]
    
    zbin=Cgetzbin(K,z) 
    Wbin[1:K,,]=array(rcpp_getWbin(z,K,J),dim=c(K,T,J))
   
   
   ##print replicates to file
   if(q>(Q-yrep))
   { write(t(yfull), ncolumns=J, paste(outdir,ccc,"-yrep.txt",sep="")) 
     ccc=ccc+1
   }   

   
   if(q>(Q-ypred)|| (!is.null(yhold) && q>burnin))
   {
     zbinp=Cgetzbin(K,zp)
     if(theK>1){QQp=array(rcpp_getQQ(K, z, dirprior,  subseqy) ,dim=c(K,K,T))}
     
     zp[1]= rcpp_rmultinom(QQp[z[T],,1])
     for(tt in 2:pT)
     {  zp[tt]=rcpp_rmultinom(QQp[zp[tt-1],,tt-1])
     }
     Wbinp[1:K,,]=array(rcpp_getWbin(zp,K,J),dim=c(K,pT,J))
     for(j in 1:J){  musp[,j]=matrix(psi[,j],1,LL) %*% Wbinp[,,j]  }  ### need for rcpp_getppp
     pppp=array(rcpp_getppp(gamy, musp),dim=c(pT,J,mixes))
     
     if(q>(Q-ypred) )  #only need to output ypred of these
     { ypred1=rcpp_getymiss(fam, K, zp, c(pppp), c(theta[1,,,]),  c(theta[2,,,]),mixes, delt, J)
       write(zp, ncolumns=pT, paste(outdir,"zpred.txt",sep=""), append=TRUE) 
       write(t(ypred1), ncolumns=J, paste(outdir,ddd,"-ypred.txt",sep="")) 
       ddd=ddd+1
     }
     
     
     if(!is.null(yhold))  #PLS
     {  denzityp = array(rcpp_getdenzity(A, c(Wbinp), psi, gamy, fam, K, mixes, delt, yhold, c(pppp), c(theta[1,,,]),  c(theta[2,,,])) ,dim=c(K,pT,J))
        for(tt in 1:pT)
        {  pls1[tt,]=denzityp[zp[tt],tt,]  #T by J
        }
        pls1[is.na(yhold)]=mean(pls1[!is.na(yhold)])  #fill in missing values with mean PLS value
        pls2=apply(pls1,1,prod)  #prod for all J
        pls3=pls3+pls2/(Q-burnin) #mean of Q  
     }
     
   }
   
   
   
  if(q > burnin)  #save iters worth: Q=iters+burnin
  {  
    if(ymiss==TRUE)
    {  for(j in 1:J)
       {  if(countJ[j]>0) {write(y[yboo[,j],j], ncolumns=countJ[j] ,paste(outdir,"ymiss-J",j,".txt", sep=""), append=TRUE)}
       }
    } 
     loglik[q-burnin]=Rgetloglik(denzity,z) 
     meanQQ=meanQQ+QQ/iters
     
     if(outboo==FALSE)  
     {   zsave[,q-burnin]=z
         thetasave[,,,,q-burnin]=theta
         psisave[,,q-burnin]=psi
     }else{
        for(k in 1:K)  #2,nmix,K,J 
        {  for(j in 1:J)
           {  write(theta[1,,k,j],ncolumns=nmix,paste(outdir,"K",k,"-J",j,"-theta1.txt", sep=""), append=TRUE)
              write(theta[2,,k,j],ncolumns=nmix,paste(outdir,"K",k,"-J",j,"-theta2.txt", sep=""), append=TRUE)
           }
        }   
        
        for(j in 1:J)
        { write(psi[,j],ncolumns=K+A, paste(outdir,"J-",j,"-psi.txt", sep=""), append=TRUE)  #K+A,J
        }
        write(z, ncolumns=T ,paste(outdir,"zsave.txt", sep=""), append=TRUE)
     } 
  }

  theK=length(unique(z))

  
  setTxtProgressBar(pb, q)
} 
close(pb)

   PLS=NULL
   if(!is.null(yhold))  #PLS    
   {  PLS=mean(na.omit(log(pls3)))
   }
  
   B=1
   if(outboo==FALSE)
   {  fin=list(fam,T,J,K,A,B,iters, burnin, yrep, outboo,outdir, loglik, BICp, meanQQ,  thetasave, psisave, zsave, PLS)
      names(fin)=c("fam","T","J","K","A","B","iters", "burnin","yrep", "outboo","outdir", "loglik", "BICp", "meanQQ", "thetasave", "psisave", "zsave", "PLS")
   }else{  fin=list(fam,T,J,K,A,B,iters, burnin,yrep,outboo,outdir,loglik, BICp, meanQQ, PLS)
      names(fin)=c("fam","T","J","K","A","B","iters", "burnin","yrep", "outboo","outdir", "loglik", "BICp", "meanQQ", "PLS")
   }
   fin
}

Try the NHMM package in your browser

Any scripts or data that you put into this service are public.

NHMM documentation built on July 1, 2020, 7:28 p.m.