R/EM.HODC.R

Defines functions EM.HODC

Documented in EM.HODC

#'Hierachical ordered density clustering (HODC) Algorithm with input generated by Mclust
#'@param pvalue a vector of p-values obtained from large scale statistical hypothesis testing
#'@details Without the information of networking, we can have an approximation to the marginal density by DPM model fitting on \strong{r}. Suppose the number of finite mixture normals is equal to L_0+L_1, which means the number of classes we have, we apply HODC algorithm in partitioning the $L_0$ and $L_1$ components into two classes,
#'For this function, the input is generated by Mclust
#'@return a list of HODC algorithm returned parameters. 
#'\describe{
#'\item{mean}{the mean of each of two clusters}
#'\item{variance}{the variance of each of two clusters}
#'\item{pro}{the probability of each of two clusters}
#'\item{classificaiton}{The classification corresponding to each cluster}
#'}
#'@examples 
#'\dontrun{
#'rstat=c(rnorm(50,mean=1),rnorm(50,mean=2),rnorm(100,mean=4),rnorm(100,mean=8))
#'pvalue=pnorm(-rstat)
#'mclustHODC=EM.HODC(pvalue)
#'}
#'@export
EM.HODC=function(pvalue)
{
  rstat=Transfer(pvalue)
  mclust=mclust::Mclust(rstat)
  
  while (length(mclust$parameter$variance$sigmasq)!=length(unique(mclust$parameter$variance$sigmasq))){
    for (i in 1:length(mclust$parameter$mean)){
      for (j in 1:length(mclust$parameter$mean)){
        if(i!=j){if(mclust$parameter$variance$sigmasq[i]==mclust$parameter$variance$sigmasq[j]){mclust$parameter$variance$sigmasq[j]=mclust$parameter$variance$sigmasq[j]+0.0001}}
      }
    }
  }
  
  while (length(mclust$parameter$pro)!=length(unique(mclust$parameter$pro))){
    for (i in 1:length(mclust$parameter$mean)){
      for (j in 1:length(mclust$parameter$mean)){
        if(i!=j){if(mclust$parameter$pro[i]==mclust$parameter$pro[j]){mclust$parameter$pro[j]=mclust$parameter$pro[j]+0.0001}}
        
      }
    }
  }
  
  if (length(mclust$parameter$mean)==1) {print("warning: the input is not appropriate for mclust since only one cluster was detected by the function Mclust" )}
  if (length(mclust$parameter$mean)==1) break
  ###Step1 find the min distance
  if (length(mclust$parameter$mean)==2) {
    hodcmclust=list()
    hodcmclust$mean=unique(mclust$parameter$mean)
    hodcmclust$pro=unique(mclust$parameter$pro)
    hodcmclust$variance=unique(mclust$parameter$variance$sigmasq[!is.na(mclust$parameter$variance$sigmasq)])
    hodcmclust$classification=mclust$classification
  }else{
    repeat{
      
      distance_all=0
      mclust$parameter$mean=unique(mclust$parameter$mean)
      mclust$parameter$pro=unique(mclust$parameter$pro)
      mclust$parameter$variance$sigmasq=unique(mclust$parameter$variance$sigmasq[!is.na(mclust$parameter$variance$sigmasq)])
      for (i in 1:(length(mclust$parameter$mean)-1))
      {
        distance=Inte_Distance(i,mclust)
        distance_all=c(distance_all,distance)
      }
      lmin=which(distance_all[-1]==min(distance_all[-1]))
      
      if (length(lmin)!=1){lmin=sample(lmin,1)}
      
      for (l in 1:(length(mclust$parameter$mean)-1))
      {
        if (l<lmin){mclust$parameter$mean[l]=mclust$parameter$mean[l]
        mclust$parameter$pro[l]=mclust$parameter$pro[l]
        
        }else if (l==lmin){mclust$parameter$mean[l]=mclust$parameter$mean[l]*mclust$parameter$pro[l]/(mclust$parameter$pro[l]+mclust$parameter$pro[l+1])+mclust$parameter$mean[l+1]*mclust$parameter$pro[l+1]/(mclust$parameter$pro[l]+mclust$parameter$pro[l+1])
        mclust$parameter$pro[l]=mclust$parameter$pro[l]+ mclust$parameter$pro[l+1]
        k=lmin
        repeat{
          k=k+1                           
          if (length(mclust$classification[which(mclust$classification==k)])!=0){mclust$classification[which(mclust$classification==k)]=lmin
          break}
        }
        
        if (mclust$parameter$variance$modelName!="E"){mclust$parameter$variance$sigmasq[l]=stats::var(rstat[which(mclust$classification==l)])}
        }else if (l>lmin){mclust$parameter$mean[l]=mclust$parameter$mean[l+1]
        mclust$parameter$variance$sigmasq[l]=mclust$parameter$variance$sigmasq[l+1]
        mclust$parameter$pro[l]=mclust$parameter$pro[l+1]
        }
        
      }
      
      if (lmin==(length(mclust$parameter$mean)-1)){mclust$parameter$mean=mclust$parameter$mean[-length(mclust$parameter$mean)]
      mclust$parameter$pro=mclust$parameter$pro[-length(mclust$parameter$pro)]
      if (mclust$parameter$variance$modelName!="E"){mclust$parameter$variance$sigmasq=mclust$parameter$variance$sigmasq[-length(mclust$parameter$variance$sigmasq)] }}
      if (length(unique(mclust$parameter$mean))==2) break
    }
    hodcmclust=list()
    index=sort(unique(mclust$classification))
    hodcmclust$mean[1]=mean(rstat[which(mclust$classification==index[1])])
    hodcmclust$mean[2]=mean(rstat[which(mclust$classification==index[2])])
    hodcmclust$variance[1]=stats::var(rstat[which(mclust$classification==index[1])])
    hodcmclust$variance[2]=stats::var(rstat[which(mclust$classification==index[2])])
    hodcmclust$pro[1]=length(rstat[which(mclust$classification==index[1])])/length(rstat)
    hodcmclust$pro[2]=length(rstat[which(mclust$classification==index[2])])/length(rstat)
    hodcmclust$classification=mclust$classification
  }
  return(hodcmclust) 
}

Try the BANFF package in your browser

Any scripts or data that you put into this service are public.

BANFF documentation built on May 29, 2017, 11:59 a.m.