R/MCCCA.func.R
In mccca: Visualizing Class Specific Heterogeneous Tendencies in Categorical Data

Documented in MCCCA

#' apply MCCCA for dataset.
#'
#' @description Applies MCCCA to \code{mcccadata.list}.
#' @param mccca.data A list created in \code{\link{create.MCCCAdata}}.
#' @param K.vec An integer vector of length C (the number of classes). Each element corresponds to the number of clusters in each class specified for estimation.
#' @param known.vec A vector of length C giving logical values indicating whether a cluster allocation in each class is known or not. The default is all \code{FALSE}.
#' @param knowncluster.list A vector of length C giving logical values indicating whether a cluster allocation in each class is known or not. The default is all \code{FALSE}.
#' @param p An integer indicating the dimension of quantification.The default is 2.
#' @param nstart An integer indicating the number of random initial values.
#' @param kmeans.initial A logical value indicating whether the 1st initial value for indicator matrix is generated by kmeans or not. The default is \code{TRUE}.
#' @param remove.miss A logical value indicating whether categories nobody choose are removed nor not. The default is \code{TRUE}.
#' @param maxit An integer indicating the maximum number of iterations.
#' @param tol A numeric value indicating the absolute convergence tolerance.
#' @param verbose A logical value indicating. If \code{TRUE}, tracing information on the progress of the optimization is produced.
#' @return Returns a list with the following elements.
#' \item{\code{G}}{A (Kxp) quantification matrix for all clusters (K=\code{sum(K.vec)}).}
#' \item{\code{Gg}}{Scaled \code{G}. See details.}
#' \item{\code{B}}{A (Qxp) quantification matrix for all categories (Q=\code{sum(q.vec)}, and \code{q.vec} is given in \code{create.MCCCAdata}).}
#' \item{\code{Bg}}{Scaled \code{B}.}
#' \item{\code{Q}}{A (Nxp) quantification matrix for all observations.}
#' \item{\code{Qg}}{Scaled \code{Q}.}
#' \item{\code{clses.list}}{A list of C vectors, giving the estimated cluster index for each observation in each class.}
#' \item{\code{clses.vec}}{A vector of length N, where each element represents the cluster index to which the observations in the rows of \code{data.mat} (given in \code{mccca.data}) belong.}
#' \item{\code{optval}}{A numeric value giving the optimized value of the objective function that is the smallest among all initial values.}
#' \item{\code{optval.vec}}{A numeric vector of length \code{nstart} giving the optimized values of the objective function for each initial value.}
#' \item{\code{stepconv}}{An integer giving the number of iterations until convergence at the initial value where the objective function was the smallest.}
#' \item{\code{stepconv.vec}}{An integer vector of length \code{nstart} giving the number of iterations until convergence for each initial value.}
#' \item{\code{catename.vec}}{A characteristic vector of length \code{Q} that combines the category names of each categorical variable into a single vector.}
#' \item{\code{catename.vari.vec}}{A characteristic vector of length \code{Q} with \code{catename.vec} plus the name of categorical variable (by default, this is used as the column name of \code{B} and \code{Bg}).}
#' \item{\code{cate.removed}}{If there is a category that no one chooses and \code{remove.miss}=TRUE, \code{cate.removed} gives which category was removed (given by the index of column in dummy matrix). Otherwise, return \code{NULL}.}
#' \item{\code{cluster.vec}}{An integer vector of length K, where each index in the \code{clses.list} and \code{clses.vec} indicates which class it corresponds to.}
#' \item{\code{q.vec}}{A vector of length J, same as the one given in \code{mccca.data}.}
#' \item{\code{K.vec}}{A vector of length C, which is used as an input in this \code{MCCCA} function.}
#' \item{\code{classlabel}}{A characteristic vector of length C, same as the one given in \code{mccca.data}.}
#' @seealso \code{\link{create.MCCCAdata}}
#' @details
#' \code{Bg},\code{Gg} and \code{Qg} are scaled \code{B},\code{G} and \code{Q} respectively, such that the average squared deviation from the origin of the row and column points is the same (See section 2.3 in the paper).
#'
#' If you want to specify the cluster allocation for some or all classes, prepare the following two.
#'
#' -\code{knowncluster.list}: A list of C vectors. The length of each vector in the list should be the same as the number of rows in each matrix in the \code{data.list}
#' (ex. \code{length(knowncluster.list[[c]])=nrow(data.list[[c]])}, (c=1,..,C)).
#' For example, suppose that \code{data.list} is a list of 4 matrices (meaning C=4),
#' and the cluster assignment is known only for the second class,
#' and the assignments in other classes are estimated. In this case,
#' the second vector of \code{knowncluster.list} should be specified as the vector of cluster indexes
#' to which the observations in each row of \code{data.list[[2]]} belong, with length \code{nrow(data.list[[2]])},
#' and the other vectors (1, 3, and 4) in the list can be specified as \code{NA}. For each vector in the \code{knowncluster.list},
#' the specified cluster index should start from 1, and there should not be any skipping numbers.
#'
#' -\code{known.vec}: A vector of logical values of length C. For example,
#' if C=4 and you want to know the cluster assignment of only the second class, it should be \code{known.vec=c(FALSE,TRUE,FALSE,FALSE)}.
#' @importFrom magic adiag
#' @importFrom stats kmeans
#' @export
#' @references Takagishi & Michel van de Velden (2022): Visualizing Class Specific
#' Heterogeneous Tendencies in Categorical Data, Journal of Computational and Graphical Statistics,
#' DOI: 10.1080/10618600.2022.2035737
#' @examples
#' #setting
#' N <- 100 ; J <- 5 ; Ktrue <- 2 ; q.vec <- rep(5,J) ; noise.prop <- 0.2
#' extcate.vec=c(2,3)#the number of categories for each external variable
#'
#' #generate categorical variable data
#' catedata.list <- generate.onedata(N=N,J=J,Ktrue=Ktrue,q.vec=q.vec,noise.prop = noise.prop)
#' data.cate=catedata.list$data.mat
#' clstr0.vec=catedata.list$clstr0.vec
#'
#' #generate external variable data
#' data.ext=generate.ext(N,extcate.vec=extcate.vec)
#'
#' #create mccca.list to be applied to MCCCA function
#' mccca.data=create.MCCCAdata(data.cate,ext.mat=data.ext,clstr0.vec =clstr0.vec)
#'
#' #specify the number of cluster for each of C classes
#' C=length(mccca.data$data.list)
#' K.vec=rep(2,C)
#'
#' #apply MCCCA
#' mccca.res=MCCCA(mccca.data,K.vec=K.vec)
#'
#' #plot MCCCA result
#' plot(mccca.res)
#'
#' #if you want to specify cluster allocation in the 2nd class:
#' knowncluster.list=rep(list(NA),C)
#' #specify cluster index for the 2nd class
#' N2=nrow(mccca.data$data.list[[2]])
#' knowncluster.list[[2]]=rep(c(1,2),times=c(2,N2-2))
#' known.vec=c(FALSE,TRUE,FALSE,FALSE,FALSE,FALSE)
#' mccca.res=MCCCA(mccca.data,K.vec=K.vec,known.vec=known.vec,knowncluster.list = knowncluster.list)


## @references Takagishi, M. & Velden, M. van de (2022). Visualizing class specific heterogeneous tendencies in categorical data, to appear in Journal of Computational and Graphical Statistics.

MCCCA <- function(mccca.data,K.vec=K.vec,known.vec=NULL,knowncluster.list=NULL,nstart=3,maxit=50,p=2,tol=1e-8,
                       verbose=TRUE,remove.miss=TRUE,kmeans.initial=TRUE){
  if(is.null(known.vec)) known.vec=rep(FALSE,length(K.vec))
  nstart.k=100 ; grpbase <- FALSE

  paraname=c("B","cluster allocation & cluster center")
  updateorder=c(1,2)

  #if(trace>1) message("iteration: nstart=",nstart,", maxit=",maxit,".")

  if(!is.null(mccca.data)){

    data.list<-mccca.data$data.list
    data.mat=mccca.data$data.mat
    mccca.data$data.list<-mccca.data$data.mat<-NULL#reduce memory
    N.vec<-mccca.data$N.vec ; Ktrue.vec<-mccca.data$Ktrue.vec
    class.n.vec<-mccca.data$class.n.vec
    q.vec=mccca.data$q.vec
    classlabel<-mccca.data$classlabel
  }else{
    stop("mccca.data is NULL")
  }

  #######check###########
  checkpara<-c(nstart,maxit,p)
  if(any(is.na(checkpara))){
    stop(checkpara[is.na(checkpara)],"is NA\n")
  }

  if((any(known.vec))){
    if(any(N.vec[known.vec]!=1) & (is.null(knowncluster.list))){
    stop("specify cluster allocation for known class.")
    }
    #check if knowncluster.list is appropriate
    for(kk in which(known.vec)){
      message("Cluster allocation is specified in the ",kk,"th class.")
      if(N.vec[kk]!=1){
        if(all(knowncluster.list[[kk]]!=1)){
          stop(paste0("Specified cluster indecies in ",kk,"th vector in knowncluster.list should start from 1 in each class."))
        }else if(any(diff(sort(unique(knowncluster.list[[kk]])))!=1)){
          stop(paste0("There's a cluster with a skipped index in ",kk,"th vector in knowncluster.list."))
        }else if(length(knowncluster.list[[kk]])!=nrow(data.list[[kk]])){
          stop(paste0("The length of ",kk,"th vector in knowncluster.list should be equal to the number of row of ",kk,"th matrix in data.list."))
        }
      }
    }##end check for all cluster-known class
    }##end check for any(known.vec)

  if(any(N.vec==1)){
    message("Some classes have only one subject, so the clusters of that class are set to be known")
    K.vec[N.vec==1]=1 ; known.vec[N.vec==1]=TRUE
    #print(K.vec)
  }
  #if any specified Kc (the number of the cth class-specific cluster) is equal to the number of observation in the cth class,
  if(any(sapply(c(1:C),function(x){K.vec[x]>=N.vec[x]}))){
    ##check if its unknown cluster class
    whcls=which(sapply(c(1:C),function(x){K.vec[x]>=N.vec[x]}))
    if(any(!known.vec[whcls])){
      stop("inappropriate K.vec (some specified number of class-specific cluster is equal or larger than the number of observaton in that class)")
    }
  }
  if(all(known.vec)){
    message("all known cluster")
    paraname<-c("B & cluster center")
    updateorder<-1 ; maxit <- 1 ; nstart <- 1
  }

  ###########define several commands###########
  ###parameter
  N <- sum(N.vec) ; K <- sum(K.vec) ; C <- length(N.vec)
  if(!is.null(data.mat)){
    J <- ncol(data.mat)
  }else{J <- length(q.vec)}
  known.vec.up <- known.vec
  cluster.vec<-rep(seq(1,C),times=K.vec)
  names(cluster.vec)=paste("cluster",c(1:K))
  if(is.null(classlabel))classlabel <- paste0(seq(1,C),"class")
  #Jn <- diag(N)-(1/N)*(rep(1,N)%*%t(rep(1,N)))
  Jn=create_Jn(N)

  if(!is.data.frame(data.mat))data.mat=as.data.frame(data.mat)
  dummy.res=dummy.data.frame.ed(data.mat,sep=":", dummy.classes = "ALL")
  dummy.mat=as.matrix(dummy.res$df)
  catename.vec=dummy.res$category.vec
  rm(dummy.res)

  #browser()
  ##make dummy.diag(block diag)
  dummy.list=rep(list(NA),J)
  for(j in 1:J){
    #as.matrix is needed for adiag function
    dummy.list[[j]]=as.matrix(dummy.data.frame.ed(data.mat[,j,drop=FALSE],sep=":", dummy.classes = "ALL")$df)
  }
  dummy.diag=do.call(args=dummy.list,what=adiag)
  rm(dummy.list)#to reduce memory

  catename.vari.vec=colnames(dummy.mat)

  ###########check missing categories###########
  #if theres any col vector that is all 0 (meaning nobody choses the category)
  dummycheck <- apply(dummy.diag,2,function(x)all(x==0))

  if(any(dummycheck)){
    whichzero<-which(dummycheck)
    warning(paste(whichzero,collapse = ","),"th colvec is all 0 (meaning nobody choses the category)\n")
    #browser()
    if(remove.miss){
      dummy.mat <- dummy.mat[,-whichzero]
      dummy.diag <- dummy.diag[,-whichzero]

      ncatesum <- sapply(c(1:J),function(x){sum(q.vec[c(1:x)])}) #+ 1
      ncatesum <- c(0,ncatesum)
      zerovari <- rep(NA,length(whichzero))
      jz <- 1
      for(jz in 1:length(whichzero)){
        #choose the biggest elements in ncatesum among those which are smaller than whichzero[jz]
        whichbig <- which(whichzero[jz] > ncatesum)
        #if whichbig is NULL, this means whichzero[jz] is in the 1st vari.
        #zerovari[jz] <- ifelse(length(whichbig)!=0,whichbig,1)
        zerovari[jz] <- max(which(whichzero[jz] > ncatesum))
        #whichvari <- ; cate.rem <- whichzero[jz]-ncatesum[whichvari] #which category in that variable
      }
      #cat(paste(whichzero,collapse = ","),"th colvec is all 0 (meaning nobody choses the category\n")
      #cat("cate in the ",zerovari,"vari is removed\n")
      #browser()
      q.vec <- q.vec - table(factor(zerovari,levels=c(1:J)))
      catename.vec <- catename.vec[-whichzero]
      catename.vari.vec <- catename.vari.vec[-whichzero]
      #cat(q.vec,"\n")
    }
    cate.removed <-whichzero #TRUE
  }else{cate.removed <- NULL}


  #####calculate Ddiag.sq (used for update B)#####
  #eig.d <- eigen(t(dummy.diag)%*%dummy.diag)
  #browser()
  # if(grpbase){ #if its groupals base
  #   Jn.list <- rep(list(Jn),J)
  #   Jn.diag=do.call(args=Jn.list,what=adiag)
  #   #Jn.diag <- list2mat.func(data=Jn.list,outputform="diag")$data.diag
  #   DD <- t(dummy.diag)%*%Jn.diag%*%dummy.diag
  # }else{
  Ddiag.sq=calc_Ddiag_sq(dummy_diag = dummy.diag)

  ###before Rcpp
  # DD <- t(dummy.diag)%*%dummy.diag
  # # }
  # eig.d <- eigen(DD)
  # eig.d.val <- (sqrt(abs(eig.d$values)))^(-1)
  # #take abs to ignore numerical miscalculation (since symmetric should have positive eigen value.)
  # #eig.d.val <- (sqrt(eig.d$values))^(-1)
  # if(trace>4) cat("    eig val of Ddiag.sq",eig.d.val[c(1:5)],"\n")#[c(1:5)]
  # Ddiag.sq <- (eig.d$vectors)%*%diag(eig.d.val)%*%t(eig.d$vectors)
  #all(Ddiag.sq==t(Ddiag.sq))


  #######prepare for iteration of init########
  allstep<-(maxit+1)*length(paraname)
  #OB.mat<-matrix(0,nstart,allstep)
  #down.para.mat<-matrix(0,nstart,allstep)
  res.list<-rep(list(NA),nstart)
  optval.vec <- stepconv.vec <- rep(0,nstart)
  tot.list <- sapply(c(1:nstart),list)

  #######start iteration for initial values########
  #for(ti in 1:nstart){
  oneinit.func <- function(ti){

    #if(trace>1) message("  ",ti,"th initial value.")

    kmeans.initial.ti<-ifelse(ti==1,kmeans.initial,FALSE)
    known.vec.up<-known.vec

    ######paralist
    Ugrp.para <- G.para <- B.para <- Distmat <- rep(list(NA),maxit)

    #browser()
    ###initial
    #Ugrp1.list<-rep(list(NA),C)
    U.new=matrix(0,N,K) #U1→U.new(22/9/12)
    cc<-1
    for(cc in 1:C){
      Kc<-K.vec[cc] ; Nc<-N.vec[cc]
      #Ugrp1.list[[cc]]<-matrix(0,Nc,Kc)
      #browser()
      if(known.vec[cc]){
        if(Kc==1){
          Uc<-matrix(1,Nc,Kc)
        }else{
          Kc2=length(unique(knowncluster.list[[cc]]))
          #Uc=1*outer(clstr.vec[class.n.vec==cc], 1:Kc, "==")
          Uc=1*outer(knowncluster.list[[cc]], 1:Kc2, "==")
        }
        U.new[class.n.vec==cc,cluster.vec==cc]=Uc
      }else{ #if its not known

        if(kmeans.initial.ti){
          #browser()
          kres=kmeans(dummy.mat[class.n.vec==cc,],Kc,nstart = nstart.k)
          cls.init <- kres$cluster

        }else if(!kmeans.initial.ti){##end kmeans.initial
          #remove while (5/23)
          #browser()
          kinit.moto <- rep(c(1:Kc),Nc)[c(1:Nc)]
          cls.init <- kinit.moto[sample(Nc,Nc)]
          #cls.init<-sample(Kc,Nc,replace=TRUE)
        }
        Uc=1.0*outer(cls.init, 1:Kc, "==")
        U.new[class.n.vec==cc,cluster.vec==cc]=Uc
      }###end k is unknown
    }#end all data
    ##make b-diag
    #Ugrp.para[[1]] <- U1 #com out(22/9/12)

    #browser()
    break.ite<-FALSE ; OB.vec<-rep(0,allstep)

    #############start iteration##############
    ite.ob<-1
    ite.cate<-1 ; ite.u<-1 ; ite.g<-1
    ite<-1
    for(ite in 1:maxit){
      U.old=U.new
      if(ite>1){B.old=B.new;G.old=G.new}

      #if(trace>2) message("  ite=",ite,".")
      #browser()
      paran<-updateorder[1]
      for(paran in updateorder){

        #if(trace>2) cat("    ite.cate=",ite.cate,",ite.u=",ite.u,",ite.g=",ite.g,"\n")

        if(paran==1){# B update
          ####update quantification of categories###
          #if(trace>3) message(paste("update",paraname[paran]),".")
          ite.cate<-ite+1
          #U=Ugrp.para[[ite.u]]

          # updateQcate.res<-updateQcate.MCCCA(dummy.mat=dummy.mat,Ddiag.sq=Ddiag.sq
          #                                           ,Ugrp=U,m=J,lowdim=p,
          #                                           #grpbase=grpbase,
          #                                           printcheck=trace,ncatevec=q.vec)
          # lambda.vec<-updateQcate.res$lambda.vec
          #
          # if(is.complex(lambda.vec)){
          #   cat("WARN:theres complex values.\n")
          #   B.para[[ite.cate]] <- B.para[[ite.cate-1]]
          #   break.ite <- TRUE
          #   #break
          # }else{
          #   B.para[[ite.cate]]<-updateQcate.res$Qcate.mat
          # }
          #browser()
          B.new=updateB(dummy_mat=dummy.mat,Ddiag_sq=Ddiag.sq
                  ,U=U.old,Jn=Jn,m=J,lowdim=p)
          #all(round(BB,5)==round(B.para[[ite.cate]],5))#四捨五入しないと一致しない

          #####update cluster center#####
          ite.g<-ite+1
          #JZB <- Jn%*%dummy.mat%*%B.para[[ite.cate]]
          JZB=calc_JnDumB(Jn=Jn,dummy_mat = dummy.mat,B=B.new)
          #G.para[[ite.g]] <- (1/J)*(solve(t(U.old)%*%U.old)%*%t(U.old)%*%JZB) #com out(22/9/12)
          #G.new <- (1/J)*(solve(t(U.old)%*%U.old)%*%t(U.old)%*%JZB)
          G.new=calc_updateG(U=U.old, X=JZB,J=J)
          #browser()

        }else if(paran==2){# cluster allpcation

          #if(trace>3) message(paste("update",paraname[paran]),".")

          #####update cluster center#####
          ite.u<-ite+1
          updateCluster.res <- updateUG.MCCCA(data.k=((1/J)*(JZB)),
                                              class.n.vec=class.n.vec,cluster.vec=cluster.vec,
                                              Ggrp=G.new,knownvec=known.vec.up,
                                       #Ggrp=G.para[[ite.g]],knownvec=known.vec.up,
                                       U0=U.old,K.vec=K.vec,n.vec=N.vec,
                                       #U0=Ugrp.para[[ite.u-1]],K.vec=K.vec,n.vec=N.vec,
                                       total.init.k=nstart.k,use.kmeans = kmeans.initial)

          if(updateCluster.res$empty.cls){
            message("  theres an empty clusters.")
            U.new=U.old
            #Ugrp.para[[ite.u]]<-Ugrp.para[[ite.u-1]] #com out(22/9/12)
            #Ggrp.para[[ite.g]]<-Ggrp.para[[ite.u]]
          }else{
            #G.para[[ite.g]]<-updateCluster.res$Ggrp #com out(22/9/12)
            #Ugrp.para[[ite.u]]<-updateCluster.res$Ugrp
            G.new<-updateCluster.res$Ggrp
            U.new=updateCluster.res$Ugrp
          }

          #U=Ugrp.para[[ite.u]]

        }##end cluster allocation

        #if(trace) cat(table(apply(Ugrp.diag,1,which.max)))
        #if(trace>3){
        #  message("  # of ind for each clusters.") ; cat("  ",colSums(U),"\n")
        #}
        if(any(colSums(U.new)==0)){
          #if(length(table(apply(Ugrp.diag,1,which.max)))<sum(K.vec)){

          #the data which yields 0 cluster is set to fix
          which0<-which(colSums(U.new)==0)
          known.vec.up[cluster.vec[which0]]<-TRUE

          #if(trace>0) {
            message("# of cluster has decreased.")
            if(all(known.vec.up))message("all known.vec is TRUE!")
          #}

          #K.vec
          ###only for that data, previous res is used.
          for(ll in 1:length(which0)){
            ll.d<-cluster.vec[which0[ll]]
            #Ugrp.para[[ite.u]][,cluster.vec==which0[ll]]=Ugrp.para[[ite.u-1]][,cluster.vec==which0[ll]] #com out(22/9/12)
            U.new[,cluster.vec==which0[ll]]=U.old[,cluster.vec==which0[ll]]
            #Ugrp.para[[ite.u]][[ll.d]]<-Ugrp.para[[ite.u-1]][[ll.d]]
          }
          #U=Ugrp.para[[ite.u]] #com out(22/9/12)
          #U.new=U.new
          #if(trace>3) cat("  colsum(U) is ",colSums(U),"\n") #check
          #if(trace>3) cat("  known.vec",known.vec.up,"\n")
          # break.ite<-T
          # break
        }

      }#####end paran######

      ite.ob=ite

      ####calculate obj only after updating UG #com out(22/9/12)
      #if(trace>4) cat("calculate obj function\n")
      #OB.vec[ite.ob]<-obj(dummy_mat=dummy.mat,dummy_diag=dummy.diag,
      #                    U=U,G=G.para[[ite.g]], B=B.para[[ite.cate]], Jn=Jn)

      #ite.ob<-ite.ob+1
      #browser()
      #com out(22/9/12)
      # if(verbose) cat("Start", formatC(ti, width = 5, digits = 0, mode = "integer"),
      #                 "Iter", formatC(ite, width = 5, digits = 0, mode = "integer"),
      #                 "Loss", formatC(OB.vec[ite.ob], width = 10, digits = 8, format = "f"),
      #                 "Diff", formatC(ifelse(ite==1,0,OB.vec[ite.ob]-OB.vec[ite.ob-1])
      #                 #"Diff", formatC(ifelse((ite==1 & paran==updateorder[1]),0,OB.vec[ite.ob]-OB.vec[ite.ob-1])
      #                                 #"Diff", formatC(ifelse((ite==1 & paran==updateorder[1]),0,OB.vec[1]-OB.vec[ite.ob-1])
      #                                 , width = 10, digits = 12, format = "f"),
      #                 "\n")

      if(verbose) cat("Start", formatC(ti, width = 3, digits = 0, mode = "integer"),
                      "Iteration", formatC(ite, width = 3, digits = 0, mode = "integer"),
                      "\n")

      if(ite.ob>1){

        #if(abs(OB.vec[ite.ob]-OB.vec[ite.ob-1])<tol){ #com out(22/9/12)
        if(sum(abs(B.new-B.old))<tol){
          #browser()
          #if(trace>2) cat(paste("  ",ite,"th ite converge at",paraname[paran],"update.\n"))
          #OB.vec[(ite.ob+1):length(OB.vec)]<-OB.vec[ite.ob]
          #convergence<-TRUE
          #break
          ####calculate obj only after iteration converged
          #if(trace>4) cat("calculate obj function\n")
          optval <- obj(dummy_mat=dummy.mat,dummy_diag=dummy.diag,
                              U=U.new,G=G.new, B=B.new, Jn=Jn)

          break.ite<-TRUE
          break
        }#else{
        # convergence<-FALSE
        #
      }

      #ite.ob<-ite.ob+1

      ###old####
      # para.list.now<-list(dummy.mat=dummy.mat,dummy.diag=dummy.diag,Qcate=B.para[[ite.cate]]
      #                     ,Ugrp=U,Ggrp=G.para[[ite.g]],ncatevec=q.vec,grpbase=grpbase)
      # obcheck.res<-objcheck.func(para.list=para.list.now,ite=ite,OB.vec=OB.vec,printcheck=trace
      #                            ,paraname.p=paraname[paran],ite.ob=ite.ob,obcheck.start=1,e.cri=tol)
      # OB.vec<-obcheck.res$OB.vec

      #down.para.vec[ite.ob]<-obcheck.res$down.para.save
      #ite.ob<-obcheck.res$ite.ob
      #only first cat, Dif is 0, otherwise print obval0-obval.

      # if(obcheck.res$convergence){
      #   break.ite<-TRUE
      #   break
      # }

      if(break.ite)  {
        #if(trace>2) cat("  stop it!!!\n")
        break
      }


    }###################end iteration#########################

    #if(trace>1) cat("  increased para:",paste(unique(down.para.vec),collapse="/"),"\n")
    #if(trace>0) cat("  ",ti,"th init, converged at",(ite),"th iteration.\n")

    #optval<-OB.vec[ite.ob-1] #com out(22/9/12)

    #############################name of row etc#########################
    #Bp<-B.para[[ite.cate]] #com out(22/9/12)
    Bp=B.new
    rownames(Bp)<-catename.vari.vec#stringr::str_sub(catevec,start=4)#,end=shortlab.ps)
    #catevec#rep(0,nrow(Bp))

    ##create U and G and Q
    #Ugrp.p<-Ugrp.para[[ite.u]] ; Gp<-G.para[[ite.g]] #com out(22/9/12)
    Ugrp.p=U.new ; Gp=G.new

    rownames(Gp)<-rep(classlabel,times=K.vec)

    objscale<-1/J
    #Qp<-objscale*(Jn%*%dummy.mat%*%Bp)
    Qp<-objscale*calc_JnDumB(Jn=Jn,dummy_mat = dummy.mat,B=Bp)
    rownames(Qp)<-rownames(Ugrp.p)<-rownames(dummy.mat)#paste0("obj",c(1:N))#rep("",N)

    clses.list<-rep(list(NA),C) ; clses.vec<-rep(NA,N)

    for(cc in 1:C){

      ###G
      ddk<-ifelse(cc!=1,sum(K.vec[c(1:(cc-1))]),0)
      dvec<-c((ddk+1):(ddk+K.vec[cc]))
      #rownames(Gp)[dvec]<-sapply(seq(1,K.vec[cc]),function(x)paste(cc,x,sep="-"))
      rownames(Gp)[dvec]<-sapply(seq(1,K.vec[cc]),function(x)paste(classlabel[cc],x,sep="-"))
      #seq(1,K.vec[cc])

      ###U
      #rownames(Ugrp.p[[cc]])<-sapply(seq(1,N.vec[cc]),function(x)paste(cc,x,sep="-"))
      #seq(1,N.vec[cc])
      Uc=Ugrp.p[class.n.vec==cc,cluster.vec==cc,drop=FALSE]
      clses.list[[cc]]<-apply(Uc,1,which.max)+ifelse(cc!=1,sum(K.vec[c(1:(cc-1))]),0)
      clses.vec[class.n.vec==cc]=clses.list[[cc]]
    }

    ####################finish all for that initial value####################
    return(list(G=Gp,B=Bp,Q=Qp,clses.vec=clses.vec,clses.list=clses.list,
                optval=optval,stepconv=ite))


  }#######################end initial value func#########################

  ## Apply algorithm over all starts
  res.list <- lapply(X = tot.list, FUN = oneinit.func)

  ti <- 1
  for(ti in 1:nstart){
    optval.vec[ti] <- res.list[[ti]]$optval
    #OB.mat[ti,]<-res.list[[ti]]$OB.vec
    #down.para.mat[ti,]<-res.list[[ti]]$down.para.vec
    stepconv.vec[ti] <- res.list[[ti]]$stepconv
  }



  ######decide minimum obvalue######
  res.list.min<-res.list[[which.min(optval.vec)]]

  #####gamma scale##########
  #if (scale.gamma) {
    B<-res.list.min$B
    Q<-res.list.min$Q
    G<-res.list.min$G
    distB <- sum(diag(t(B)%*%  B))
    distG <- sum(diag(t(G)%*% G))
    gam<- ((nrow(G)/nrow(B))* distB/distG)^.25

    res.list.min$Bg = (1/gam)*B
    res.list.min$Gg = gam*G
    res.list.min$Qg = gam*Q

  #}

  #res.list.min$class.n.vec <- class.n.vec
  res.list.min$cluster.vec <- cluster.vec
  #res.list.min$OB.mat <- OB.mat
  res.list.min$optval.vec <- optval.vec
  res.list.min$stepconv.vec <- stepconv.vec


  #for plot
  res.list.min$K.vec<-K.vec
  res.list.min$q.vec<-q.vec
  res.list.min$classlabel <- classlabel
  #res.list.min$classlabel.short <- classlabel.short
  res.list.min$catename.vec=catename.vec
  res.list.min$catename.vari.vec=catename.vari.vec
  res.list.min$cate.removed=cate.removed

  #rm(res.list.min$)

 # if(save.allinit) res.list.min$allinit.list <- res.list

  class(res.list.min)<-"mccca"
  res.list.min

}