R/em.algorithm.R
In EBarrays: Unified Approach for Simultaneous Gene Clustering and Differential Expression Identification

Documented in emfit0 emfit1 emfit2 emfit3 emfitmv

###class for result of single time emfit
setClass("ebarraysEMfit",
         representation(family = "ebarraysFamily",
                        hypotheses = "ebarraysPatterns",
                        thetaEst = "matrix",
                        probEst = "matrix",
                        ic="numeric"))

setMethod("show", "ebarraysEMfit",
          function(object) {

              cat(paste("\n EB model fit",
                        "\n\t Family:", object@family,
                        "(", object@family@description, ")"))
              n.cluster = dim(object@probEst)[1]
              cat("\n\n Model parameter estimates:\n\n")
              thetaEst<-NULL
              for (i in 1:dim(object@thetaEst)[1])
                  thetaEst<-rbind(thetaEst,object@family@invlink(object@thetaEst[i,]))

              if (is.null(rownames(object@thetaEst))) {
                rownames(thetaEst)<-paste("Cluster",1:dim(thetaEst)[1])
              }
              
              print(data.frame(thetaEst))
              cat("\n Estimated mixing proportions:\n\n")
              probEst<-object@probEst
              colnames(probEst)<-paste("Pattern",1:dim(probEst)[2])
              if (is.null(rownames(object@probEst))) {
                rownames(probEst)<-paste("Cluster",1:dim(thetaEst)[1])
              }
              print(data.frame(probEst))
          })

setGeneric("emfit",
           function(data, family, hypotheses, ...) standardGeneric("emfit"))


setMethod("emfit",
          signature(data = "matrix",
                    family = "character",
                    hypotheses = "ebarraysPatterns"),

          function(data,
                   family,
                   hypotheses, ...) {

              family <- as(family, "ebarraysFamily")
              callGeneric(data = data,
                          family = family,
                          hypotheses = hypotheses, ...)

          })



setMethod("emfit",
          signature(data = "matrix",
                    family = "ebarraysFamily",
                    hypotheses = "ebarraysPatterns"),

          function(data,
                   family,
                   hypotheses,
                   cluster=1,
                   type=2,
                   criterion="BIC",
                   cluster.init = NULL,
                   independent = F,
                   num.iter = 20,
                   verbose = getOption("verbose"),
                   optim.control = list(),
                   ...) {

             if (!require(cluster, quietly = TRUE)) stop("The cluster package could not be loaded")

             if (family=="LNNMV") {
                return(emfitmv(data=data,
                              family=family,
                              hypotheses=hypotheses,
                              num.iter=num.iter,
                              verbose=verbose,
                              optim.control=optim.control,
                              ...))
             } else if (type==1) { #fixed cluster
                return(emfit2(data,
                              family,
                              hypotheses,
                              cluster,
                              num.iter,
                              verbose,
                              optim.control,
                              ...))
              } else if (type==2) { # one number of clusters
                if (length(cluster)==1) {
                  return(emfit1(data,
                                family,
                                hypotheses,
                                cluster,
                                cluster.init,
                                independent,
                                num.iter,
                                verbose,
                                optim.control,
                                ...))
                } else { # several numbers of clusters
                  return(emfit3(data,
                                family,
                                hypotheses,
                                cluster,
                                criterion,
                                cluster.init,
                                independent,
                                num.iter,
                                verbose,
                                optim.control,
                                ...))
                }
              }
           })

emfit0 <- function(data,
                   family,
                   hypotheses,
                   num.iter = 20,
                   verbose = getOption("verbose"),
                   optim.control = list(),
                   ...) {

              family <- as(family, "ebarraysFamily")

              ## Store current process time
              if (exists("proc.time")) startTime <- proc.time()

              ## Define function to be optimized by optim:

              complete.loglik <-
                  function(theta,
                           zhat,
                           zhatlogp,
                           hypotheses,
                           f0, f0.args)
                  {
                      pred.dens <- zhat # values unimportant, just need the same dimensions

                      ## NOTE: f0 returns the log value of the predictive probability
                      ## density function.
                      for (i in seq(along = hypotheses))
                      {
                          foo <- 0 #numeric(nrow(zhat))
                          for (j in seq(along = hypotheses[[i]]))
                          {
                              foo <- 
                                  foo +
                                      f0(theta = theta,
                                         args = c(f0.args$common.args,
                                         f0.args$pattern.args[[i]][[j]]))
                          }
                          pred.dens[, i] <- foo
                      }
                      -(sum(zhat * pred.dens) + zhatlogp)
                  }


              ## Some preprocessing:
              verbose <- as.numeric(verbose)
              patternObj <- hypotheses
              hypotheses <- as(hypotheses@patterns, "list")

              if (verbose > 0) cat(paste("\n Checking for negative entries..."))
              all.pos <- apply(data, 1, function(x) !any(x <= 0))
              if (verbose > 0 && sum(all.pos) != length(all.pos)) {
                  cat(paste("\n\t", length(all.pos) - sum(all.pos),
                            "rows out of", length(all.pos),
                            "had at least one negative entry"))
                  cat("\n\t These rows will not be used in the EM fit\n")
              }


              if (verbose > 0)
                  cat(paste("\n Generating summary statistics for patterns.",
                            "\n This may take a few seconds...\n"))


              f0.args <-
                          family@f0.arglist(data[all.pos, ],
                                    hypotheses)


              theta.temp <- family@thetaInit(data[all.pos, ])
              ## should we rm(data) now ? Is data copied in memory ?


              p.init <- rep(1, length(hypotheses))
              p.init <- p.init / sum(p.init)
              p.temp <- p.init

              ## Start Iterations
              if (verbose > 0)
                  cat(paste("\n Starting EM iterations (total", num.iter,
                            ").\n This may take a while\n\n"))

              notdone <- TRUE ## not used now, perhaps some convergence criteria
              iter <- 0
              zhat <- matrix(0, sum(all.pos), length(hypotheses))

              while ({iter <- iter + 1;  (iter <= num.iter && notdone)})
              {

                  if (verbose > 0)
                      cat(paste("\t Starting iteration", iter, "...\n"))
                  
                  ## E step
                  zhat[,] <- 0

                  ## NOTE: f0 returns the log value of the predictive
                  ## probability density function. Thus to get the log of the
                  ## product of the predictive probability densities, we add
                  ## (log(PA=1 * PA=2) = log(PA=1) + log(PA=2)).

                  for (i in seq(along = hypotheses))
                  {
                      foo <- 0
                      for (j in seq(along = hypotheses[[i]]))
                      {
                          foo <- 
                              foo + 
                                  family@f0(theta = theta.temp,
                                            args = c(f0.args$common.args,
                                            f0.args$pattern.args[[i]][[j]]))
                      }
                      #zhat[, i] <- foo

                      if (patternObj@ordered & length(hypotheses[[i]])>1)
                      {
                        zhat[,i] <- foo+family@deflate(theta = theta.temp,f0.args$pattern.args[[i]])
                      } else {
                        zhat[,i] <- foo
                      }
                  }

                  zhat <- .Call("makeProbVect", zhat, p.temp, PACKAGE = "EBarrays")

                  #complete.lik<-zhat #value unimportant, just need the same dimension
                  #for (i in 1:dim(zhat)[1])
                  #{
                  #  complete.lik[i,]<-exp(zhat[i,])*p.temp
                  #}                  
                  #zhat<-complete.lik/apply(complete.lik,1,sum)

                  p.temp <- colSums(zhat)
                  p.temp <- p.temp / sum(p.temp)
                  if (verbose > 1) {
                      cat("\n\n      p.temp: ")
                  }


                  ## M step
                  zhatlogp <- sum(zhat %*% log(p.temp))
                  fit.temp <-
                      optim(par = theta.temp,
                            fn = complete.loglik,
                            method = c("L-BFGS-B"),
                            lower = family@lower.bound,
                            upper = family@upper.bound,
                            control = optim.control,
                            zhat = zhat,
                            zhatlogp = sum(zhat %*% log(p.temp)),
                            hypotheses = hypotheses,
                            f0 = family@f0,
                            f0.args = f0.args)
                  theta.temp <- fit.temp$par

                  if (verbose > 1) {
                      cat("\n\n  theta.temp: ")
                      print(theta.temp)
                  }
              }


              ## Finish up

              if (verbose > 1) {
                  cat("\n\n")
                  print("model parameter estimates:")
                  print(theta.temp)
                  print("Estimates of mixing proportions:")
                  print(p.temp)
              }

              if (verbose && exists("proc.time")) {
                  endTime <- proc.time()
                  cat(sprintf("\n\n Fit used %5.2f seconds user time\n",
                              endTime[1] - startTime[1]))
              }

              thetaEst<-rbind(NULL,theta.temp)
              probEst<-rbind(NULL,p.temp)
              rownames(thetaEst)<-NULL
              rownames(probEst)<-NULL

              new("ebarraysEMfit", 
                   family = family,
                   hypotheses = patternObj,
                   thetaEst = thetaEst,
                   probEst = probEst,
                   ic = fit.temp$value+(3+length(hypotheses)-1)*c(0.5*log(sum(all.pos)),1,log(log(sum(all.pos)))))
            }

emfit1<-function(data,
                 family,
                 hypotheses,
                 n.cluster,
                 cluster.init = NULL,
                 independent = FALSE,
                 num.iter = 20,
                 verbose = getOption("verbose"),
                 optim.control = list(),
                 ...) {

              if (is(data, "ExpressionSet")) data<-exprs(data)
              
              family <- as(family, "ebarraysFamily")
              
              if (!is.null(cluster.init) & length(cluster.init)!=dim(data)[1]) {
                warning("Number of genes appeared in cluster.init does not match the data; cluster.init is discarded!")
                cluster.init <- NULL
              }

              ## Store current process time
              if (exists("proc.time")) startTime <- proc.time()

              ## Define function to be optimized by optim:

              complete.loglik <-
                  function(theta,
                           zhat,
                           zhatlogp,
                           hypotheses,
                           f0, f0.args)
                  {
                      pred.dens <- zhat # values unimportant, just need the same dimensions

                      ## NOTE: f0 returns the log value of the predictive probability
                      ## density function.
                      for (i in seq(along = hypotheses))
                      {
                          foo <- 0 #numeric(nrow(zhat))
                          for (j in seq(along = hypotheses[[i]]))
                          {
                              foo <- 
                                  foo +
                                      f0(theta = theta,
                                         args = c(f0.args$common.args,
                                         f0.args$pattern.args[[i]][[j]]))
                          }
                          pred.dens[, i] <- foo
                      }
                      -(sum(zhat * pred.dens) + zhatlogp)
                  }


              ## Some preprocessing:
              verbose <- as.numeric(verbose)
              patternObj <- hypotheses
              hypotheses <- as(hypotheses@patterns, "list")

              if (verbose > 0) cat(paste("\n Checking for negative entries..."))
              all.pos <- apply(data, 1, function(x) !any(x <= 0))
              if (verbose > 0 && sum(all.pos) != length(all.pos)) {
                  cat(paste("\n\t", length(all.pos) - sum(all.pos),
                            "rows out of", length(all.pos),
                            "had at least one negative entry"))
                  cat("\n\t These rows will not be used in the EM fit\n")
              }

              data<-data[all.pos,]

              if (is.null(cluster.init)) {
                  cluster.init<-clara(log(data),n.cluster)$clustering
              } else {
                  cluster.init<-cluster.init[all.pos,]
              }

              unique.cluster<-unique(cluster.init)
              n.cluster <- length(unique.cluster)
              n.gene<-dim(data)[1]
              
              if (n.cluster==1) {
                return(emfit0(data,
                              family,
                              patternObj,
                              num.iter,
                              verbose,
                              optim.control,
                              ...))
              }
                   
              if (verbose > 0)
                  cat(paste("\n Generating summary statistics for patterns.",
                            "\n This may take a few seconds...\n"))

              f0.args <- family@f0.arglist(data,hypotheses)

              theta.temp<-NULL
              for (i in unique(cluster.init))
              {
                  theta.temp<-rbind(theta.temp,family@thetaInit(data[cluster.init==i,]))
              }
              
              p.temp <- matrix(1, n.cluster,length(hypotheses))

              p.temp <- p.temp / sum(p.temp)

              ## Start Iterations
              if (verbose > 0)
                  cat(paste("\n Starting EM iterations (total", num.iter,
                            ").\n This may take a while\n\n"))

              notdone <- TRUE ## not used now, perhaps some convergence criteria
              iter <- 0
              zhat<-array(0,dim=c(n.gene, n.cluster, length(hypotheses)))
              
              while ({iter <- iter + 1;  (iter <= num.iter && notdone)})
              {

                  if (verbose > 0)
                      cat(paste("\t Starting iteration", iter, "...\n"))
                  
                  ## E step
                  zhat<-zhat*0
                  
                  ## NOTE: f0 returns the log value of the predictive
                  ## probability density function. Thus to get the log of the
                  ## product of the predictive probability densities, we add
                  ## (log(PA=1 * PA=2) = log(PA=1) + log(PA=2)).

                  for (k in 1:n.cluster)
                  {
                    for (i in seq(along = hypotheses))
                    {
                      foo <- 0
                      for (j in seq(along = hypotheses[[i]]))
                      {
                          foo <- 
                              foo + 
                                  family@f0(theta = theta.temp[k,],
                                            args = c(f0.args$common.args,
                                            f0.args$pattern.args[[i]][[j]]))
                      }
                      
                      if (patternObj@ordered & length(hypotheses[[i]])>1)
                      {
                        zhat[,k,i] <- foo+family@deflate(theta = theta.temp[k,],f0.args$pattern.args[[i]])
                      } else {
                        zhat[,k,i] <- foo
                      }
                    }
                  }

                  #update posterior prob
                  zhat <- .Call("makeProbVectArr", zhat, p.temp, PACKAGE = "EBarrays")

                  #complete.lik<-zhat #value unimportant, just need the same dimension
                  #for (i in 1:n.gene)
                  #{
                  #  complete.lik[i,,]<-exp(zhat[i,,])*p.temp
                  #}
                  #
                  #zhat<-complete.lik/apply(complete.lik,1,sum)
                  
                  p.temp<-apply(zhat,c(2,3),mean)

                  if (independent)
                  {
                      p.pat<-apply(p.temp,1,sum)
                      p.clus<-apply(p.temp,2,sum)
                      p.temp<-outer(p.pat,p.clus,"*")
                  }
                          
                  ## M step
                  bic<-0
                  for (i in 1:n.cluster) {
                      zhatlogp <- sum(zhat[,i,] %*% log(p.temp[i,]))
                      max.comp.lik <-
                          optim(par = theta.temp[i,],
                                fn = complete.loglik,
                                method = c("L-BFGS-B"),
                                lower = family@lower.bound,
                                upper = family@upper.bound,
                                control = optim.control,
                                zhat = zhat[,i,],
                                zhatlogp = zhatlogp,
                                hypotheses = hypotheses,
                                f0 = family@f0,
                                f0.args = f0.args)
                    
                      theta.temp[i,] <- max.comp.lik$par
                      bic<-bic+max.comp.lik$value
                  }

                  if (verbose > 1) {
                      cat("\n\n  theta.temp: ")
                      print(theta.temp)
                  }
              }

              ## Finish up
              if (verbose > 1) {
                  cat("\n\n")
                  print("model parameter estimates:")
                  print(theta.temp)
                  print("Estimates of mixing proportions:")
                  print(t(p.temp))
              }

              if (verbose && exists("proc.time")) {
                  endTime <- proc.time()
                  cat(sprintf("\n\n Fit used %5.2f seconds user time\n",
                              endTime[1] - startTime[1]))
              }

              ic<- bic+(3*n.cluster+length(hypotheses)*n.cluster-1)*c(0.5*log(sum(all.pos)),1,log(log(sum(all.pos))))
              names(ic)<-c("BIC","AIC","HQ")
              
              new("ebarraysEMfit", 
                   family = family,
                   hypotheses = patternObj,
                   thetaEst = theta.temp,
                   probEst = p.temp,
                   ic = ic)
}

emfit2<-function(data,
                 family,
                 hypotheses,
                 cluster,
                 num.iter = 20,
                 verbose = getOption("verbose"),
                 optim.control = list(),
                 ...) {

              if (is(data, "ExpressionSet")) data<-exprs(data)
              
              family <- as(family, "ebarraysFamily")
              
              if (length(cluster)!=dim(data)[1]) {
                stop("Number of genes appeared in cluster does not match the data!")
              }

              thetaEst <- NULL
              probEst <- NULL
              
              unique.cluster<-unique(cluster)

              ic<-rep(0,3)
              for (i in unique.cluster) {
                fit <- emfit0(data,
                              family,
                              hypotheses,
                              num.iter,
                              verbose,
                              optim.control,
                              ...)

                ic<-ic+fit@ic
                
                thetaEst<-rbind(thetaEst,fit@thetaEst[1,])
                probEst<-rbind(probEst,fit@probEst[1,])
              }
              
              rownames(thetaEst)<-unique.cluster
              rownames(probEst)<-unique.cluster
              
              new("ebarraysEMfit", 
                   family = family,
                   hypotheses = hypotheses,
                   thetaEst = thetaEst,
                   probEst = probEst,
                   ic = ic)
}

emfit3<-function(data,
                 family,
                 hypotheses,
                 n.cluster,
                 criterion="BIC",
                 cluster.init = NULL,
                 independent = FALSE,
                 num.iter = 20,
                 verbose = getOption("verbose"),
                 optim.control = list(),
                 ...) {

             crit<-switch(EXPR=criterion,BIC=1,AIC=2,HQ=3)
             n.cluster<-sort(n.cluster)
             fit<-emfit1(data,family,hypotheses,n.cluster[1],cluster.init,independent, num.iter,verbose,optim.control)

             for (i in 2:length(n.cluster)) {
               cur.fit<-emfit1(data=data,family=family,hypotheses=hypotheses,n.cluster=n.cluster[i],cluster.init=cluster.init,independent=independent, num.iter=num.iter,verbose=verbose,optim.control=optim.control)

               if (cur.fit@ic[crit]<fit@ic[crit]) {
                 fit<-cur.fit
               }
             }

             return(fit)
           }

emfitmv<-          function(data,
                   family,
                   hypotheses,
                   groupid = NULL,
                   theta.init = NULL, p.init = NULL,
                   num.iter = 20,
                   verbose = getOption("verbose"),
                   trace = TRUE,
                   optim.control = list(),
                   ...) {

              ## Store current process time
              
              if (hypotheses@ordered)
                 stop("Family LNNMV does not support ordered hypotheses.")
                 
              if (exists("proc.time")) startTime <- proc.time()

              ## Define function to be optimized by optim:

              complete.loglik <-
                  function(theta,
                           zhat,
                           zhatlogp,
                           hypotheses,
                           f0, f0.args)
                  {
                      pred.dens <- zhat # values unimportant, just need the same dimensions

                      ## NOTE: f0 returns the log value of the predictive probability
                      ## density function.
                      for (i in seq(along = hypotheses))
                      {
                          foo <- 0 #numeric(nrow(zhat))
                          for (j in seq(along = hypotheses[[i]]))
                          {
                              foo <- 
                                  foo +
                                      f0(theta = theta,
                                         args = c(f0.args$common.args,
                                         f0.args$pattern.args[[i]][[j]]))
                          }
                          pred.dens[, i] <- foo
                      }
                      -(sum(zhat * pred.dens) + zhatlogp)
                  }


              ## Some preprocessing:
              verbose <- as.numeric(verbose)
              patternObj <- hypotheses
              hypotheses <- as(hypotheses@patterns, "list")

              if (verbose > 0) cat(paste("\n Checking for negative entries..."))
              all.pos <- apply(data, 1, function(x) !any(x <= 0))
              if (verbose > 0 && sum(all.pos) != length(all.pos)) {
                  cat(paste("\n\t", length(all.pos) - sum(all.pos),
                            "rows out of", length(all.pos),
                            "had at least one negative entry"))
                  cat("\n\t These rows will not be used in the EM fit\n")
              }


              if (verbose > 0)
                  cat(paste("\n Generating summary statistics for patterns.",
                            "\n This may take a few seconds...\n"))

              if (family=="LNNMV"){
                      f0.args <- 
                          family@f0.arglist(data[all.pos, ],
                                            hypotheses,groupid)
              }
              else
                f0.args <- 
                          family@f0.arglist(data[all.pos, ],
                                            hypotheses)
              
              

              theta.temp <-

                  if (is.null(theta.init))

                      ## note that this is expected in terms of the
                      ## parametrization used in actual optimization,
                      ## not necessarily the one expected by the user.

                      family@thetaInit(data[all.pos, ])

                  else

                      ## explicitly supplied initial estimates are in
                      ## the 'user' parametrization, which are
                      ## converted to the required parametrization by
                      ## the family@link function

                      family@link(theta.init)

              ## should we rm(data) now ? Is data copied in memory ?

              theta.mat <- matrix(NA, num.iter + 1, length(theta.temp))
              theta.mat[1,] <- theta.temp

              ## If missing, set initial probabilities as all equal
              if (is.null(p.init)) p.init <- rep(1, length(hypotheses))
              p.init <- p.init / sum(p.init)
              p.temp <- p.init
              p.mat <- matrix(NA, num.iter + 1, length(p.init))
              p.mat[1,] <- p.init

              ## Start Iterations
              if (verbose > 0)
                  cat(paste("\n Starting EM iterations (total", num.iter,
                            ").\n This may take a while\n\n"))

              notdone <- TRUE ## not used now, perhaps some convergence criteria
              iter <- 0
              zhat <- matrix(0, sum(all.pos), length(hypotheses))

              while ({iter <- iter + 1;  (iter <= num.iter && notdone)})
              {

                  if (verbose > 0)
                      cat(paste("\t Starting iteration", iter, "...\n"))
                  
                  ## E step
                  zhat[,] <- 0

                  ## NOTE: f0 returns the log value of the predictive
                  ## probability density function. Thus to get the log of the
                  ## product of the predictive probability densities, we add
                  ## (log(PA=1 * PA=2) = log(PA=1) + log(PA=2)).

                  for (i in seq(along = hypotheses))
                  {
                      foo <- 0
                      for (j in seq(along = hypotheses[[i]]))
                      {
                          foo <- 
                              foo + 
                                  family@f0(theta = theta.temp,
                                            args = c(f0.args$common.args,
                                            f0.args$pattern.args[[i]][[j]]))
                      }
                      zhat[, i] <- foo
                  }

                  zhat <- .Call("makeProbVect", zhat, p.temp, PACKAGE = "EBarrays")

                  p.temp <- colSums(zhat)
                  p.temp <- p.temp / sum(p.temp)
                  if (verbose > 1) {
                      cat("\n\n      p.temp: ")
                  }


                  ## M step
                  zhatlogp <- sum(zhat %*% log(p.temp))
                  theta.temp <-
                      optim(par = theta.temp,
                            fn = complete.loglik,
                            method = c("L-BFGS-B"),
                            lower = family@lower.bound,
                            upper = family@upper.bound,
                            control = optim.control,
                            zhat = zhat,
                            zhatlogp = sum(zhat %*% log(p.temp)),
                            hypotheses = hypotheses,
                            f0 = family@f0,
                            f0.args = f0.args)$par

                  theta.mat[iter + 1,] <- theta.temp
                  p.mat[iter + 1,] <- p.temp

                  if (verbose > 1) {
                      cat("\n\n  theta.temp: ")
                      print(theta.temp)
                  }
              }

              ################################
              rm(zhat)
              ################################

              ## Finish up

              colnames(theta.mat) <- names(theta.temp) <-
                  c(paste("theta", 1:(length(theta.temp)), sep=""))
              colnames(p.mat) <- names(p.temp) <-
                  c(paste("p", 1:(length(p.temp)), sep=""))

              if (verbose > 1) {
                  cat("\n\n")
                  print("model parameter estimates:")
                  print(theta.temp)
                  print("Estimates of mixing proportions:")
                  print(p.temp)
              }

              if (verbose && exists("proc.time")) {
                  endTime <- proc.time()
                  cat(sprintf("\n\n Fit used %5.2f seconds user time\n",
                              endTime[1] - startTime[1]))
              }

              new("ebarraysEMfit", 
                   family = family,
                   hypotheses = patternObj,
                   thetaEst = rbind(theta.temp,NULL),
                   probEst = rbind(p.temp,NULL),
                   ic = 0)
}


#posterior probability calculation
setGeneric("postprob",
           function(fit,data, ...) standardGeneric("postprob"))

setMethod("postprob",
          signature(fit = "ebarraysEMfit",
                    data = "matrix"),

          function(fit,
                   data,...) {

    data[data <= 0] <-min(data[data > 0])
    patternObj<-fit@hypotheses
    hypotheses <- patternObj@patterns
    family<-fit@family

    n.cluster<-dim(fit@thetaEst)[1]
    
    f0.args <-family@f0.arglist(data,hypotheses,...)
    zhat<-array(0,dim=c(dim(data)[1], n.cluster,length(hypotheses)))
                  
    ## NOTE: f0 returns the log value of the predictive
    ## probability density function. Thus to get the log of the
    ## product of the predictive probability densities, we add
    ## (log(PA=1 * PA=2) = log(PA=1) + log(PA=2)).
    for (k in 1:n.cluster)
    {
        for (i in seq(along = hypotheses))
        {
            foo <- 0
            for (j in seq(along = hypotheses[[i]]))
            {
                foo <- 
                    foo + 
                        family@f0(theta = fit@thetaEst[k,],
                                  args = c(f0.args$common.args,
                                  f0.args$pattern.args[[i]][[j]]))
            }
            
            if (patternObj@ordered & length(hypotheses[[i]])>1)
            {
                zhat[,k,i] <- foo+family@deflate(theta = fit@thetaEst[k,],f0.args$pattern.args[[i]])
            } else {
                zhat[,k,i] <- foo
            }
        }
    }

    #update posterior prob
    zhat <- .Call("makeProbVectArr", zhat, fit@probEst, PACKAGE = "EBarrays")

    #complete.lik<-zhat #value unimportant, just need the same dimension
    #for (i in 1:dim(data)[1])
    #{
    #    complete.lik[i,,]<-exp(zhat[i,,])*fit@probEst
    #}    
    #post.prob<-complete.lik/apply(complete.lik,1,sum)

    rownames(zhat)<-rownames(data)

    return(list(joint=zhat,cluster=apply(zhat,c(1,2),sum),pattern=apply(zhat,c(1,3),sum)))
})
Any scripts or data that you put into this service are public.
EBarrays documentation built on Nov. 8, 2020, 8:27 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
EBarrays
Unified Approach for Simultaneous Gene Clustering and Differential Expression Identification

R/em.algorithm.R
In EBarrays: Unified Approach for Simultaneous Gene Clustering and Differential Expression Identification

Defines functions emfitmv emfit3 emfit2 emfit1 emfit0

Documented in emfit0 emfit1 emfit2 emfit3 emfitmv

Try the EBarrays package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

EBarrays Unified Approach for Simultaneous Gene Clustering and Differential Expression Identification

R/em.algorithm.R In EBarrays: Unified Approach for Simultaneous Gene Clustering and Differential Expression Identification

Defines functions emfitmv emfit3 emfit2 emfit1 emfit0

Documented in emfit0 emfit1 emfit2 emfit3 emfitmv

Try the EBarrays package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

EBarrays
Unified Approach for Simultaneous Gene Clustering and Differential Expression Identification

R/em.algorithm.R
In EBarrays: Unified Approach for Simultaneous Gene Clustering and Differential Expression Identification