R/EM_mnmm.R

Defines functions mv2vec print.EMmvnmm print.EMnmm EM_nmm EM_mnmm

## EM for MNMM (Multi-variate Normal Mixture Model) with Nc components
## if init is not speciefied, then knn is used to initialize means,
## cluster weights and covariance matrices (taken from the knn
## determined clusters)

EM_mnmm <- function(
  X,
  Nc,
  mix_init,
  Ninit = 50,
  verbose = FALSE,
  Niter.max = 500,
  tol,
  Neps,
  eps = c(w = 0.005, m = 0.005, s = 0.005)
) {
  ## in case X is no matrix, interpret it as uni-variate case
  if (!is.matrix(X)) {
    X <- matrix(X, ncol = 1)
  }

  N <- dim(X)[1]
  Nd <- dim(X)[2]
  assert_that(N + Nc >= Ninit)

  ## initialize normal EM using a Student-t EM which is very robust
  ## against outliers
  if (missing(mix_init)) {
    mix_init <- EM_msmm(
      X,
      Nc,
      Ninit = Ninit,
      verbose = verbose,
      Niter.max = round(Niter.max / 2),
      tol = 0.1
    )
  }
  pEst <- mix_init$p
  muEst <- mix_init$center
  covEst <- mix_init$cov

  ## take current estimates and transform to scale on which
  ## convergence is assessed
  est2par <- function(p, mu, cov) {
    est <- rbind(
      logit(p),
      matrix(sapply(1:Nc, function(i) mv2vec(mu[i, ], cov[i, , ])), ncol = Nc)
    )
    est[(1 + Nd + 1):(1 + 2 * Nd), ] <- log(est[(1 + Nd + 1):(1 + 2 * Nd), ])
    est
  }
  ## in case tolerance is not specified, then this criteria is
  ## ignored
  if (missing(tol)) {
    checkTol <- FALSE
    tol <- -1
  } else {
    checkTol <- TRUE
  }

  if (missing(Neps)) {
    ## in case tolerance has been declared, but Neps not, we flag
    ## to disable checking of running mean convergence check
    checkEps <- FALSE
    Neps <- 5
  } else {
    checkEps <- TRUE
  }

  ## if nothing is specified, we declare convergence based on a
  ## running mean of differences in parameter estimates
  if (!checkTol & !checkEps) {
    checkEps <- TRUE
  }

  assert_that(Neps > 1)
  assert_that(ceiling(Neps) == floor(Neps))

  ## eps can also be given as a single integer which is interpreted
  ## as number of digits
  if (length(eps) == 1) eps <- rep(10^(-eps), 3)

  ## degrees of freedom
  ## covariance matrix df per component
  cov.df <- (Nd - 1) * Nd / 2 + Nd
  df <- Nc * (Nd + cov.df) + Nc - 1
  df.comp <- cov.df + Nd + 1

  ## expand eps according to the dimensionality
  eps <- c(
    eps[1],
    rep(eps[2], Nd),
    rep(eps[3], Nd),
    rep(eps[2], (Nd - 1) * Nd / 2)
  )

  iter <- 0
  logN <- log(N)
  traceMix <- list()
  traceLli <- c()
  Dlli <- Inf
  runMixPar <- array(-Inf, dim = c(Neps, df.comp, Nc))
  runOrder <- 0:(Neps - 1)
  if (Nc == 1) Npar <- df else Npar <- df + 1

  ## initialize component and element wise log-likelihood matrix
  lli <- array(-20, dim = c(N, Nc))

  if (verbose) {
    message("EM multi-variate normal with Nc =", Nc, ":\n")
  }

  while (iter < Niter.max) {
    ## calculate responsabilities from the likelihood terms;
    ## calculations are done in log-space to avoid numerical
    ## difficulties if some points are far away from some
    ## component and hence recieve very low density
    for (i in seq(Nc)) {
      lli[, i] <- log(pEst[i]) +
        dmvnorm(
          X,
          muEst[i, ],
          as.matrix(covEst[i, , ]),
          log = TRUE,
          checkSymmetry = FALSE
        )
    }
    ## ensure that the log-likelihood does not go out of numerical
    ## reasonable bounds
    lli <- apply(lli, 2, pmax, -30)
    ## lnresp <- apply(lli, 1, log_sum_exp)
    lnresp <- matrixStats::rowLogSumExps(lli)
    ## the log-likelihood is then given by the sum of lresp
    lliCur <- sum(lnresp)
    ## record current state
    traceMix <- c(traceMix, list(list(p = pEst, mean = muEst, sigma = covEst)))
    traceLli <- c(traceLli, lliCur)
    if (iter > 1) {
      ## Dlli is the slope of the log-likelihood evaulated with
      ## a second order method
      Dlli <- (traceLli[iter + 1] - traceLli[iter - 1]) / 2
    }
    if (Nc > 1) {
      smean <- apply(
        runMixPar[order(runOrder), , , drop = FALSE],
        c(2, 3),
        function(x) mean(abs(diff(x)))
      )
      eps.converged <- sum(sweep(smean, 1, eps, "-") < 0)
    } else {
      smean <- apply(
        runMixPar[order(runOrder), -1, , drop = FALSE],
        c(2, 3),
        function(x) mean(abs(diff(x)))
      )
      eps.converged <- sum(sweep(smean, 1, eps[-1], "-") < 0)
    }
    if (is.na(eps.converged)) eps.converged <- 0
    if (verbose) {
      message(
        "Iteration ",
        iter,
        ": log-likelihood = ",
        lliCur,
        "; Dlli = ",
        Dlli,
        "; converged = ",
        eps.converged,
        " / ",
        Npar,
        "\n",
        sep = ""
      )
    }
    if (checkTol & Dlli < tol) {
      break
    }
    if (iter >= Neps & checkEps & eps.converged == Npar) {
      break
    }
    ## ... and the responseability matrix follows from this by
    ## appropiate normalization.
    lresp <- sweep(lli, 1, lnresp, "-")
    ## resp <- exp(lresp)

    ## mean probability to be in a specific mixture component -> updates
    ## pEst
    ## lzSum <- apply(lresp, 2, log_sum_exp)
    lzSum <- colLogSumExps(lresp)
    ## zSum <- exp(lzSum)
    pEst <- exp(lzSum - logN)

    ## make sure it is scale to exactly 1 which may not happen due
    ## to small rounding issues
    pEst <- pEst / sum(pEst)

    ## now obtain new estimates for each component of the mixtures
    ## of their mu vector and covariance matrices
    for (i in seq(Nc)) {
      upd <- cov.wt(X, exp(lresp[, i] - lzSum[i]), method = "ML")
      ## upd <- cov.wt(X, resp[,i]/zSum[i], method="ML")
      muEst[i, ] <- upd$center
      covEst[i, , ] <- upd$cov
      ## ensure that diagonal stays non-zero
      for (j in 1:Nd) {
        covEst[i, j, j] <- max(covEst[i, j, j], .Machine$double.eps)
      }
    }

    ind <- 1 + iter %% Neps
    runMixPar[ind, , ] <- est2par(pEst, muEst, covEst)
    runOrder[ind] <- iter

    iter <- iter + 1
  }
  if (iter + 1 == Niter.max) {
    warning("Maximum number of iterations reached.")
  }

  ## sort by largest weight
  o <- order(pEst, decreasing = TRUE)
  pEst <- pEst[o]
  muEst <- muEst[o, , drop = FALSE]
  covEst <- covEst[o, , , drop = FALSE]

  ## if(Nd != 1) {
  ##    rhoEst <- array(apply(covEst, 1, cov2cor), c(Nd,Nd,Nc))
  ##    rhoEst <- apply(rhoEst, 3, function(x) x[lower.tri(x)])
  ##    tauEst <- sqrt(t(apply(covEst, 1, diag)))
  ## } else {
  ##    rhoEst <- NULL
  ##    tauEst <- sqrt(as.vector(covEst))
  ## }

  ## mixEst <- list(p=pEst, mean=muEst, sigma=covEst)

  mixEst <- do.call(
    mixmvnorm,
    lapply(
      1:Nc,
      function(i)
        c(pEst[i], muEst[i, , drop = TRUE], matrix(covEst[i, , ], Nd, Nd))
    )
  )

  ## give further details
  attr(mixEst, "df") <- df
  attr(mixEst, "nobs") <- N
  attr(mixEst, "lli") <- lliCur

  attr(mixEst, "Nc") <- Nc

  convert <- function(est)
    suppressWarnings(do.call(
      mixmvnorm,
      lapply(
        1:Nc,
        function(i)
          c(
            est$p[i],
            est$mean[i, , drop = FALSE],
            matrix(est$sigma[i, , ], Nd, Nd)
          )
      )
    ))

  attr(mixEst, "tol") <- tol
  attr(mixEst, "traceLli") <- traceLli
  attr(mixEst, "traceMix") <- lapply(traceMix, convert)
  attr(mixEst, "x") <- X

  class(mixEst) <- c("EM", "EMmvnmm", "mvnormMix", "mix")

  mixEst
}

## uni-variate case
EM_nmm <- function(
  X,
  Nc,
  mix_init,
  verbose = FALSE,
  Niter.max = 500,
  tol,
  Neps,
  eps = c(w = 0.005, m = 0.005, s = 0.005)
) {
  if (is.matrix(X)) {
    assert_matrix(X, any.missing = FALSE, ncols = 1)
  }
  mixEst <- EM_mnmm(
    X = X,
    Nc = Nc,
    mix_init = mix_init,
    verbose = verbose,
    Niter.max = Niter.max,
    tol = tol,
    Neps = Neps,
    eps = eps
  )
  rownames(mixEst) <- c("w", "m", "s")
  class(mixEst) <- c("EM", "EMnmm", "normMix", "mix")
  attr(mixEst, "traceMix") <- lapply(
    attr(mixEst, "traceMix"),
    function(x) {
      class(x) <- class(mixEst)
      rownames(x) <- rownames(mixEst)
      x
    }
  )
  likelihood(mixEst) <- "normal"
  mixEst
}

#' @export
print.EMnmm <- function(x, ...) {
  cat(
    "EM for Normal Mixture Model\nLog-Likelihood = ",
    logLik(x),
    "\n\n",
    sep = ""
  )
  NextMethod()
}

#' @export
print.EMmvnmm <- function(x, ...) {
  cat(
    "EM for Multivariate Normal Mixture Model\nLog-Likelihood = ",
    logLik(x),
    "\n\n",
    sep = ""
  )
  NextMethod()
}

## given a vector of means and a covariance matrix for a multi-variate
## normal (and optionally a vector of df), return a vector of
## coefficients with a deterministic mapping
mv2vec <- function(mean, sigma, df, label = TRUE) {
  Nd <- length(mean)
  if (Nd != 1) {
    rho <- cov2cor(sigma)[lower.tri(sigma)]
    tau <- sqrt(diag(sigma))
  } else {
    rho <- NULL
    tau <- sqrt(sigma)
  }
  if (missing(df)) df <- NULL
  res <- c(mean, tau, rho, df)
  if (label) {
    tauN <- paste("sd", 1:Nd, sep = "")
    cols <- names(mean)
    if (is.null(cols)) {
      cols <- paste("mu", 1:Nd, sep = "")
    }
    if (Nd == 1) {
      corNames <- NULL
    } else if (length(cols) == 2) {
      corNames <- "cor"
    } else {
      corNames <- outer(cols, cols, paste, sep = "_")
      corNames <- paste("cor", corNames[lower.tri(corNames)], sep = ".")
    }
    if (!is.null(df)) {
      dfNames <- paste("df", 1:Nd, sep = "")
    } else {
      dfNames <- NULL
    }
    names(res) <- c(cols, tauN, corNames, dfNames)
  }
  return(res)
}

## vec2mv <- function(vec) {
##     ## calculate dimension from the number of parameters
##     Nd <- -3/2 + sqrt( 9/4 + 2*length(vec) )
##     mean <- vec[1:Nd]
##     Tau <- diag(vec[(Nd+1):(2*Nd)])
##     sigma <- diag(Nd)/2
##     L <- lower.tri(sigma)
##     E <- sum(L)
##     sigma[L] <- vec[(2*Nd+1):(2*Nd+E)]
##     sigma <- sigma + t(sigma)
##     sigma <- Tau %*% sigma %*% Tau
##     list(mean=mean, sigma=sigma)
## }
##
## ## extracts results in a flattened form
## extractMVNmix <- function(emFit) {
##     pMix <- emFit$p
##     cols <- colnames(emFit$center)
##     if(is.null(cols))
##         cols <- paste("X", 1:ncol(emFit$center), sep="")
##     if(length(cols) == 1)
##         corNames <- NULL
##     else if(length(cols) == 2)
##         corNames <- "cor"
##     else {
##         corNames <- outer(cols, cols, paste, sep="_")
##         corNames <- paste("cor", corNames[lower.tri(corNames)], sep=".")
##     }
##     MAPmix <- do.call(cbind, list(emFit$center, emFit$tau, emFit$rho))
##     colnames(MAPmix) = c(paste("mean", cols, sep="."), paste("sd", cols, sep="."), corNames)
##     MAPmix <- as.data.frame(t(MAPmix))
##     colnames(MAPmix) <- paste("Mix", 1:length(pMix), sep="")
##     names(pMix) <- names(MAPmix)
##     list(mvn=MAPmix, pMix=pMix)
## }
##
## ## utility functions for multi-variate normal mixtures
## rmvnormMix <- function(n, p, m, sigma){
##   ind <- sample.int(length(p), n, replace = TRUE, prob = p)
##   d <- nrow(m)
##   samp <- matrix(0, nrow=n, ncol=d)
##   for(i in seq(n))
##       samp[i,] <- rmvnorm(1, m[ind[i],], as.matrix(sigma[ind[i],,]))
##   samp
## }
##
## dmvnormMix <- function(x, p, m, sigma) {
##     nc <- length(p)
##     ## logic is to replicate the original data vector such that each
##     ## item appears nc times which allows easy vectorized calls to
##     ## dnorm. Then we cast the result into a matrix with as many rows
##     ## as nc components which we sum together with a fast colSums call.
##     dens <- rep.int(0, nrow(x))
##     for(i in seq_along(p))
##         dens <- dens + p[i] * dmvnorm(x, m[i,], sigma[i,,])
##     dens
## }
##
## ## utility function to plot BVN mixtures
## bicontourNMM <- function(X, bvn, title="", ng=50) {
##     ## some pretty colors
##     ##library(colorspace)
##     k <- 15
##     ##my.cols <- diverge_hcl(k, c = c(100, 0), l = c(50, 90), power = 1.3)
##     my.cols <- c("#4A6FE3", "#6D84E1", "#8898E1", "#A0AAE2", "#B5BBE3", "#C7CBE3", "#D7D9E3",
##                  "#E2E2E2", "#E4D6D8", "#E6C4CA", "#E6AFB9", "#E498A7", "#E07E93", "#DB627F",
##                  "#D33F6A")
##
##     r <- apply(X, 2, range)
##     xg <- seq(r[1,1],r[2,1], length=ng)
##     yg <- seq(r[1,2],r[2,2], length=ng)
##     Z <- outer(xg,yg, function(x,y) dmvnormMix(cbind(x,y), bvn$p, bvn$center, bvn$cov))
##
##     Nc <- length(bvn$p)
##
##     plot(X, pch=19, cex=.2)
##     contour(xg, yg, Z, drawlabels=FALSE, nlevels=k, col=my.cols, add=TRUE, lwd=2)
##     abline(h=bvn$center[,2], v=bvn$center[,1], lwd=1, col=1:Nc)
##     title(title)
##     legend("bottomleft", legend=paste("Mix", 1:Nc, " ", round(100*bvn$p,1), "%", sep=""), text.col=1:Nc)
## }
##

Try the RBesT package in your browser

Any scripts or data that you put into this service are public.

RBesT documentation built on June 8, 2025, 10:05 a.m.