## EM for MSMM (Multi-variate Student t Mixture Model) with Nc components
## if init is not specified, then the internal knn function (a k-means
## style clustering, see below) is used to initialize the means, cluster
## weights and covariance matrices (taken from the clusters determined
## by knn)
## this function follows the details given in the reference "Robust
## mixture modelling using the t distribution", D. Peel and
## G.J. McLachlan, Statistics and Computing (2000), 10, 339-348.
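## the fitted model is the mixture density
##   f(x) = sum_{i=1..Nc} p_i * t_Nd(x; mu_i, Sigma_i, nu_i)
## with mixture weights p_i, locations mu_i, scale matrices Sigma_i
## and degrees of freedom nu_i per component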
EM_msmm <- function(X, Nc, init, Ninit=50, verbose=TRUE, Niter.max=500, tol=1e-1) {
## if X is not a matrix, interpret the input as the uni-variate case
if(!is.matrix(X))
X <- matrix(X,ncol=1)
N <- dim(X)[1]
Nd <- dim(X)[2]
## initialize randomly
if(missing(init)) {
## assume that the sample is ordered randomly
ind <- seq(1,N-Nc,length=Ninit)
knnInit <- list(mu=matrix(0,nrow=Nc,ncol=Nd), p=(1/seq(1,Nc))/sum(seq(1,Nc)))
for(k in seq(Nc))
knnInit$mu[k,] <- colMeans(X[ind+k-1,,drop=FALSE])
## use k means clustering with K=Nc as init; ignore warnings
## as we may hit the maximal number of iterations
suppressWarnings(KNN <- knn(X, K=Nc, init=knnInit, verbose=verbose, Niter.max=50))
pEst <- KNN$p
cmin <- which.min(pEst)
muEst <- KNN$center
##nuEst <- rlnorm(Nc, log(20), log(5)/1.96) ## avoid randomness during initialization
nuEst <- rep(12, times=Nc)
covEst <- array(0, dim=c(Nc, Nd, Nd))
Xtau <- sqrt(matrixStats::colVars(X))
for(i in seq(Nc)) {
if(i == cmin) next
ind <- KNN$cluster==i
if(sum(ind) > 10) {
covKNN <- as.matrix(cov(X[ind,,drop=FALSE]))
R <- cov2cor(covKNN)
tau <- sqrt(diag(covKNN))
## replace non-positive standard deviations by the global sample standard deviation
tau[tau <= 0] <- Xtau[tau <= 0]
} else {
R <- diag(Nd)
tau <- Xtau
}
##tauR <- rlnorm(Nd, log(tau), log(3)/1.96)
##covEst[i,,] <- diag(tauR, Nd, Nd) %*% R %*% diag(tauR, Nd, Nd)
## ensure that the smallest standard deviation is at least 1/100 of
## the global one (stabilizes the initialization), then rebuild the
## covariance matrix from the correlation matrix R and the scales tau
tau <- pmax(tau, Xtau / 100)
covEst[i,,] <- diag(tau, Nd, Nd) %*% R %*% diag(tau, Nd, Nd)
}
##tauR <- rlnorm(Nd, log(Xtau), log(3)/1.96)
##covEst[cmin,,] <- diag(tauR, Nd, Nd) %*% diag(Nd) %*% diag(tauR, Nd, Nd)
covEst[cmin,,] <- diag(Xtau, Nd, Nd) %*% diag(Nd) %*% diag(Xtau, Nd, Nd)
muEst[cmin,] <- colSums(pEst * muEst) ## place it at the overall weighted center
} else {
pEst <- init$p
muEst <- init$center
nuEst <- init$nu
covEst <- init$cov
}
iter <- 0
logN <- log(N)
traceLli <- c()
Dlli <- Inf
## initialize component and element wise log-likelihood matrix
lli <- array(-20, dim=c(N,Nc))
## and the log(U) matrix holding the logarithms of the latent scaling
## weights u_ij = (nu_i + Nd)/(nu_i + mahalanobis_ij), see Eq. 20
lU <- array(0, dim=c(N,Nc))
if(verbose) {
message("EM multi-variate student t with Nc =", Nc, ":\n")
}
nu_ml <- function(c1) {
function(nu) {
(log(nu/2) - digamma(nu/2) + c1 + digamma((nu+Nd)/2) - log((nu+Nd)/2))^2
}
}
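## the closure returned by nu_ml is the squared left-hand side of the
## estimating equation for nu (Eq. 32 of the reference); its root is
## found below by minimizing this square with optimize()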
while(iter < Niter.max) {
## calculate responsibilities from the likelihood terms;
## calculations are done in log-space to avoid numerical
## difficulties if some points are far away from some
## component and hence receive very low density
for(i in seq(Nc)) {
lli[,i] <- log(pEst[i]) + mvtnorm::dmvt(X, muEst[i,], as.matrix(covEst[i,,]), nuEst[i], log=TRUE)
}
## ensure that the log-likelihood does not go out of numerical
## reasonable bounds
lli <- pmax(lli, -30)
##lnresp <- apply(lli, 1, log_sum_exp)
lnresp <- matrixStats::rowLogSumExps(lli)
## the log-likelihood is then given by the sum of lresp
lliCur <- sum(lnresp)
traceLli <- c(traceLli, lliCur)
if(iter > 1) {
## Dlli is the slope of the log-likelihood evaluated with
## a central (second order) difference
Dlli <- (traceLli[iter+1] - traceLli[iter - 1])/2
}
if(verbose)
message("Iteration", iter, ": log-likelihood", lliCur, "; Dlli =", Dlli, "\n")
if(Dlli < tol) {
break
}
## ... and the responsibility matrix follows from this by
## appropriate normalization.
lresp <- sweep(lli, 1, lnresp, "-") ## Eq. 16
resp <- exp(lresp)
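## resp[j,i] is the responsibility tau_ij = p_i f_i(x_j) / sum_k p_k f_k(x_j)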
## calculate the additional weights of the U matrix, i.e. the latent
## tail mass weight of each point under each component
for(i in seq(Nc)) {
Xc_i <- sweep(X, 2L, muEst[i,])
Sigma_i <- as.matrix(covEst[i,,])
## in rare cases the covariance matrix becomes (almost)
## singular in which case the alternative cholesky
## factorization gives more stable results
maha_dist <- tryCatch(
mahalanobis(Xc_i, FALSE, Sigma_i),
error = function(e) {
## see https://stats.stackexchange.com/questions/147210/efficient-fast-mahalanobis-distance-computation
## also adding eps to the diagonal to further stabilize the computation
L_i <- t(chol(Sigma_i + diag(5 * .Machine$double.eps, Nd, Nd)))
y_i <- forwardsolve(L_i, t(Xc_i))
colSums(y_i^2)
}
)
lU[,i] <- log(nuEst[i] + Nd) - log(nuEst[i] + maha_dist) ## Eq. 20
}
## the mean responsibility per mixture component
## updates pEst (Eq. 29)
##lzSum <- apply(lresp, 2, log_sum_exp)
lzSum <- matrixStats::colLogSumExps(lresp)
zSum <- exp(lzSum)
##zSum <- colSums(resp)
##pEst <- zSum/N ## Eq. 29
pEst <- exp(lzSum - logN)
## make sure the weights sum to exactly 1, which may not hold due
## to small rounding errors
pEst <- pEst/sum(pEst)
## product of the u weights and the responsibilities
lW <- lresp + lU ## intermediate term, i.e. log(tau_ij * u_ij)
##wSum <- exp(apply(lW, 2, log_sum_exp))
wSum <- exp(matrixStats::colLogSumExps(lW))
##wSum <- colSums(W)
## now obtain new estimates for each component of the mixtures
## of their mu vector and covariance matrices
for(i in seq(Nc)) {
##xc <- sqrt(W[,i]) * sweep(X, 2, muEst[i,], check.margin = FALSE)
##covEst[i,,] <- crossprod(xc) / zSum[i] ## Eq. 31
##muEst[i,] <- colSums(W[,i] * X) / wSum[i] ## Eq. 30
##xc <- exp(0.5 * lW[,i]) * sweep(X, 2, muEst[i,], check.margin = FALSE)
##covEst[i,,] <- crossprod(xc) / zSum[i] ## Eq. 31
xc <- exp(0.5 * (lW[,i] - lzSum[i])) * sweep(X, 2, muEst[i,], check.margin = FALSE)
covEst[i,,] <- crossprod(xc) ## Eq. 31 (divisor moved to xc)
muEst[i,] <- colSums(exp(lW[,i]) * X) / wSum[i] ## Eq. 30
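## note that the scatter (Eq. 31) above is formed around the mean
## estimate of the previous iteration; Eq. 30 updates the mean afterwards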
## ensure that diagonal stays non-zero
for(j in 1:Nd)
covEst[i,j,j] <- max(covEst[i,j,j], .Machine$double.eps)
}
## finally get the new nu estimates via numerical solution
## first calculate necessary constants which don't involve nu_i
##c1 <- 1 + colSums(resp * (log(U) - U)) / zSum
c1 <- 1 + colSums(resp * (lU - exp(lU))) / zSum
for(i in seq(Nc)) {
nuEstML <- optimize(nu_ml(c1[i]), c(0,150)) # Eq. 32
if(is.na(nuEstML$objective)) {
warning("Component ", i, " in iteration ", iter, " failed convergence.")
} else if(nuEstML$objective > 1e-3 && nuEstML$minimum < 50) {
## only warn if we had trouble finding the minimum for
## nu below 50; larger values behave like a normal anyway
warning("Component ", i, " in iteration ", iter, " had convergence problems.\nObjective function = ",nuEstML$objective,"\n")
}
nuEst[i] <- nuEstML$minimum
}
iter <- iter + 1
}
if(iter == Niter.max)
warning("Maximum number of iterations reached.")
## number of fitted parameters (degrees of freedom of the model)
## free parameters per covariance matrix
cov.df <- (Nd-1)*Nd/2 + Nd
## Nc mean vectors and covariances + Nc nu values + Nc-1 free weights
df <- Nc * (Nd + cov.df) + Nc + Nc-1
## sort by largest weight
o <- order(pEst, decreasing=TRUE)
pEst <- pEst[o]
muEst <- muEst[o,,drop=FALSE]
covEst <- covEst[o,,,drop=FALSE]
nuEst <- nuEst[o]
if(Nd != 1) {
rhoEst <- array(apply(covEst, 1, cov2cor), c(Nd,Nd,Nc))
rhoEst <- apply(rhoEst, 3, function(x) x[lower.tri(x)])
tauEst <- sqrt(t(apply(covEst, 1, diag)))
} else {
rhoEst <- NULL
tauEst <- sqrt(as.vector(covEst))
}
invisible(list(cov=covEst, center=muEst, nu=nuEst, p=pEst, rho=rhoEst, tau=tauEst, lli=lliCur, df=df, Dlli=Dlli, niter=iter))
}
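## minimal usage sketch (illustration only, not part of the original
## source); assumes the mvtnorm and matrixStats packages and the
## internal knn() helper used above are available:
##   set.seed(42)
##   X <- rbind(mvtnorm::rmvt(200, sigma=diag(2), df=5),
##              mvtnorm::rmvt(200, sigma=diag(2), df=5, delta=c(5, 5)))
##   fit <- EM_msmm(X, Nc=2, verbose=FALSE)
##   fit$center ## estimated component locations
##   fit$nu     ## estimated degrees of freedom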