## R/EM_bmm_ab.R


## EM for Beta Mixture Models (BMM) with Nc components
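##
## Arguments (as inferred from the code below):
##   x             data vector with values in (0,1); exact 0 or 1 values are
##                 moved to eps and 1-eps (machine epsilon) before fitting
##   Nc            number of mixture components
##   mix_init      optional 3 x Nc matrix of starting values with rows w, a, b;
##                 if missing, a KNN based initialization is used
##   Ninit         number of data points used to seed each initial cluster
##                 mean of the KNN initialization
##   verbose       print the initial estimate and per-iteration progress
##   Niter.max     maximum number of EM iterations
##   tol           if given, stop once the central-difference slope of the
##                 log-likelihood (Dlli) falls below tol
##   Neps          window length of the running-mean convergence check on the
##                 parameter estimates
##   eps           convergence thresholds for w, a and b on the unconstrained
##                 (logit/log) scale; a single value is interpreted as a
##                 number of digits
##   constrain_gt1 if TRUE (default), constrain the beta parameters to
##                 a >= 1 and b >= 1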

EM_bmm_ab <- function(x, Nc, mix_init, Ninit=50, verbose=FALSE, Niter.max=500, tol, Neps, eps=c(w=0.005,a=0.005,b=0.005), constrain_gt1=TRUE) {
    N <- length(x)
    assert_that(N+Nc >= Ninit)

    assert_logical(constrain_gt1, any.missing=FALSE, len=1)

    ## check data for 0 and 1 values which are problematic, but may be
    ## valid, depending on a and b. Moving these to eps or 1-eps
    ## ensures proper handling during fit.
    x0 <- x==0
    if(any(x0)) {
        message("Detected ", sum(x0), " value(s) which are exactly 0.\nTo avoid numerical issues during EM such values are moved to smallest eps on machine.")
        x[x0] <- .Machine$double.eps
    }
    x1 <- x==1
    if(any(x1)) {
        message("Detected ", sum(x1), " value(s) which are exactly 1.\nTo avoid numerical issues during EM such values are moved to one minus smallest eps on machine.")
        x[x1] <- 1-.Machine$double.eps
    }

    ## temporaries needed during EM
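    ## Lx and LxC are N x Nc matrices holding log(x) and log(1-x), one copy
    ## per column, so that the beta log-density of all components can be
    ## evaluated in a single sweep during the E-step; xRep is only needed by
    ## the commented-out dbeta-based variants further below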
    Lx <- matrix(log(x), ncol=Nc, nrow=N)
    LxC <- matrix(log1p(-x), ncol=Nc, nrow=N)

    xRep <- rep(x, each=Nc)

    ## initialize via KNN when no initial mixture estimate is given
    if(missing(mix_init)) {
        ##abmEst <- matrix(1+rlnorm(Nc*3, 0, log(5)/1.96), nrow=Nc)
        ##abmEst[,1] <- 1/Nc
        ## assume that the sample is ordered randomly
        ind <- seq(1,N-Nc,length=Ninit)
        knnInit <- list(mu=matrix(0,nrow=Nc,ncol=1), p=rep(1/Nc, times=Nc))
        for(k in seq(Nc))
            knnInit$mu[k,1] <- mean(x[ind+k-1])
        KNN <- suppressWarnings(knn(x, K=Nc, init=knnInit, Niter.max=50))
        muInit <- rep(mean(x), times=Nc)
        varInit <- rep(1.5*var(x), times=Nc)
        for(k in 1:Nc) {
            kind <- KNN$cluster == k
            if(sum(kind) > 10) {
                muInit[k] <- KNN$center[k]
                varInit[k] <- var(x[kind])
            }
        }
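        ## method of moments: for a Beta(a,b) with mean mu and variance v the
        ## scale is n = a + b = mu*(1-mu)/v - 1; n is floored at 1 to keep the
        ## initial shape parameters away from degenerate values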
        nInit <- pmax(muInit*(1-muInit)/varInit - 1, 1, na.rm=TRUE)
        ## place the component which received the least weight at the
        ## data center with roughly the variance of the sample
        cmin <- which.min(KNN$p)
        muInit[cmin] <- sum(KNN$p * KNN$center)
        ## muInit[cmin] <- mean(x) ## could be considered here
        nInit[cmin] <- pmax(muInit[cmin]*(1-muInit[cmin])/var(x) - 1, 1, na.rm=TRUE)
        ##Nmax <- max(2, max(nInit))
        ## ensure n is positive for each cluster; if this is not the
        ## case, sample uniformly from the range of n we have
        ##Nneg <- nInit <= .Machine$double.eps
        ##Nsmall <- nInit <= 0.5
        ##if(any(Nsmall))
        ##    nInit[Nsmall] <- runif(sum(Nsmall), 0.5, Nmax)
        ##nInitR <- 0.5 + rlnorm(Nc, log(nInit), log(5)/1.96)
        mixEst <- rbind(KNN$p, nInit*muInit, nInit*(1-muInit))
        dlink(mixEst) <- identity_dlink
        rownames(mixEst) <- c("w", "a", "b")
    } else {
        mixEst <- mix_init
    }

    ## mixEst parametrization during fitting
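    ## (weights on the logit scale, shape parameters on the log scale; under
    ## constrain_gt1 the shapes are shifted by 1 before taking logs). The
    ## running-mean convergence check operates on this unconstrained scale.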
    mixEstPar <- mixEst
    mixEstPar[1,] <- logit(mixEst[1,,drop=FALSE])
    mixEstPar[2,] <- log(mixEst[2,])
    mixEstPar[3,] <- log(mixEst[3,])
    rownames(mixEstPar) <-  c("w", "la", "lb")

    ## constrain to a>=1 & b>=1 if requested... thus we subtract 1
    if(constrain_gt1) {
        mixEstPar[2,]  <- log(pmax(mixEst[2,] - 1, rep(1E-8, times=Nc)))
        mixEstPar[3,]  <- log(pmax(mixEst[3,] - 1, rep(1E-8, times=Nc)))

        mixEst[2,]  <- 1 + exp(mixEstPar[2,])
        mixEst[3,]  <- 1 + exp(mixEstPar[3,])
    }

    if(verbose) {
        message("EM for beta mixture model.\n")
        message("Initial estimates:\n")
        print(mixEst)
    }

    ## in case the tolerance is not specified, this criterion is
    ## ignored
    if(missing(tol)) {
        checkTol <- FALSE
        tol <- -1
    } else
        checkTol <- TRUE

    if(missing(Neps)) {
        ## in case the tolerance has been declared but Neps has not,
        ## disable the running-mean convergence check
        checkEps <- FALSE
        Neps <- 5
    } else
        checkEps <- TRUE

    ## if nothing is specified, we declare convergence based on a
    ## running mean of differences in parameter estimates
    if(!checkTol & !checkEps) {
        checkEps <- TRUE
    }

    assert_that(Neps > 1)
    assert_that(ceiling(Neps) == floor(Neps))

    ## eps can also be given as a single integer which is interpreted
    ## as the number of digits
    if(length(eps) == 1) eps <- rep(10^(-eps), 3)

    iter <- 0
    logN <- log(N)
    traceMix <- list()
    traceLli <- c()
    Dlli <- Inf
    runMixPar <- array(-Inf, dim=c(Neps,3,Nc), dimnames=list(NULL, rownames(mixEstPar), NULL ))
    runOrder <- 0:(Neps-1)
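    ## number of parameters monitored by the convergence check: Nc weights
    ## plus 2*Nc shape parameters; for a single component the weight is fixed
    ## at 1 and not monitored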
    Npar <- Nc + 2*Nc
    if(Nc == 1) Npar <- Npar - 1

    ## find alpha and beta for a given component in log-space
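    ## The M-step stationarity conditions for a single component are
    ##   digamma(a) - digamma(a+b) = c1  (responsibility-weighted mean of log(x))
    ##   digamma(b) - digamma(a+b) = c2  (responsibility-weighted mean of log(1-x));
    ## bmm_ml returns the summed squared residual of these two equations as a
    ## function of par = log(a,b) (or log(a-1,b-1) under constrain_gt1), which
    ## is then minimized with optim in the M-step.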
    bmm_ml <- function(c1,c2) {
        function(par) {
            ab <- exp(par)
            if(constrain_gt1)
                ab <- 1 + ab
            s <- digamma(sum(ab))
            eq1 <- digamma(ab[1]) - s
            eq2 <- digamma(ab[2]) - s
            (eq1 - c1)^2 + (eq2 - c2)^2
        }
    }

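    ## gradient of the bmm_ml objective with respect to par; only referenced by
    ## the commented-out BFGS call in the M-step below (the active optim call
    ## uses the default Nelder-Mead and does not require a gradient)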
    bmm_ml_grad  <- function(c1,c2) {
        function(par) {
            ab <- exp(par)
            if(constrain_gt1)
                ab <- 1 + ab
            n <- sum(ab)
            s <- digamma(n)
            eq1 <- digamma(ab[1]) - s
            eq2 <- digamma(ab[2]) - s
            sqTerm1 <- (eq1 - c1)
            sqTerm2 <- (eq2 - c2)

            trig_n  <- trigamma(n)

            grad1 <- 2 * sqTerm1 * (trigamma(ab[1]) * ab[1] - trig_n * ab[1] ) -
                2 * sqTerm2 * (trig_n * ab[1])

            grad2 <- -2 * sqTerm1 * (trig_n * ab[2]) +
                2 * sqTerm2 * (trigamma(ab[2]) * ab[2] - trig_n * ab[2])

            c(grad1, grad2)
        }
    }

    while(iter < Niter.max) {
        ## calculate responsibilities from the likelihood terms;
        ## calculations are done in log-space to avoid numerical
        ## difficulties if some points are far away from some
        ## component and hence receive very low density
        ##lli <- t(matrix(log(mixEst[1,]) + dbeta(xRep, mixEst[2,], mixEst[3,], log=TRUE), nrow=Nc))

        ## Beta: Gamma(a + b) / (Gamma(a) * Gamma(b)) * x^(a-1) * (1-x)^(b-1)
        w <- mixEst[1,]
        a <- mixEst[2,]
        b <- mixEst[3,]
        ##lli <- sweep( sweep(Lx, 2, a - 1, "*", check.margin=FALSE) + sweep(LxC, 2, b - 1, "*", check.margin=FALSE), 2, log(w) + lgamma(a + b) - lgamma(a) - lgamma(b), "+", check.margin=FALSE)
        lli <- sweep( sweep(Lx, 2, a - 1, "*", check.margin=FALSE) + sweep(LxC, 2, b - 1, "*", check.margin=FALSE), 2, log(w) - lbeta(a, b), "+", check.margin=FALSE)
        ##lli <- t(matrix(log(mixEst[1,]) + dbeta(xRep, mixEst[2,], mixEst[3,], log=TRUE), nrow=Nc))

        ## ensure that the log-likelihood terms do not go out of
        ## numerically reasonable bounds
        lli <- apply(lli, 2, pmax, -30)

        ##lnresp <- apply(lli, 1, log_sum_exp)
        lnresp  <- matrixStats::rowLogSumExps(lli)
        ## the total log-likelihood is then the sum of the per-observation log normalizers
        lliCur <- sum(lnresp)
        ## record current state
        traceMix <- c(traceMix, list(mixEst))
        traceLli <- c(traceLli, lliCur)
        if(iter > 1) {
            ## Dlli is the slope of the log-likelihood evaluated with
            ## a second-order (central difference) method
            Dlli <- (traceLli[iter+1] - traceLli[iter - 1])/2
        }
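        ## running-mean convergence check: for each parameter (on the
        ## unconstrained scale stored in runMixPar) average the absolute
        ## change over the last Neps iterations and count how many parameters
        ## are below their eps threshold; for a single component the fixed
        ## weight is excluded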
        if(Nc > 1) {
            smean <- apply(runMixPar[order(runOrder),,,drop=FALSE], c(2,3), function(x) mean(abs(diff(x))))
            eps.converged <- sum(sweep(smean, 1, eps, "-") < 0)
        } else {
            smean <- apply(runMixPar[order(runOrder),-1,,drop=FALSE], c(2,3), function(x) mean(abs(diff(x))))
            eps.converged <- sum(sweep(smean, 1, eps[-1], "-") < 0)
        }
        if(is.na(eps.converged)) eps.converged <- 0
        if(verbose) {
            message("Iteration ", iter, ": log-likelihood = ", lliCur, "; Dlli = ", Dlli, "; converged = ", eps.converged, " / ", Npar, "\n", sep="")
        }
        if(checkTol & Dlli < tol) {
            break
        }
        if(iter >= Neps & checkEps & eps.converged == Npar) {
            break
        }

        ## ... and the (log) responsibility matrix follows from this by
        ## appropriate normalization.
        lresp <- sweep(lli, 1, lnresp, "-", FALSE)
        ##resp <- exp(lresp)

        ## mean probability of being in a specific mixture component -> updates
        ## the weight row of mixEst
        ##lzSum <- apply(lresp, 2, log_sum_exp)
        lzSum <- matrixStats::colLogSumExps(lresp)
        ##zSum <- exp(lzSum)
        mixEst[1,] <- exp(lzSum - logN)

        ##c1 <- colSums(Lx * resp)/zSum
        ##c2 <- colSums(LxC * resp)/zSum

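        ## responsibility-weighted means of log(x) and log(1-x) per component;
        ## these are the sufficient statistics matched by the digamma
        ## equations in bmm_ml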
        resp_zscaled  <- exp(sweep(lresp, 2, lzSum, "-", FALSE))
        c1 <- matrixStats::colSums2(Lx * resp_zscaled)
        c2 <- matrixStats::colSums2(LxC * resp_zscaled)

        ## now solve for new alpha and beta estimates jointly for each
        ## component
        for(i in 1:Nc) {
            if(constrain_gt1) {
                theta <- c(log(mixEst[2:3,i] - 1))
            } else {
                theta <- c(log(mixEst[2:3,i]))
            }
            ##Lest <- optim(theta, bmm_ml(c1[i], c2[i]), gr=bmm_ml_grad(c1[i], c2[i]), method="BFGS", control=list(maxit=500))
            ## Default method of optim (Nelder-Mead) is used here
            Lest <- optim(theta, bmm_ml(c1[i], c2[i]))
            if(Lest$convergence != 0 & Lest$value > 1E-4) {
                warning("Component ", i, " in iteration ", iter, " had convergence problems!")
            }
            ## keep the unconstrained parametrization in sync so that the
            ## running-mean convergence check sees the updated a and b
            mixEstPar[2:3,i] <- Lest$par
            if(constrain_gt1) {
                mixEst[2:3,i] <- 1+pmax(exp(Lest$par), c(1E-8, 1E-8))
            } else {
                mixEst[2:3,i] <- exp(Lest$par)
            }
        }

        mixEstPar[1,] <- logit(mixEst[1,,drop=FALSE])
        ind <- 1 + iter %% Neps
        runMixPar[ind,,] <- mixEstPar
        runOrder[ind] <- iter

        iter <- iter + 1
    }
    if(iter == Niter.max)
        warning("Maximum number of iterations reached.")

    mixEst <- mixEst[,order(mixEst[1,], decreasing=TRUE),drop=FALSE]
    colnames(mixEst) <- paste("comp", seq(Nc), sep="")
    likelihood(mixEst) <- "binomial"
    dlink(mixEst) <- identity_dlink
    class(mixEst) <- c("EM", "EMbmm", "betaMix", "mix")

    ## give further details
    attr(mixEst, "df") <- Nc-1 + 2*Nc
    attr(mixEst, "nobs") <- N
    attr(mixEst, "lli") <- lliCur

    attr(mixEst, "Nc") <- Nc

    attr(mixEst, "tol") <- tol
    attr(mixEst, "traceLli") <- traceLli
    attr(mixEst, "traceMix") <- lapply(traceMix, function(x) {class(x) <- c("betaMix", "mix"); x})
    attr(mixEst, "x") <- x

    mixEst
}


#' @export
print.EMbmm <- function(x, ...) {
    cat("EM for Beta Mixture Model\nLog-Likelihood = ", logLik(x), "\n\n",sep="")
    NextMethod()
}
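
## Illustrative usage sketch (commented out so that it does not run at package
## load time; the helpers knn, logit, dlink and likelihood are assumed to be
## available from the package namespace):
##
##   set.seed(42)
##   ## two reasonably well separated beta populations
##   x <- c(rbeta(400, 5, 15), rbeta(600, 40, 10))
##   fit <- EM_bmm_ab(x, Nc=2, verbose=TRUE)
##   print(fit)          ## dispatches to print.EMbmm above
##   attr(fit, "lli")    ## final log-likelihood reached by the EM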
