#' @description Computes the variance-covariance matrix of the MLE of
#' an m-component normal mixture.
#' @export
#' @title normalmixVcov
#' @name normalmixVcov
#' @param y n by 1 vector of data.
#' @param coefficients parameter vector ordered as (alpha_1, ..., alpha_m, mu_1, ..., mu_m, sigma_1, ..., sigma_m, gam).
#' @param z n by p matrix of regressor associated with gam.
#' @param vcov.method method used to compute the variance-covariance matrix,
#' one of \code{"Hessian"} and \code{"OPG"}. Default is \code{"Hessian"}.
#' When \code{vcov.method = "Hessian"}, the variance-covariance matrix is
#' estimated by the Hessian using the formula given in Boldea and Magnus (2009).
#' When \code{vcov.method = "OPG"}, the outer product of gradients is used.
#' @return variance-covariance matrix of the MLE of
#' an m-component normal mixture, given the data and coefficients.
#' @references Boldea, O. and Magnus, J. R. (2009)
#' Maximum Likelihood Estimation of the Multivariate Normal Mixture Model,
#' \emph{Journal of the American Statistical Association},
#' \bold{104}, 1539--1549.
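#' @examples
#' # A usage sketch (not run): the coefficient vector is assumed to come from
#' # a previous call to normalmixPMLE on the faithful data.
#' \dontrun{
#' fit <- normalmixPMLE(y = faithful$eruptions, m = 2)
#' normalmixVcov(y = faithful$eruptions, coefficients = fit$coefficients, vcov.method = "OPG")
#' }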
normalmixVcov <- function(y, coefficients, z = NULL, vcov.method = c("Hessian", "OPG"))
{
y <- as.vector(y)
n <- length(y)
len <- length(coefficients)
p <- 0
gam <- NULL
vcov.method <- match.arg(vcov.method)
if (!is.null(z)) {
z <- as.matrix(z)
p <- ncol(z)
gam <- coefficients[(len-p+1):len]
}
m <- (len-p)/3
if (round(m) != m) {
stop("The dimension of the coefficients is incompatible with z. Please check the data.")
}
alpha <- coefficients[1:m]
mu <- coefficients[(m+1):(2*m)]
sigma <- coefficients[(2*m+1):(3*m)]
if (m == 1) {
if (is.null(z)) {
I <- n*diag(c(1/sigma^2, 0.5/sigma^4)) # information matrix when
# the variance is parameterized as sigma^2
} else {
z1 <- cbind(1, z)
I <- matrix(0, nrow=p+2, ncol=p+2)
I[1:(p+1), 1:(p+1)] <- t(z1) %*% z1/sigma^2
I[(p+2), (p+2)] <- n*0.5/sigma^4 # information for sigma^2, consistent with the z = NULL case
s.1 <- c(1,(p+2), 2:(p+1))
I <- I[s.1, s.1]
}
vcov <- solve(I)
# Because the variance is parameterized as sigma^2, we convert it to sigma
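# (Delta method: d(sigma^2)/d(sigma) = 2*sigma, so the sigma entry of the
# inverse information is scaled by 0.5/sigma on each side to obtain the
# variance of the MLE of sigma itself.)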
c.mat.vec <- c(1, 0.5/sigma, rep(1, p))
vcov <- diag(c.mat.vec) %*% vcov %*% diag(c.mat.vec)
} else { # end of if (m == 1)
# m >= 2
# Compute posterior probabilities, and adjust y if z is present
sigma0 <- rep(1, m) # dummy
mu0 <- double(m) # dummy
an <- 1/n # penalty term for variance
cn <- 0 # penalty term for tau
h <- 0
tau <- 0.5
k <- 0
epsilon <- 1e-08
maxit <- 2
ninits <- 1
b <- matrix( rep( coefficients, ninits), ncol = ninits)
if (is.null(z)) {
out.p <- cppNormalmixPMLE(b, y, matrix(0), mu0, sigma0, m, p, an, cn, maxit, ninits, epsilon, tau, h, k)
} else {
out.p <- cppNormalmixPMLE(b, y, z,mu0, sigma0, m, p, an, cn, maxit, ninits, epsilon, tau, h, k)
# Adjust y
y <- y - z %*% gam
}
post <- matrix(out.p$post, nrow=n)
p1 <- seq(1, (2*m-1), by=2) # sequence of odd numbers, 1,3,...,2*m-1
p2 <- seq(2, (2*m), by=2) # sequence of even numbers, 2,4,...,2*m
# Matrices used in computing vcov
a <- diag(1/alpha[-m], nrow=m-1, ncol=m-1)
a <- cbind(a, -1/alpha[m]) # m-1 by m matrix of a_i's
abar <- a %*% t(post) # m-1 by n
Z0 <- t((t(matrix(rep.int(y, m), ncol=m))-mu)/sigma) # normalized data, n by m
f <- t(t(exp(-Z0^2/2)/sqrt(2*pi))/sigma) # pdf, n by m
phi <- t(t(f)*alpha) # n by m
f0 <- rowSums(phi) # data pdf, n by 1
vinv <- 1/(sigma*sigma)
b <- t(t(Z0)/sigma) # n by m
B <- t(vinv - t(b*b)) # n by m
c0 <- array(0, dim=c(n, m, 2))
c0[, , 1] <- b
c0[, , 2] <- -B/2
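# For each component j, b[, j] = (y - mu_j)/sigma_j^2 is the derivative of
# log phi_j with respect to mu_j, and -B[, j]/2 = (b[, j]^2 - 1/sigma_j^2)/2 is
# the derivative with respect to sigma_j^2; c0 stacks these two score pieces.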
# Computes Hessian-based I
if (vcov.method == "Hessian") {
other.method = "OPG"
C0 <- array(0, dim=c(n, m, 2, 2))
C0[, , 1, 1] <- t(matrix(vinv, nrow=m, ncol=n)) # n by m
C0[, , 2, 1] <- C0[, , 1, 2] <- t(t(b)*vinv) # n by m
C0[, , 2, 2] <- t((vinv -2*t(B))*vinv)/2 # n by m
Q.pi <- - abar %*% t(abar) # m-1 by m-1
Q.pi.theta <- matrix(0, nrow=m-1, ncol=2*m) # m-1 by 2m
for (i in 1:m){
zi <- a[, i] - abar # m-1 by n
wi <- c0[, i, ]*post[, i] # n by 2
Q.i <- colSums(tKR(wi, t(zi))) # 2*(m-1) vector
# first m-1 elements correspond to mu x pi
# second m-1 elements correspond to sigma x pi
Q.pi.theta[, i] <- Q.i[1:(m-1)]
Q.pi.theta[, m+i] <- Q.i[m:(2*(m-1))]
}
Q.theta <- matrix(0, nrow=2*m, ncol=2*m)
for (i in 2:m){ # off-diagonal blocks
for (j in 1:(i-1)){
wi <- c0[, i, ]*post[, i]
wj <- c0[, j, ]*post[, j]
Q.ij <- - colSums(tKR(wi, wj))
Q.theta[(2*i-1):(2*i), (2*j-1):(2*j)] = t(matrix(Q.ij, nrow=2, ncol=2))
}
}
Q.theta <- Q.theta + t(Q.theta)
for (i in 1:m){ # diagonal blocks
C.ii <- array(C0[, i, , ], dim=c(n, 2, 2))
Q.ii.1 <- apply(C.ii*post[, i], c(2, 3), sum)
w.ii <- tKR(c0[, i, ], c0[, i, ])*post[, i]*(1-post[, i])
Q.ii.2 <- matrix(colSums(w.ii), nrow=2, ncol=2)
Q.theta[(2*i-1):(2*i), (2*i-1):(2*i)] <- -Q.ii.1 + Q.ii.2
}
# odd rows and columns of Q.theta correspond to mu
# even rows and columns of Q.theta correspond to sigma
Q.theta <- Q.theta[c(p1,p2), c(p1,p2)] # first block = wrt mu, second block = wrt sigma
dimI <- m-1+2*m
I <- matrix(0, nrow=dimI, ncol=dimI)
I[1:(m-1), 1:(m-1)] <- - Q.pi
I[1:(m-1), m:dimI] <- - Q.pi.theta
I[m:dimI, 1:(m-1)] <- - t(Q.pi.theta)
I[m:dimI, m:dimI] <- - Q.theta
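# At this point I is the (3m-1) by (3m-1) estimated information for
# (alpha_1, ..., alpha_{m-1}, mu_1, ..., mu_m, sigma_1^2, ..., sigma_m^2),
# built from minus the Hessian blocks above; when z is present, rows and
# columns for gam are appended below.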
if (!is.null(z)) {
dbar <- z*rowSums(post*b) # n by p
Q.gam.theta <- matrix(0, nrow=p, ncol=2*m) # p by 2*m matrix
for (i in 1:m) {
C.i <- array(C0[, i, 1, ], dim=c(n, 2)) # n by 2
Q.i.1 <- colSums(tKR(-C.i+b[, i]*c0[, i, ], z*post[, i]))
Q.i.2 <- colSums(tKR(c0[, i, ]*post[, i], dbar)) # p*2 vector
Q.gam.theta[, (2*i-1):(2*i)] <- matrix(Q.i.1-Q.i.2, nrow=p, ncol=2)
}
Q.gam.theta <- Q.gam.theta[, c(p1, p2), drop=FALSE] # p by 2*m
w1 <- (post*b)%*%t(a) - rowSums(post*b)*t(abar) # n by m-1
Q.pi.gam.0 <- colSums(tKR(w1, z)) # (m-1)*p vector
Q.pi.gam <- matrix(Q.pi.gam.0, nrow=m-1, ncol=p)
Q.gam <- - t(z) %*% (z*rowSums(post*B)) -
matrix(colSums(tKR(dbar, dbar)), nrow=p, ncol=p)
I <- cbind(I, -rbind(Q.pi.gam, t(Q.gam.theta)))
I <- rbind(I, -cbind(t(Q.pi.gam), Q.gam.theta, Q.gam))
} # end if (!is.null(z))
} else { # compute I with (vcov.method == "OPG")
other.method = "Hessian"
score <- t(abar)
for (j in 1:m) { score <- cbind(score, c0[, j, ]*post[, j]) }
ind <- c(c(1:(m-1)), p1+m-1, p2+m-1)
score <- score[, ind]
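# Each row of score is the per-observation score with respect to
# (alpha_1, ..., alpha_{m-1}, mu_1, ..., mu_m, sigma_1^2, ..., sigma_m^2),
# so t(score) %*% score below is the outer-product-of-gradients estimate.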
I <- t(score) %*% score
if (!is.null(z)) {
dbar <- z*rowSums(post*b) # n by p
score <- cbind(score, dbar)
I <- t(score) %*% score
}
} # end if (vcov.method == "OPG")
vcov <- try(solve(I))
if (inherits(vcov, "try-error") || any(diag(vcov) < 0)) {
vcov <- matrix(NaN, nrow = 3*m-1+p, ncol = 3*m-1+p)
warning("Fisher information matrix is singular and/or the
variance is estimated to be negative. Consider using vcov.method=\"",other.method,"\".")
}
# Because the variance is parameterized as sigma^2, we convert it to sigma
c.mat.vec <- c(rep(1, m-1+m), 0.5/sigma, rep(1, p))
vcov <- diag(c.mat.vec) %*% vcov %*% diag(c.mat.vec)
# Add the variance of alpha_m
len <- length(coefficients)
M.mat <- diag(len-1)
M.mat <- rbind(M.mat[1:(m-1), ], c(rep(-1, m-1), rep(0, len-m)),
M.mat[m:(len-1), ])
vcov <- M.mat %*% vcov %*% t(M.mat)
} # end else (i.e., m >= 2)
vcov
} # end function normalmixVcov
#' @description Estimates parameters of a finite mixture of univariate normals by
#' the penalized maximum likelihood estimator.
#' @export
#' @title normalmixPMLE
#' @name normalmixPMLE
#' @param y n by 1 vector of data.
#' @param m number of components in the mixture.
#' @param z n by p matrix of regressor associated with gamma.
#' @param vcov.method method used to compute the variance-covariance matrix, one of \code{"Hessian"} and \code{"OPG"}.
#' Default is \code{"Hessian"}. When \code{vcov.method = "Hessian"}, the variance-covariance matrix is
#' estimated by the Hessian using the formula given in Boldea and Magnus (2009).
#' When \code{vcov.method = "OPG"}, the outer product of gradients is used.
#' @param ninits number of randomly drawn initial values.
#' @param epsilon convergence criterion. Convergence is declared when the penalized log-likelihood increases by less than \code{epsilon}.
#' @param maxit maximum number of iterations.
#' @param epsilon.short convergence criterion in short EM. Convergence is declared when the penalized log-likelihood increases by less than \code{epsilon.short}.
#' @param maxit.short maximum number of iterations in short EM.
#' @param binit initial value of parameter vector that is included as a candidate parameter vector.
#' @return A list of class \code{normalregMix} with items:
#' \item{coefficients}{vector of parameter estimates. Ordered as \eqn{\alpha_1,\ldots,\alpha_m,\mu_1,\ldots,\mu_m,\sigma_1,\ldots,\sigma_m,gam}.}
#' \item{parlist}{parameter estimates as a list containing alpha, mu, and sigma (and gam if z is included in the model).}
#' \item{vcov}{estimated variance-covariance matrix.}
#' \item{loglik}{maximized value of the log-likelihood.}
#' \item{penloglik}{maximized value of the penalized log-likelihood.}
#' \item{aic}{Akaike Information Criterion of the fitted model.}
#' \item{bic}{Bayesian Information Criterion of the fitted model.}
#' \item{postprobs}{n by m matrix of posterior probabilities for observations.}
#' \item{components}{n by 1 vector of integers that indicates the indices of components
#' each observation belongs to based on computed posterior probabilities.}
#' \item{call}{The matched call.}
#' \item{m}{number of components in the mixture.}
#' @note \code{normalmixPMLE} maximizes the penalized log-likelihood function
#' (Chen et al., 2008)
#' using the EM algorithm, combining short and long runs of EM steps as in Biernacki et al. (2003).
#' \code{normalmixPMLE} first runs the EM algorithm from \code{ninits}\eqn{* 10 m (1 + p)} initial values
#' with the convergence criterion \code{epsilon.short} and \code{maxit.short}.
#' Then, \code{normalmixPMLE} uses the \code{ninits} best initial values to run the EM algorithm
#' with the convergence criterion \code{epsilon} and \code{maxit}.
#' @references Biernacki, C., Celeux, G. and Govaert, G. (2003)
#' Choosing Starting Values for the EM Algorithm for Getting the
#' Highest Likelihood in Multivariate Gaussian Mixture Models,
#' \emph{Computational Statistics and Data Analysis}, \bold{41}, 561--575.
#'
#' Boldea, O. and Magnus, J. R. (2009)
#' Maximum Likelihood Estimation of the Multivariate Normal Mixture Model,
#' \emph{Journal of the American Statistical Association},
#' \bold{104}, 1539--1549.
#'
#' Chen, J., Tan, X. and Zhang, R. (2008)
#' Inference for Normal Mixtures in Mean and Variance,
#' \emph{Statistica Sinica}, \bold{18}, 443--465.
#'
# McLachlan, G. J. and Peel, D. (2000) \emph{Finite Mixture Models}, John Wiley \& Sons, Inc.
#' @examples
#' data(faithful)
#' attach(faithful)
#'
#' normalmixPMLE(y = eruptions, m = 1)
#' normalmixPMLE(y = eruptions, m = 2)
#'
#' out <- normalmixPMLE(y = eruptions, m = 2)
#' summary(out)
normalmixPMLE <- function (y, m = 2, z = NULL, vcov.method = c("Hessian", "OPG", "none"),
ninits = 25, epsilon = 1e-08, maxit = 2000,
epsilon.short = 1e-02, maxit.short = 500, binit = NULL) {
y <- as.vector(y)
n <- length(y)
p <- 0
gam <- NULL
vcov.method <- match.arg(vcov.method)
if (!is.null(z)) {
z <- as.matrix(z)
p <- ncol(z)
if (nrow(z) != n) { stop("y and z must have the same number of rows.") }
ls.out <- lsfit(z, y)
sd0 <- sqrt(mean(ls.out$residuals^2))
} else {
sd0 <- sd(y) * sqrt((n - 1) / n)
}
ninits.short <- ninits*10*(1 + p)*m # computed after p is known, matching ninits * 10m(1+p)
if (m == 1) {
if (!is.null(z)) {
mu <- unname(ls.out$coeff[1])
gam <- unname(ls.out$coeff[2:(1 + p)])
res <- ls.out$residuals
sigma <- sqrt(mean(res*res))
} else {
mu <- mean(y)
sigma <- sd0
}
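# Maximized Gaussian log-likelihood with the MLE of the variance plugged in:
# -(n/2)*(1 + log(2*pi) + log(sigma^2))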
loglik <- - (n/2) *(1 + log(2*pi) + 2*log(sigma))
aic <- -2*loglik + 2*(m-1+2*m+p)
bic <- -2*loglik + log(n)*(m-1+2*m+p)
penloglik <- loglik
parlist <- list(alpha = 1, mu = mu, sigma = sigma, gam = gam)
coefficients <- c(alpha = 1, mu = mu, sigma = sigma, gam = gam)
postprobs <- rep(1, n)
} else { # m >= 2
# generate initial values
tmp <- normalmixPMLEinit(y = y, z = z, ninits = ninits.short, m = m)
# the following values for (h, k, tau, an) are given by default
# h <- 0 # setting h=0 gives PMLE
# k <- 0 # k is set to 0 because this is PMLE
# tau <- 0.5 # tau is set to 0.5 because this is PMLE
an <- 1/n # penalty term for variance
cn <- 0 # penalty term for tau
sigma0 <- rep(sd0, m)
mu0 <- double(m+1) # dummy
if (is.null(z)) {
ztilde <- matrix(0) # dummy
} else {
ztilde <- z
}
# short EM
b0 <- as.matrix(rbind( tmp$alpha, tmp$mu, tmp$sigma, tmp$gam ))
if (!is.null(binit)) {
b0[ , 1] <- binit
}
out.short <- cppNormalmixPMLE(b0, y, ztilde, mu0, sigma0, m, p, an, cn, maxit.short,
ninits.short, epsilon.short)
# long EM
components <- order(out.short$penloglikset, decreasing = TRUE)[1:ninits]
b1 <- b0[ ,components] # b0 has been updated
out <- cppNormalmixPMLE(b1, y, ztilde, mu0, sigma0, m, p, an, cn, maxit, ninits, epsilon)
index <- which.max(out$penloglikset)
alpha <- b1[1:m,index] # b0 has been updated
mu <- b1[(1+m):(2*m),index]
sigma <- b1[(1+2*m):(3*m),index]
if (!is.null(z)) {
gam <- b1[(3*m+1):(3*m+p),index]
}
penloglik <- out$penloglikset[index]
loglik <- out$loglikset[index]
postprobs <- matrix(out$post[,index], nrow=n)
aic <- -2*loglik + 2*(m-1+2*m+p)
bic <- -2*loglik + log(n)*(m-1+2*m+p)
mu.order <- order(mu)
alpha <- alpha[mu.order]
mu <- mu[mu.order]
sigma <- sigma[mu.order]
postprobs <- postprobs[, mu.order]
colnames(postprobs) <- paste0("comp.", 1:m)
parlist <- list(alpha = alpha, mu = mu, sigma = sigma, gam = gam)
coefficients <- unlist(parlist)
} # end m >= 2
if (vcov.method == "none") {
vcov <- NULL
} else {
vcov <- normalmixVcov(y = y, coefficients = coefficients, z = z , vcov.method = vcov.method)
}
a <- list(coefficients = coefficients, parlist = parlist, vcov = vcov, loglik = loglik,
penloglik = penloglik, aic = aic, bic = bic, postprobs = postprobs,
components = getComponentcomponents(postprobs),
call = match.call(), m = m, label = "PMLE")
class(a) <- "normalregMix"
a
} # end function normalmixPMLE
#' @description Compute ordinary & penalized log-likelihood ratio resulting from
#' the MEM algorithm at k=1,2,3.
#' @title normalmixMaxPhi
#' @name normalmixMaxPhi
#' @param y n by 1 vector of data.
#' @param parlist parameter estimates as a list containing alpha, mu, sigma, and gamma
#' in the form of (alpha = (alpha_1, ..., alpha_m), mu = (mu_1, ..., mu_m),
#' sigma = (sigma_1, ..., sigma_m), gam).
#' @param z n by p matrix of regressor associated with gamma.
#' @param an tuning parameter used in the penalty function on sigma.
#' @param cn tuning parameter used in the penalty function on tau.
#' @param tauset set of initial tau value candidates.
#' @param ninits number of randomly drawn initial values.
#' @param epsilon.short convergence criterion in short EM. Convergence is declared when the penalized log-likelihood increases by less than \code{epsilon.short}.
#' @param epsilon convergence criterion. Convergence is declared when the penalized log-likelihood increases by less than \code{epsilon}.
#' @param maxit.short maximum number of iterations in short EM.
#' @param maxit maximum number of iterations.
#' @param verb Determines whether to print a message if an error occurs.
#' @return A list with items:
#' \item{coefficient}{Coefficient estimates at the (h, tau) pair that attains the highest log-likelihood at k=3.}
#' \item{loglik}{Log-likelihood resulting from MEM algorithm at k=1,2,3.}
#' \item{penloglik}{Penalized log-likelihood resulting from MEM algorithm at k=1,2,3.}
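#' @examples
#' # A sketch (not run); an = 1/n and cn = 0 are illustrative tuning values,
#' # and parlist is assumed to come from a two-component normalmixPMLE fit.
#' \dontrun{
#' fit <- normalmixPMLE(y = faithful$eruptions, m = 2)
#' normalmixMaxPhi(y = faithful$eruptions, parlist = fit$parlist,
#'   an = 1/length(faithful$eruptions), cn = 0)
#' }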
normalmixMaxPhi <- function (y, parlist, z = NULL, an, cn, tauset = c(0.1,0.3,0.5),
ninits = 10, epsilon.short = 1e-02, epsilon = 1e-08,
maxit.short = 500, maxit = 2000,
verb = FALSE) {
# Given a parameter estimate of an m-component model and tuning parameter an,
# maximize the objective function for computing the modified EM test statistic
# for testing H_0 of m components against H_1 of m+1 components for a univariate normal finite mixture
warn <- options(warn = -1) # Turn off warnings
on.exit(options(warn), add = TRUE) # restore the previous warning setting on exit
if (!is.null(z)) {
z <- as.matrix(z)
p <- ncol(z)
} else {
p <- 0
}
m <- length(parlist$alpha)
ninits.short <- ninits*10*(1+p)*m
htau <- t(as.matrix(expand.grid(c(1:m), tauset)))
results <- apply(htau,2,normalmixMaxPhiStep, y, parlist, z, p, an, cn,
ninits, ninits.short, epsilon.short, epsilon,
maxit.short, maxit, verb)
loglik.all <- t(sapply(results, "[[", "loglik"))
penloglik.all <- t(sapply(results, "[[", "penloglik"))
coefficient.all <- t(sapply(results, "[[", "coefficient"))
loglik <- apply(loglik.all, 2, max) # 3 by 1 vector
penloglik <- apply(penloglik.all, 2, max) # 3 by 1 vector
index <- which.max(loglik.all[ ,3]) # a pair (h,m) that gives the highest likelihood at k=3
coefficient <- as.vector(coefficient.all[index,])
out <- list(coefficient = coefficient, loglik = loglik, penloglik = penloglik)
out
} # end normalmixMaxPhi
#' @description Given a pair of h and tau and data, compute ordinary &
#' penalized log-likelihood ratio resulting from MEM algorithm at k=1,2,3,
#' tailored for parallelization.
#' @title normalmixMaxPhiStep
#' @name normalmixMaxPhiStep
#' @param htaupair set of h and tau.
#' @param y n by 1 vector of data.
#' @param parlist parameter estimates as a list containing alpha, mu, sigma, and gamma
#' in the form of (alpha = (alpha_1, ..., alpha_m), mu = (mu_1, ..., mu_m),
#' sigma = (sigma_1, ..., sigma_m), gam).
#' @param z n by p matrix of regressor associated with gamma.
#' @param p dimension of z.
#' @param an tuning parameter used in the penalty function on sigma.
#' @param cn tuning parameter used in the penalty function on tau.
#' @param ninits number of randomly drawn initial values.
#' @param ninits.short number of candidates used to generate an initial phi, in short EM.
#' @param epsilon.short convergence criterion in short EM. Convergence is declared when the penalized log-likelihood increases by less than \code{epsilon.short}.
#' @param epsilon convergence criterion. Convergence is declared when the penalized log-likelihood increases by less than \code{epsilon}.
#' @param maxit.short maximum number of iterations in short EM.
#' @param maxit maximum number of iterations.
#' @param verb Determines whether to print a message if an error occurs.
#' @return A list of phi, log-likelihood, and penalized log-likelihood resulting from MEM algorithm.
normalmixMaxPhiStep <- function (htaupair, y, parlist, z = NULL, p, an, cn,
ninits, ninits.short, epsilon.short, epsilon,
maxit.short, maxit, verb)
{
alpha0 <- parlist$alpha
m <- length(alpha0)
m1 <- m+1
k <- 1
n <- length(y)
h <- as.numeric(htaupair[1])
tau <- as.numeric(htaupair[2])
mu0 <- parlist$mu
mu0h <- c(-1e+10,mu0,1e+10) # m+2 by 1
sigma0 <- parlist$sigma
sigma0h<- c(sigma0[1:h],sigma0[h:m]) # m+1 by 1
gam0 <- parlist$gam
if (is.null(z)) {
ztilde <- matrix(0) # dummy
gam <- NULL
}
else
ztilde <- as.matrix(z)
# generate initial values
tmp <- normalmixPhiInit(y = y, parlist = parlist, z = z, h=h, tau = tau, ninits = ninits.short)
# short EM
b0 <- as.matrix(rbind(tmp$alpha, tmp$mu, tmp$sigma, tmp$gam))
out.short <- cppNormalmixPMLE(b0, y, ztilde, mu0h, sigma0h, m1, p, an, cn, maxit.short, ninits.short,
epsilon.short, tau, h, k)
components <- order(out.short$penloglikset, decreasing = TRUE)[1:ninits]
if (verb && any(out.short$notcg)) {
cat(sprintf("non-convergence rate at short-EM = %.3f\n",mean(out.short$notcg)))
}
# long EM
b1 <- as.matrix(b0[ ,components])
out <- cppNormalmixPMLE(b1, y, ztilde, mu0h, sigma0h, m1, p, an, cn, maxit, ninits, epsilon, tau, h, k)
index <- which.max(out$penloglikset)
alpha <- b1[1:m1,index]
mu <- b1[(1+m1):(2*m1),index]
sigma <- b1[(1+2*m1):(3*m1),index]
if (!is.null(z)) {
gam <- b1[(3*m1+1):(3*m1+p),index]
}
mu.order <- order(mu)
alpha <- alpha[mu.order]
mu <- mu[mu.order]
sigma <- sigma[mu.order]
sigma0h <- sigma0h[mu.order]
b <- as.matrix( c(alpha, mu, sigma, gam) )
# initialization
loglik <- vector("double", 3)
penloglik <- vector("double", 3)
coefficient <- vector("double", length(b))
penloglik[1] <- out$penloglikset[[index]]
loglik[1] <- out$loglikset[[index]]
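# For k = 2 and k = 3, take two additional modified EM steps (maxit = 2,
# ninits = 1) from the current estimate and record the resulting ordinary
# and penalized log-likelihoods.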
for (k in 2:3) {
ninits <- 1
maxit <- 2
# Two EM steps
out <- cppNormalmixPMLE(b, y, ztilde, mu0h, sigma0h, m1, p, an, cn, maxit, ninits, epsilon, tau, h, k)
alpha <- b[1:m1,1] # b has been updated
mu <- b[(1+m1):(2*m1),1]
sigma <- b[(1+2*m1):(3*m1),1]
if (!is.null(z)) {
gam <- b[(3*m1+1):(3*m1+p),1]
}
loglik[k] <- out$loglikset[[1]]
penloglik[k] <- out$penloglikset[[1]]
# Check singularity: if singular, break from the loop
if ( any(sigma < 1e-06) || any(alpha < 1e-06) || is.na(sum(alpha)) ) {
loglik[k] <- -Inf
penloglik[k] <- -Inf
break
}
mu.order <- order(mu)
alpha <- alpha[mu.order]
mu <- mu[mu.order]
sigma <- sigma[mu.order]
sigma0h <- sigma0h[mu.order]
}
coefficient <- as.matrix( c(alpha, mu, sigma, gam) ) # at k=3
return (list(coefficient = coefficient, loglik = loglik, penloglik = penloglik))
}
#' @description Generates lists of parameters for initial candidates used by
#' \code{normalmixMaxPhi}.
#' @title normalmixPhiInit
#' @name normalmixPhiInit
#' @param y n by 1 vector of data.
#' @param parlist parameter estimates as a list containing alpha, mu, sigma,
#' and gamma in the form of (alpha = (alpha_1, ..., alpha_m),
#' mu = (mu_1, ..., mu_m), sigma = (sigma_1, ..., sigma_m), gam).
#' @param z n by p matrix of regressor associated with gamma.
#' @param h h used as index for pivoting.
#' @param tau tau used to split the h-th component.
#' @param ninits number of initial values to be generated.
#' @return A list with the following items:
#' \item{alpha}{m+1 by ninits matrix for alpha.}
#' \item{mu}{m+1 by ninits matrix for mu.}
#' \item{sigma}{m+1 by ninits matrix for sigma.}
#' \item{gam}{p by ninits matrix for gamma.}
normalmixPhiInit <- function (y, parlist, z = NULL, h, tau, ninits = 1)
{
if (normalregMixtest.env$normalregMix.test.on) # initial values controlled by normalregMix.test.on
set.seed(normalregMixtest.env$normalregMix.test.seed)
n <- length(y)
p <- ncol(z)
mu0 <- parlist$mu
sigma0 <- parlist$sigma
alpha0 <- parlist$alpha
m <- length(alpha0)
if (is.null(z))
gam <- NULL
else {
gam0 <- parlist$gam
y <- y - z %*% gam0
gam <- matrix(runif(p*ninits,min=0.5,max=1.5),nrow=p)*gam0
}
if (m>=2){
mid <- (mu0[1:(m-1)]+mu0[2:m])/2 # m-1 by 1
lb0 <- c(min(y),mid) # m by 1
lb <- c(lb0[1:h],lb0[h:m]) # m+1 by 1
ub0 <- c(mid,max(y)) # m by 1
ub <- c(ub0[1:h],ub0[h:m]) # m+1 by 1
} else {
lb <- c(min(y),min(y))
ub <- c(max(y),max(y))
}
mu <- matrix(runif((m+1)*ninits,min=lb,max=ub),nrow=m+1)
sigma.hyp <- c(sigma0[1:h],sigma0[h:m]) # m+1 by 1
sigma <- matrix(runif((m+1)*ninits,min=sigma.hyp*0.25,max=sigma.hyp*2),nrow=m+1)
alpha.hyp <- c(alpha0[1:h],alpha0[h:m]) # m+1 by 1
alpha.hyp[h:(h+1)] <- c(alpha.hyp[h]*tau,alpha.hyp[h+1]*(1-tau))
alpha <- matrix(rep.int(alpha.hyp,ninits),nrow=m+1)
list(alpha = alpha, mu = mu, sigma = sigma, gam = gam)
} # end function normalmixPhiInit
#' @description Generate initial values used by \code{normalmixPMLE}.
#' @title normalmixPMLEinit
#' @name normalmixPMLEinit
#' @param y n by 1 vector of data.
#' @param z n by p matrix of regressor associated with gamma.
#' @param ninits number of initial values to be generated.
#' @param m number of components in the mixture.
#' @return A list with the following items:
#' \item{alpha}{m by ninits matrix for alpha.}
#' \item{mu}{m by ninits matrix for mu.}
#' \item{sigma}{m by ninits matrix for sigma.}
#' \item{gam}{p by ninits matrix for gam.}
normalmixPMLEinit <- function (y, z = NULL, ninits = 1, m = 2)
{
if (normalregMixtest.env$normalregMix.test.on) # initial values controlled by normalregMix.test.on
set.seed(normalregMixtest.env$normalregMix.test.seed)
n <- length(y)
p <- ncol(z)
gam <- NULL
if (!is.null(z)){
out <- lsfit(z,y)
gam0 <- out$coef[-1]
gam <- matrix(runif(p*ninits, min=0.5, max=1.5), nrow=p) * gam0
y <- out$residuals + out$coef[1]
}
alpha <- matrix(runif(m * ninits), nrow=m)
alpha <- t(t(alpha) / colSums(alpha))
mu <- matrix(runif(m * ninits, min = min(y), max = max(y)), nrow=m)
sigma <- matrix(runif(m * ninits, min = 0.01, max = 2)*sd(y), nrow=m)
list(alpha = alpha, mu = mu, sigma = sigma, gam = gam)
} # end function normalmixPMLEinit
#' @description Generates mixed normal random variables with regressor x.
#' @export
#' @title rnormregmix
#' @name rnormregmix
#' @param n number of observations.
#' @param alpha m by 1 vector that represents proportions of components.
#' @param mubeta k by m matrix. The first row represents the intercept (mu)
#' for m components, and the second to k-th rows represent the
#' coefficients on x for m components.
#' @param sigma m by 1 vector that represents sd of components.
#' @param x n by k-1 matrix that does NOT include a constant.
#' @return n by 1 vector that is formed by regressor x.
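#' @examples
#' # A small sketch: 200 draws from a two-component mixture without regressors.
#' y <- rnormregmix(n = 200, alpha = c(0.4, 0.6), mubeta = c(-1, 2), sigma = c(0.5, 1))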
rnormregmix <- function (n, alpha, mubeta, sigma, x = NULL) {
# Generates mixed normal random variables with regressor x
# Input
# n : number of observations
# alpha : m-vector
# mubeta : k by m matrix
# sigma : m-vector
# x : (n by k-1) matrix NOT including a constant
# Output
# y : n by 1 vector
m <- length(alpha)
mubeta <- matrix(mubeta, ncol=m)
if (!is.null(x)){
x <- as.matrix(x)
if (nrow(x) != n) { stop("x must have n rows.") }
x1 <- cbind(1,x)
ii <- sample(m, n, replace=TRUE, prob=alpha)
y <- rnorm(n, mean = rowSums(x1*t(mubeta[, ii])), sd = sigma[ii])
} else {
ii <- sample(m, n, replace=TRUE, prob=alpha)
y <- rnorm(n, mean = mubeta[, ii], sd = sigma[ii])
}
y
} # end function rnormregmix
#' @description Computes omega_{j|i} defined in (2.1) of Maitra and Melnykov (2010).
#' @export
#' @title omega.ji
#' @name omega.ji
#' @param phi_i 3 by 1 column consisting of alpha, mu, sigma of ith component
#' @param phi_j 3 by 1 column consisting of alpha, mu, sigma of jth component
#' @return omega_{j|i}
#' @references Maitra, R., and Melnykov, V. (2010)
#' Simulating Data to Study Performance of Finite Mixture Modeling and Model-Based Clustering Algorithms,
#' \emph{Journal of Computational and Graphical Statistics},
#' \bold{19}, 354--376.
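#' @examples
#' # Sketch: probability that a draw from component i = N(0, 1) is classified
#' # into component j = N(3, 1) when both components have weight 0.5.
#' omega.ji(phi_i = c(0.5, 0, 1), phi_j = c(0.5, 3, 1))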
# Returns a misclassification rate omega_ji given two components i, j,
# i.e. the probability of choosing component j where
# the true model is ith component.
omega.ji <- function(phi_i, phi_j) {
alpha_i <- phi_i[1]
alpha_j <- phi_j[1]
mu_i <- phi_i[2]
mu_j <- phi_j[2]
sigma_i <- phi_i[3]
sigma_j <- phi_j[3]
a <- (1/sigma_j^2 - 1/sigma_i^2)
b <- mu_i / sigma_i^2 - mu_j / sigma_j^2
c <- mu_j^2 / sigma_j^2 - mu_i^2 / sigma_i^2
if (sigma_i == sigma_j)
if (mu_i > mu_j)
omega_ji = pnorm((2 * log(alpha_j/alpha_i) - c)/(2*b),
mean = mu_i, sd = sigma_i)
else
omega_ji = 1 - pnorm((2 * log(alpha_j/alpha_i) - c)/(2*b),
mean = mu_i, sd = sigma_i)
else {
d <- 2 * log(alpha_j * sigma_i / (alpha_i * sigma_j)) - c + (b^2 / a)
da <- max(d/a, 0)
if (sigma_i > sigma_j)
omega_ji = pnorm(sqrt(da)-b/a, mean = mu_i, sd = sigma_i) -
pnorm(-sqrt(da)-b/a, mean = mu_i, sd = sigma_i)
else
omega_ji = 1 +
pnorm(-sqrt(da)-b/a, mean = mu_i, sd = sigma_i) -
pnorm(sqrt(da)-b/a, mean = mu_i, sd = sigma_i)
}
omega_ji <- unname(omega_ji)
return (omega_ji)
}
#' @description Computes \eqn{\omega_{12}}, the average misclassification rate between two normal mixtures defined in Maitra and Melnykov (2010).
#' @export
#' @title omega.12
#' @name omega.12
#' @param parlist parameter estimates as a list containing alpha, mu, sigma, and gam
#' in the form of (alpha = (alpha_1, ..., alpha_m), mu = (mu_1, ..., mu_m),
#' sigma = (sigma_1, ..., sigma_m), gam = (gam_1, ..., gam_m)).
#' @return misclassification rate \eqn{\omega_{12}}.
#' @references Maitra, R., and Melnykov, V. (2010)
#' Simulating Data to Study Performance of Finite Mixture Modeling and Model-Based Clustering Algorithms,
#' \emph{Journal of Computational and Graphical Statistics},
#' \bold{19}, 354--376.
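#' @examples
#' # Sketch with illustrative two-component estimates.
#' omega.12(list(alpha = c(0.5, 0.5), mu = c(0, 3), sigma = c(1, 1)))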
omega.12 <- function(parlist)
# Computes omega_{12} for testing H_0:m=2 against H_1:m=3
{
phi1 <- c(alpha = parlist$alpha[1], mu = parlist$mu[1], sigma = parlist$sigma[1])
phi2 <- c(alpha = parlist$alpha[2], mu = parlist$mu[2], sigma = parlist$sigma[2])
part1 <- omega.ji(phi1, phi2)
part2 <- omega.ji(phi2, phi1)
omega.12 <- (part1 + part2) / 2
return (omega.12)
} # end function omega.12
#' @description Computes \eqn{\omega_{12}} and \eqn{\omega_{23}} defined in Maitra and Melnykov (2010).
#' @export
#' @title omega.123
#' @name omega.123
#' @param parlist parameter estimates as a list containing alpha, mu, sigma, and gamma
#' in the form of (alpha = (alpha_1, ..., alpha_m), mu = (mu_1, ..., mu_m),
#' sigma = (sigma_1, ..., sigma_m), gam = (gamma_1, ..., gamma_m)).
#' @return 2 by 1 vector whose first element is \eqn{\omega_{12}} and second element is \eqn{\omega_{23}}.
#' @references Maitra, R., and Melnykov, V. (2010)
#' Simulating Data to Study Performance of Finite Mixture Modeling and Model-Based Clustering Algorithms,
#' \emph{Journal of Computational and Graphical Statistics},
#' \bold{19}, 354--376.
omega.123 <- function(parlist)
{
phi1 <- c(alpha = parlist$alpha[1], mu = parlist$mu[1], sigma = parlist$sigma[1])
phi2 <- c(alpha = parlist$alpha[2], mu = parlist$mu[2], sigma = parlist$sigma[2])
phi3 <- c(alpha = parlist$alpha[3], mu = parlist$mu[3], sigma = parlist$sigma[3])
part1 <- omega.ji(phi1, phi2)
part2 <- omega.ji(phi2, phi1)
w12 <- (part1 + part2)/2
part3 <- omega.ji(phi2, phi3)
part4 <- omega.ji(phi3, phi2)
w23 <- (part3 + part4)/2
return(c(w12, w23))
} # end function omega.123
#' @description Computes \eqn{\omega_{12}}, \eqn{\omega_{23}}, and \eqn{\omega_{34}} defined in Maitra and Melnykov (2010).
#' @export
#' @title omega.1234
#' @name omega.1234
#' @param parlist parameter estimates as a list containing alpha, mu, sigma, and gamma
#' in the form of (alpha = (alpha_1, ..., alpha_m), mu = (mu_1, ..., mu_m),
#' sigma = (sigma_1, ..., sigma_m), gam = (gamma_1, ..., gamma_m)).
#' @return 3 by 1 vector consisting of \eqn{\omega_{12}}, \eqn{\omega_{23}}, and \eqn{\omega_{34}}.
#' @references Maitra, R., and Melnykov, V. (2010)
#' Simulating Data to Study Performance of Finite Mixture Modeling and Model-Based Clustering Algorithms,
#' \emph{Journal of Computational and Graphical Statistics},
#' \bold{19}, 354--376.
omega.1234 <- function(parlist)
{
phi1 <- c(alpha = parlist$alpha[1], mu = parlist$mu[1], sigma = parlist$sigma[1])
phi2 <- c(alpha = parlist$alpha[2], mu = parlist$mu[2], sigma = parlist$sigma[2])
phi3 <- c(alpha = parlist$alpha[3], mu = parlist$mu[3], sigma = parlist$sigma[3])
phi4 <- c(alpha = parlist$alpha[4], mu = parlist$mu[4], sigma = parlist$sigma[4])
part1 <- omega.ji(phi1, phi2)
part2 <- omega.ji(phi2, phi1)
w12 <- (part1 + part2)/2
part3 <- omega.ji(phi2, phi3)
part4 <- omega.ji(phi3, phi2)
w23 <- (part3 + part4)/2
part5 <- omega.ji(phi3, phi4)
part6 <- omega.ji(phi4, phi3)
w34 <- (part5 + part6)/2
return(c(w12, w23, w34))
} # end function omega.1234