#' Performs MEM test given the data for y on the null hypothesis H_0: m = m_0.
#' @export
#' @title mvnmixMEMtest
#' @name mvnmixMEMtest
#' @param y n by d matrix of data
#' @param m The number of components in the mixture defined by a null hypothesis, m_0
#' @param tauset A set of initial tau value candidates
#' @param an a term used for penalty function
#' @param ninits The number of randomly drawn initial values.
#' @param crit.method Method used to compute the variance-covariance matrix, one of \code{"none"},
#' \code{"asy"}, and \code{"boot"}. The default option is \code{"asy"}. When \code{method = "asy"},
#' the p-values are computed based on an asymptotic method. When \code{method = "OPG"},
#' the p-values are generated by bootstrapping.
#' @param nbtsp The number of bootstrap observations; by default, it is set to be 199
#' @param cl Cluster used for parallelization; if it is \code{NULL}, the system will automatically
#' create a new one for computation accordingly.
#' @param parallel Determines what percentage of available cores are used,
#' represented by a double in [0,1]. 0.75 is default.
#' @param LRT.penalized Determines whether penalized likelihood is used in calculation of LRT
#' statistic for likelihood in an alternative hypothesis.
#' @return A list of class \code{mvnmix} with items:
#' \item{coefficients}{A vector of parameter estimates. Ordered as \eqn{\alpha_1,\ldots,\alpha_m,\mu_1,\ldots,\mu_m,\sigma_1,\ldots,\sigma_m,\gamma}.}
#' \item{parlist}{The parameter estimates as a list containing alpha, mu, and sigma (and gam if z is included in the model).}
#' \item{vcov}{The estimated variance-covariance matrix.}
#' \item{loglik}{The maximized value of the log-likelihood.}
#' \item{penloglik}{The maximized value of the penalized log-likelihood.}
#' \item{aic}{Akaike Information Criterion of the fitted model.}
#' \item{bic}{Bayesian Information Criterion of the fitted model.}
#' \item{postprobs}{n by m matrix of posterior probabilities for observations}
#' \item{components}{n by 1 vector of integers that indicates the indices of components
#' each observation belongs to based on computed posterior probabilities}
#' \item{call}{The matched call.}
#' \item{m}{The number of components in the mixture.}
#' @examples
#' data(faithful)
#' attach(faithful)
#' mvnmixMEMtest(y = eruptions, m = 1, crit.method = "asy")
#' mvnmixMEMtest(y = eruptions, m = 2, crit.method = "asy")
mvnmixMEMtest <- function (y, m = 2, an = 1, tauset = c(0.1,0.3,0.5),
ninits = 10,
crit.method = c("asy", "boot", "none"), nbtsp = 199,
cl = NULL,
parallel = 0.75,
LRT.penalized = FALSE) {
# Compute the modified EM test statistic for testing H_0 of m components
# against H_1 of m+1 components for a univariate finite mixture of normals
y <- as.matrix(y)
n <- nrow(y)
d <- ncol(y)
crit.method <- match.arg(crit.method)
pmle.result <- mvnmixPMLE(y=y, m=m, ninits=ninits)
loglik0 <- pmle.result$loglik
# Scale y so that smallest value of the diagonal
# elements of sigma is 0.1.
sigma.matrix <- matrix(pmle.result$parlist$sigma, ncol=m)
IND <- cumsum(c(1,seq(d,2)))
sigma.diags <- sigma.matrix[IND,]
sc <- sqrt(0.1/min(sigma.diags))
# parlist0 contains parameter values passed to MaxPhi,
# mvnmixCrit and mvnmixCritBoot. mu and sigma are scaled.
y <- y*sc
parlist0 <- pmle.result$parlist
parlist0$mu <- parlist0$mu*sc
parlist0$sigma <- parlist0$sigma*sc*sc
par1 <- mvnmixMaxPhi(y=y, parlist=parlist0,
an=an, tauset = tauset, ninits=ninits,
parallel = 0, cl = cl)
# emstat <- 2*(par1$loglik - loglik0)
# Adjust the value of the loglikelihood for scaling.
emstat <- 2*(par1$loglik - loglik0 + log(sc)*n*d)
if (LRT.penalized){
# emstat <- 2*(par1$penloglik - loglik0)
emstat <- 2*(par1$penloglik - loglik0 + log(sc)*n*d)
} # use the penalized log-likelihood.
if (crit.method == "asy"){
result <- mvnmixCrit(y=y, parlist=parlist0, values=emstat)
} else if (crit.method == "boot") {
result <- mvnmixCritBoot(y=y, an=an, parlist= parlist0, values=emstat,
ninits=ninits, nbtsp=nbtsp, parallel = parallel, cl=cl,
LRT.penalized = LRT.penalized)
} else {
result <- list()
result$crit <- result$pvals <- rep(NA,3)
}
a <- list(emstat = emstat, pvals = result$pvals, crit = result$crit, crit.method = crit.method,
parlist = pmle.result$parlist, ll0 = loglik0, ll1 = par1$loglik,
aic = pmle.result$aic, bic = pmle.result$bic, postprobs = pmle.result$postprobs,
call = match.call(), m = m, label = "MEMtest")
class(a) <- "normalregMix"
a
} # end mvnmixMEMtest
#' @description Computes the bootstrap critical values of the modified EM test.
#' @export
#' @title mvnmixCritBoot
#' @name mvnmixCritBoot
#' @param y n by d matrix of data
#' @param parlist The parameter estimates as a list containing alpha, mu, and sigma
#' in the form of (alpha = (alpha_1,...,alpha_m), mu = (mu_1',...,mu_m'),
#' sigma = (vech(sigma_1)',...,vech(sigma_m)')
#' @param values 3 by 1 Vector of length 3 (k = 1, 2, 3) at which the p-values are computed
#' @param ninits The number of initial candidates to be generated
#' @param nbtsp The number of bootstrap observations; by default, it is set to be 199.
#' @param parallel Determines what percentage of available cores are used, represented by a double in [0,1]. 0.75 is default.
#' @param cl Cluster used for parallelization (optional)
#' @return A list with the following items:
#' \item{crit}{3 by 3 matrix of (0.1, 0.05, 0.01 critical values), jth row corresponding to k=j}
#' \item{pvals}{A vector of p-values at k = 1, 2, 3}
mvnmixCritBoot <- function (y, an = 1, parlist, values = NULL, ninits = 10,
nbtsp = 199, parallel = parallel, cl = NULL,
LRT.penalized = FALSE) {
# if (normalregMix.test.on) # initial values controlled by normalregMix.test.on
# set.seed(normalregMix.test.seed)
y <- as.matrix(y)
n <- nrow(y)
d <- ncol(y)
dsig <- d*(d+1)/2
alpha <- parlist$alpha
mu <- parlist$mu
sigma <- parlist$sigma
m <- length(alpha)
# an <- 1
# an <- anFormula(parlist = parlist, m = m, n = n, LRT.penalized = LRT.penalized)
mu.mat <- matrix(mu, nrow=d, ncol=m)
sigma.mat <- matrix(0, nrow=d, ncol=d*m)
for (j in 1:m){
sigma.j <- sigma[((j-1)*dsig+1):(j*dsig)]
sigma.mat[,((j-1)*d+1):(j*d)] <- sigmavec2mat(sigma.j,d)
}
pvals <- NULL
# Generate bootstrap observations
if (m==1){
ybset <- rmvnorm(nbtsp*n, mu=mu, sigma = sigma.mat)
} else {
ybset <- rmvnmix(nbtsp*n, alpha=alpha, mu=mu.mat, sigma=sigma.mat)
}
# ybset <- array(ybset, dim=c(n,d,nbtsp))
ybset <- array(ybset, dim=c(n,nbtsp,d))
num.cores <- max(1,floor(detectCores()*parallel))
if (num.cores > 1) {
if (is.null(cl)) {
cl <- makeCluster(num.cores)
newcluster <- TRUE
}
clusterSetRNGStream(cl = cl, 123456)
out <- parLapply (cl, 1:nbtsp, function(j) mvnmixMEMtest(y=ybset[,j,], m = m, parallel = 0,
an = an, ninits = ninits, crit.method="none", LRT.penalized = LRT.penalized))
if (newcluster) {
on.exit(stopCluster(cl))
} else {
on.exit(cl)
}
}
else
out <- lapply(1:nbtsp, function(j) mvnmixMEMtest(y=ybset[,j,], m = m, parallel = 0,
an = an, ninits = ninits, crit.method="none", LRT.penalized = LRT.penalized))
emstat.b <- sapply(out, "[[", "emstat") # 3 by nbstp matrix
emstat.b <- t(apply(emstat.b, 1, sort))
q <- ceiling(nbtsp*c(0.90,0.95,0.99))
crit <- emstat.b[, q]
if (!is.null(values)) { pvals <- rowMeans(emstat.b > values) }
return(list(crit = crit, pvals = pvals))
} # end function mvnmixCritBoot
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.