Nothing
#' FlexMix Driver for Regularized Multinomial Mixtures
#'
#' This model driver can be used to cluster data using a multinomial
#' distribution.
#'
#' Using a regularization parameter `alpha` greater than zero
#' acts as adding `alpha` observations conforming to the population
#' mean to each component. This can be used to avoid degenerate
#' solutions. It also has the effect
#' that clusters become more similar to each other the larger
#' `alpha` is chosen. For small values it is mostly negligible however.
#'
#' For regularization we compute the MAP estimates for the multinomial
#' distribution using the Dirichlet distribution as prior, which is
#' the conjugate prior. The parameters of this prior are selected to
#' correspond to the marginal distribution of the variable across all
#' observations.
#'
#' @param r Number of different categories. Values are assumed to be
#' integers in `1:r`. Default `NULL` implies that the number of
#' different categories is inferred columnwise by the maximum
#' value observed.
#' @param alpha A non-negative scalar acting as regularization
#' parameter. Can be regarded as adding `alpha` observations equal
#' to the population mean to each component.
#' @param formula A formula which is interpreted relative to the
#' formula specified in the call to [flexmix::flexmix()] using
#' [stats::update.formula()]. Only the left-hand side (response)
#' of the formula is used. Default is to use the original model
#' formula specified in [flexmix::flexmix()].
#' @return An object of class `"FLXC"`.
#' @export
#' @references - Galindo Garre, F, Vermunt, JK (2006). *Avoiding
#' Boundary Estimates in Latent Class Analysis by Bayesian
#' Posterior Mode Estimation* Behaviormetrika, 33, 43-59. -
#' Ernst, D, Ortega Menjivar, L, Scharl, T, GrĂ¼n, B (2025).
#' *Ordinal Clustering with the flex-Scheme.* Austrian Journal of
#' Statistics. _Submitted manuscript_.
#' @export
#' @importFrom utils head
#' @example examples/multinom.R
FLXMCregmultinom = function(formula=.~., r = NULL, alpha=0) {
stopifnot(is.numeric(alpha), length(alpha) == 1, alpha >= 0)
b_alpha <- 0
z <- new("FLXMC", weighted=TRUE, formula=formula,
name="FLXMCregmultinom")
.as01 <- function(y, r) {
# as matrix in 0/1 coding
yd = lapply(seq_len(ncol(y)), \(col) {
uvalues = seq_len(r[col])
cns = paste0(colnames(y)[col], uvalues)
xx = lapply(uvalues, \(value) {
as.integer(y[, col] == value)
}) |> do.call(cbind, args=_) |>
`colnames<-`(cns)
xx
}) |> do.call(cbind, args=_)
attr(yd, "y") <- y
attr(yd, "ymarg") <- colMeans(yd) |> unname()
csr <- cumsum(r)
csrlag <- c(1, head(csr, -1)+1)
attr(yd, "csr") <- csr
attr(yd, "csrlag") <- csrlag
yd
}
z@preproc.y <- function(y) {
if (anyNA(y))
stop("NAs are not allowed")
if (any(y < 1)) {
stop("values < 1 not allowed (values need to be in 1:r)")
}
if (is.null(r)) {
r <- apply(y, 2, max)
} else {
r <- as.integer(r)
r <- rep(r, length = ncol(y))
if (any(apply(y, 2, max) > r)) {
stop("values larger than r not allowed (values need to be in 1:r)")
}
}
stopifnot(r >= 2)
.as01(y, r)
}
z@defineComponent <- function(para) {
predict <- function(x, ...) {
stop("not implemented")
}
logLik <- function(x, y) {
y <- attr(y, "y")
probmat <- lapply(seq_along(para$prob), \(col) {
log(para$prob[[col]])[y[, col]]
}) |> do.call(cbind, args=_) |>
`colnames<-`(colnames(y))
rowSums(probmat)
}
new("FLXcomponent",
parameters=list(p = para$prob),
logLik=logLik, df=para$df,
predict=predict)
}
z@fit <- function(x, y, w) {
if (alpha > 0) {
b_alpha <- attr(y, "ymarg") * alpha
}
p <- (b_alpha + colSums(w*y)) / (alpha + sum(w))
pl <- mapply(\(a,b) {
p[a:b]
}, attr(y, "csrlag"), attr(y, "csr"), SIMPLIFY = FALSE)
para <- list(prob = pl, df = sum(lengths(pl) - 1))
z@defineComponent(para)
}
z
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.