#' EM algorithm for Multinomial Logit
#'
#' @param Y A matrix of multinomial outcomes where rows correspond to observations
#' and columns correspond to choices.
#' @param X A matrix of covariates where columns correspond to variables.
#' The number of rows should match with the number of rows for \code{Y}.
#' @param control A list of control parameters, which can contain the following items:
#' \itemize{
#' \item \code{max_iter} A integer value that specifies the maximum iterations
#' for the EM algorithm. Defaul value is 200.
#' \item \code{tol} A tolerance parameter for assessing the convergence. Default value is 1e-5.
#' \item \code{mu0} A vector of prior means. The dimension of this parameter should match the number of variables (i.e., \code{ncol(X)}).
#' The default value is \code{0}.
#' \item \code{Z0} A matrix for the prior variance covariance matrix.
#' The dimension of this matrix should match with the dimension of \code{X} i.e., the number of variables).
#' The default value is \code{diag(rep(5, ncol(X)))}
#' \item \code{verbose} A boolean argument. If set \code{TRUE}, the function shows the log-posterior for each iteration. Default is \code{FALSE}.
#' \item \code{intercept} A boolean argument. When \code{X} already contains the intercept term (i.e., a column of ones), this option should be \code{FALSE}.
#' Default is \code{TRUE}.
#' \item \code{variance} A boolean argument. If \code{FALSE}, \code{emlogit()} skips the variance calculation.
#' This is useful when calling \code{emlogit()} from other programs. Default is \code{TRUE}.
#' \item \code{initialize} A method for initialization.
#' The package currently supports \code{initialize = "random"} (random) or \code{initialize = "ls"} (least square). Default is \code{"ls"}.
#' \item \code{initial_value} A matrix of coefficients used for the initial value where the first column should be zeros for identification.
#' This matrix should be of dimension K by J where K is the total number of covariates (including the intercept) and J is the total number of choices.
#' If supplied, \code{initial_value} overwrites the initial value generated by \code{initialize} option.
#' }
#' @export
emlogit <- function(Y, X, control = list()) {
## setup ------------------------------------------------------------
X <- as.matrix(X)
control <- input_check(control)
if (isTRUE(control$intercept)) {
X <- cbind(rep(1, nrow(X)), X)
}
control <- specify_prior(control, ncol(X))
if (exists("initial_value", control)) {
## check input
if (!all(dim(control$initial_value) == c(ncol(X), ncol(Y))))
stop("Dimension of initial value does not match.")
B <- control$initial_value
} else {
B <- coef_initialize(Y, X, control)
}
## estimate coefficients using EM -----------------------------------
coef <- emlogit_run(
Y = Y, X = X, B = B,
tol = control$tol, max_iter = control$max_iter,
mu0 = control$mu0, Z0 = control$Z0, verbose = control$verbose
)
## compute variance of coefficients ---------------------------------
if (isTRUE(control$variance)) {
var <- emlogit_var(Y, X, coef, control$mu0, control$Z0, control$robust)
} else {
var <- NULL
}
## obtain the in-sample fit -----------------------------------------
prob <- predict_prob(X, coef)
fit <- list(coef = coef, var = var, prob = prob, control = control,
x_name = colnames(X), y_name = colnames(Y))
class(fit) <- c("emlogit", "emlogit.est")
return(fit)
}
#' Input check
#' A function to set the default values of \code{control}.
#' @param control A list of control parameters.
#' @keywords internal
input_check <- function(control) {
if (!exists("max_iter", control)) {
control$max_iter <- 200
}
if (!exists("tol", control)) {
control$tol <- 1e-6
}
if (!exists("verbose", control)) {
control$verbose = FALSE
}
if (!exists("intercept", control)) {
control$intercept <- TRUE
}
if (!exists("robust", control)) {
control$robust <- FALSE
}
if (!exists("variance", control)) {
control$variance <- TRUE
}
if (!exists("initialize", control)) {
control$initialize <- "ls"
} else {
if (!(control$initialize %in% c("ls", "random"))) {
stop("Not a supported initialization method")
}
}
return(control)
}
#' Specify the prior
#' @keywords internal
specify_prior <- function(control, n_cov) {
if (!exists("mu0", control)) {
control$mu0 <- rep(0, n_cov)
}
if (!exists("Z0", control)) {
control$Z0 <- diag(rep(5, n_cov))
}
return(control)
}
#' Initialize coefficients randomly from \code{rnorm()}.
#' @importFrom stats rnorm
#' @keywords internal
coef_initialize <- function(Y, X, control) {
n_cov <- ncol(X)
J <- ncol(Y)
if (control$initialize == "random") {
B <- cbind(rep(0, n_cov),
matrix(rnorm(n_cov * (J-1)), nrow = n_cov, ncol = J-1))
B <- as.matrix(B)
} else if (control$initialize == "ls") {
B <- solve(t(X) %*% X, t(X) %*% Y);
B[,1] <- 0
}
return(B)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.