emlogit: Implementing the ECM algorithm for multinomial logit model

Documented in coef_initialize emlogit input_check specify_prior

#' EM algorithm for Multinomial Logit
#'
#' @param Y A matrix of multinomial outcomes where rows correspond to observations
#'  and columns correspond to choices.
#' @param X A matrix of covariates where columns correspond to variables.
#'  The number of rows should match with the number of rows for \code{Y}.
#' @param control A list of control parameters, which can contain the following items:
#' \itemize{
#'   \item \code{max_iter} A integer value that specifies the maximum iterations
#'   for the EM algorithm. Defaul value is 200.
#'   \item \code{tol} A tolerance parameter for assessing the convergence. Default value is 1e-5.
#'   \item \code{mu0} A vector of prior means. The dimension of this parameter should match the number of variables (i.e., \code{ncol(X)}).
#'    The default value is \code{0}.
#'   \item \code{Z0} A matrix for the prior variance covariance matrix.
#'      The dimension of this matrix should match with the dimension of \code{X} i.e., the number of variables).
#'      The default value is \code{diag(rep(5, ncol(X)))}
#'   \item \code{verbose} A boolean argument. If set \code{TRUE}, the function shows the log-posterior for each iteration. Default is \code{FALSE}.
#'   \item \code{intercept} A boolean argument. When \code{X} already contains the intercept term (i.e., a column of ones), this option should be \code{FALSE}.
#'      Default is \code{TRUE}.
#'   \item \code{variance} A boolean argument. If \code{FALSE}, \code{emlogit()} skips the variance calculation.
#'      This is useful when calling \code{emlogit()} from other programs. Default is \code{TRUE}.
#'   \item \code{initialize} A method for initialization.
#'       The package currently supports \code{initialize = "random"} (random) or \code{initialize = "ls"} (least square). Default is \code{"ls"}.
#'   \item \code{initial_value} A matrix of coefficients used for the initial value where the first column should be zeros for identification.
#'              This matrix should be of dimension K by J where K is the total number of covariates (including the intercept) and J is the total number of choices.
#'              If supplied, \code{initial_value} overwrites the initial value generated by \code{initialize} option.
#' }
#' @export
emlogit <- function(Y, X, control = list()) {

  ## setup ------------------------------------------------------------
  X <- as.matrix(X)
  control <- input_check(control)
  if (isTRUE(control$intercept)) {
    X <- cbind(rep(1, nrow(X)), X)
  }

  control <- specify_prior(control, ncol(X))

  if (exists("initial_value", control)) {
    ## check input
    if (!all(dim(control$initial_value) == c(ncol(X), ncol(Y))))
      stop("Dimension of initial value does not match.")
    B <- control$initial_value
  } else {
    B <- coef_initialize(Y, X, control)
  }


  ## estimate coefficients using EM -----------------------------------
  coef <- emlogit_run(
      Y = Y, X = X, B = B,
      tol = control$tol, max_iter = control$max_iter,
      mu0 = control$mu0, Z0 = control$Z0, verbose = control$verbose
    )

  ## compute variance of coefficients ---------------------------------
  if (isTRUE(control$variance)) {
    var <- emlogit_var(Y, X, coef, control$mu0, control$Z0, control$robust)
  } else {
    var <- NULL
  }

  ## obtain the in-sample fit -----------------------------------------
  prob <- predict_prob(X, coef)
  fit <- list(coef = coef, var = var, prob = prob, control = control,
              x_name = colnames(X), y_name = colnames(Y))

  class(fit) <- c("emlogit", "emlogit.est")
  return(fit)
}


#' Input check
#' A function to set the default values of \code{control}.
#' @param control A list of control parameters.
#' @keywords internal
input_check <- function(control) {
  if (!exists("max_iter", control)) {
    control$max_iter <- 200
  }

  if (!exists("tol", control)) {
    control$tol <- 1e-6
  }

  if (!exists("verbose", control)) {
    control$verbose = FALSE
  }

  if (!exists("intercept", control)) {
    control$intercept <- TRUE
  }

  if (!exists("robust", control)) {
    control$robust <- FALSE
  }

  if (!exists("variance", control)) {
    control$variance <- TRUE
  }

  if (!exists("initialize", control)) {
    control$initialize <- "ls"
  } else {
    if (!(control$initialize %in% c("ls", "random"))) {
      stop("Not a supported initialization method")
    }
  }

  return(control)
}

#' Specify the prior
#' @keywords internal
specify_prior <- function(control, n_cov) {
  if (!exists("mu0", control)) {
    control$mu0 <- rep(0, n_cov)
  }

  if (!exists("Z0", control)) {
    control$Z0 <- diag(rep(5, n_cov))
  }
  return(control)
}

#' Initialize coefficients randomly from \code{rnorm()}.
#' @importFrom stats rnorm
#' @keywords internal
coef_initialize <- function(Y, X, control) {
  n_cov <- ncol(X)
  J     <- ncol(Y)

  if (control$initialize == "random") {
    B     <- cbind(rep(0, n_cov),
                   matrix(rnorm(n_cov * (J-1)), nrow = n_cov, ncol = J-1))
    B <- as.matrix(B)
  } else if (control$initialize == "ls") {
    B <- solve(t(X) %*% X, t(X) %*% Y);
    B[,1] <- 0
  }

  return(B)
}