Nothing
# ============================================================================
# Main logitr functions
# ============================================================================
#' The main function for estimating logit models
#'
#' Use this function to estimate multinomial (MNL) and mixed logit (MXL)
#' models with "Preference" space or "Willingness-to-pay" (WTP) space utility
#' parameterizations. The function includes an option to run a multistart
#' optimization loop with random starting points in each iteration, which is
#' useful for non-convex problems like MXL models or models with WTP space
#' utility parameterizations. The main optimization loop uses the `nloptr()`
#' function to minimize the negative log-likelihood function.
#' @keywords logitr mnl mxl wtp willingness-to-pay mixed logit
#'
#' @param data The data, formatted as a `data.frame` object.
#' @param outcome The name of the column that identifies the outcome variable,
#' which should be coded with a `1` for `TRUE` and `0` for `FALSE`.
#' @param obsID The name of the column that identifies each observation.
#' @param pars The names of the parameters to be estimated in the model.
#' Must be the same as the column names in the `data` argument. For WTP space
#' models, do not include the `scalePar` variable in `pars`.
#' @param scalePar The name of the column that identifies the scale variable,
#' which is typically "price" for WTP space models, but could be any
#' continuous variable, such as "time". Defaults to `NULL`.
#' @param randPars A named vector whose names are the random parameters and
#' values the distribution: `'n'` for normal, `'ln'` for log-normal, or
#' `'cn'` for zero-censored normal. Defaults to `NULL`.
#' @param randScale The random distribution for the scale parameter: `'n'` for
#' normal, `'ln'` for log-normal, or `'cn'` for zero-censored normal. Only used
#' for WTP space MXL models. Defaults to `NULL`.
#' @param modelSpace This argument is no longer needed as of v0.7.0. The model
#' space is now determined based on the `scalePar` argument:
#' if `NULL` (the default), the model will be in the preference space,
#' otherwise it will be in the WTP space. Defaults to `NULL`.
#' @param weights The name of the column that identifies the weights to be
#' used in model estimation. Defaults to `NULL`.
#' @param panelID The name of the column that identifies the individual (for
#' panel data where multiple observations are recorded for each individual).
#' Defaults to `NULL`.
#' @param clusterID The name of the column that identifies the cluster
#' groups to be used in model estimation. Defaults to `NULL`.
#' @param robust Determines whether or not a robust covariance matrix is
#' estimated. Defaults to `FALSE`. Specification of a `clusterID` or
#' `weights` will override the user setting and set this to `TRUE' (a
#' warning will be displayed in this case). Replicates the functionality of
#' Stata's cmcmmixlogit.
#' @param correlation Set to `TRUE` to account for correlation across random
#' parameters (correlated heterogeneity). Defaults to `FALSE`.
#' @param startValBounds sets the `lower` and `upper` bounds for the starting
#' parameter values for each optimization run, which are generated by
#' `runif(n, lower, upper)`. Defaults to `c(-1, 1)`.
#' @param startVals is vector of values to be used as starting values for the
#' optimization. Only used for the first run if `numMultiStarts > 1`. Defaults
#' to `NULL`.
#' @param numMultiStarts is the number of times to run the optimization loop,
#' each time starting from a different random starting point for each parameter
#' between `startValBounds`. Recommended for non-convex models, such as WTP
#' space models and mixed logit models. Defaults to `1`.
#' @param useAnalyticGrad Set to `FALSE` to use numerically approximated
#' gradients instead of analytic gradients during estimation. For now, using
#' the analytic gradient is faster for MNL models but slower for MXL models.
#' Defaults to `TRUE`.
#' @param scaleInputs By default each variable in `data` is scaled to be
#' between 0 and 1 before running the optimization routine because it usually
#' helps with stability, especially if some of the variables have very large or
#' very small values (e.g. `> 10^3` or `< 10^-3`). Set to `FALSE` to turn this
#' feature off. Defaults to `TRUE`.
#' @param standardDraws By default, a new set of standard normal draws are
#' generated during each call to `logitr` (the same draws are used during each
#' multistart iteration). The user can override those draws by providing a
#' matrix of standard normal draws if desired. Defaults to `NULL`.
#' @param drawType Specify the draw type as a character: `"halton"`
#' (the default) or `"sobol"` (recommended for models with more than 5
#' random parameters).
#' @param numDraws The number of Halton draws to use for MXL models for the
#' maximum simulated likelihood. Defaults to `50`.
#' @param numCores The number of cores to use for parallel processing of the
#' multistart. Set to `1` to serially run the multistart. Defaults to `NULL`,
#' in which case the number of cores is set to `parallel::detectCores() - 1`.
#' Max cores allowed is capped at `parallel::detectCores()`.
#' @param vcov Set to `TRUE` to evaluate and include the variance-covariance
#' matrix and coefficient standard errors in the returned object.
#' Defaults to `FALSE`.
#' @param predict If `FALSE`, predicted probabilities, fitted values, and
#' residuals are not included in the returned object. Defaults to `TRUE`.
#' @param options A list of options for controlling the `nloptr()` optimization.
#' Run `nloptr::nloptr.print.options()` for details.
#' @param price No longer used as of v0.7.0 - if provided, this is passed
#' to the `scalePar` argument and a warning is displayed.
#' @param randPrice No longer used as of v0.7.0 - if provided, this is passed
#' to the `randScale` argument and a warning is displayed.
#' @param choice No longer used as of v0.4.0 - if provided, this is passed
#' to the `outcome` argument and a warning is displayed.
#' @param choiceName No longer used as of v0.2.3 - if provided, this is passed
#' to the `outcome` argument and a warning is displayed.
#' @param obsIDName No longer used as of v0.2.3 - if provided, this is passed
#' to the `obsID` argument and a warning is displayed.
#' @param parNames No longer used as of v0.2.3 - if provided, this is passed
#' to the `pars` argument and a warning is displayed.
#' @param priceName No longer used as of v0.2.3 - if provided, this is passed
#' to the `scalePar` argument and a warning is displayed.
#' @param weightsName No longer used as of v0.2.3 - if provided, this is passed
#' to the `weights` argument and a warning is displayed.
#' @param clusterName No longer used as of v0.2.3 - if provided, this is passed
#' to the `clusterID` argument and a warning is displayed.
#' @param cluster No longer used as of v0.2.3 - if provided, this is passed
#' to the `clusterID` argument and a warning is displayed.
#' @details
#' The the `options` argument is used to control the detailed behavior of the
#' optimization and must be passed as a list, e.g. `options = list(...)`.
#' Below are a list of the default options, but other options can be included.
#' Run `nloptr::nloptr.print.options()` for more details.
#'
#' | Argument | Description | Default |
#' |:---------------|:------------------|:--------------|
#' |`xtol_rel`|The relative `x` tolerance for the `nloptr` optimization loop.|`1.0e-6`|
#' |`xtol_abs`|The absolute `x` tolerance for the `nloptr` optimization loop.|`1.0e-6`|
#' |`ftol_rel`|The relative `f` tolerance for the `nloptr` optimization loop.|`1.0e-6`|
#' |`ftol_abs`|The absolute `f` tolerance for the `nloptr` optimization loop.|`1.0e-6`|
#' |`maxeval`|The maximum number of function evaluations for the `nloptr` optimization loop. |`1000`|
#' |`algorithm`|The optimization algorithm that `nloptr` uses.|`"NLOPT_LD_LBFGS"`|
#' |`print_level`|The print level of the `nloptr` optimization loop.|`0`|
#'
#' @return
#' The function returns a list object containing the following objects.
#'
#' | Value | Description |
#' |:------------|:------------------|
#' |`coefficients`|The model coefficients at convergence.|
#' |`logLik`|The log-likelihood value at convergence.|
#' |`nullLogLik`|The null log-likelihood value (if all coefficients are 0).|
#' |`gradient`|The gradient of the log-likelihood at convergence.|
#' |`hessian`|The hessian of the log-likelihood at convergence.|
#' |`probabilities`|Predicted probabilities. Not returned if `predict = FALSE`.|
#' |`fitted.values`|Fitted values. Not returned if `predict = FALSE`.|
#' |`residuals`|Residuals. Not returned if `predict = FALSE`.|
#' |`startVals`|The starting values used.|
#' |`multistartNumber`|The multistart run number for this model.|
#' |`multistartSummary`|A summary of the log-likelihood values for each multistart run (if more than one multistart was used).|
#' |`time`|The user, system, and elapsed time to run the optimization.|
#' |`iterations`|The number of iterations until convergence.|
#' |`message`|A more informative message with the status of the optimization result.|
#' |`status`|An integer value with the status of the optimization (positive values are successes). Use [statusCodes()] for a detailed description.|
#' |`call`|The matched call to `logitr()`.|
#' |`inputs`|A list of the original inputs to `logitr()`.|
#' |`data`|A list of the original data provided to `logitr()` broken up into components used during model estimation.|
#' |`numObs`|The number of observations.|
#' |`numParams`|The number of model parameters.|
#' |`freq`|The frequency counts of each alternative.|
#' |`modelType`|The model type, `'mnl'` for multinomial logit or `'mxl'` for mixed logit.|
#' |`weightsUsed`|`TRUE` or `FALSE` for whether weights were used in the model.|
#' |`numClusters`|The number of clusters.|
#' |`parSetup`|A summary of the distributional assumptions on each model parameter (`"f"`="fixed", `"n"`="normal distribution", `"ln"`="log-normal distribution").|
#' |`parIDs`|A list identifying the indices of each parameter in `coefficients` by a variety of types.|
#' |`scaleFactors`|A vector of the scaling factors used to scale each coefficient during estimation.|
#' |`standardDraws`|The draws used during maximum simulated likelihood (for MXL models).|
#' |`options`|A list of options for controlling the `nloptr()` optimization. Run `nloptr::nloptr.print.options()` for details.|
#'
#' @export
#' @examples
#' # For more detailed examples, visit
#' # https://jhelvy.github.io/logitr/articles/
#'
#' library(logitr)
#'
#' # Estimate a MNL model in the Preference space
#' mnl_pref <- logitr(
#' data = yogurt,
#' outcome = "choice",
#' obsID = "obsID",
#' pars = c("price", "feat", "brand")
#' )
#'
#' # Estimate a MNL model in the WTP space, using a 5-run multistart
#' mnl_wtp <- logitr(
#' data = yogurt,
#' outcome = "choice",
#' obsID = "obsID",
#' pars = c("feat", "brand"),
#' scalePar = "price",
#' numMultiStarts = 5
#' )
#'
#' # Estimate a MXL model in the Preference space with "feat"
#' # following a normal distribution
#' # Panel structure is accounted for in this example using "panelID"
#' mxl_pref <- logitr(
#' data = yogurt,
#' outcome = "choice",
#' obsID = "obsID",
#' panelID = "id",
#' pars = c("price", "feat", "brand"),
#' randPars = c(feat = "n")
#' )
logitr <- function(
data,
outcome,
obsID,
pars,
scalePar = NULL,
randPars = NULL,
randScale = NULL,
modelSpace = NULL,
weights = NULL,
panelID = NULL,
clusterID = NULL,
robust = FALSE,
correlation = FALSE,
startValBounds = c(-1, 1),
startVals = NULL,
numMultiStarts = 1,
useAnalyticGrad = TRUE,
scaleInputs = TRUE,
standardDraws = NULL,
drawType = 'halton',
numDraws = 50,
numCores = NULL,
vcov = FALSE,
predict = TRUE,
options = list(
print_level = 0,
xtol_rel = 1.0e-6,
xtol_abs = 1.0e-6,
ftol_rel = 1.0e-6,
ftol_abs = 1.0e-6,
maxeval = 1000,
algorithm = "NLOPT_LD_LBFGS"
),
price, # Outdated argument names as of v0.7.0
randPrice, # Outdated argument names as of v0.7.0
choice, # Outdated argument names as of v0.4.0
parNames, # Outdated argument names as of v0.2.3
choiceName,
obsIDName,
priceName,
weightsName,
clusterName,
cluster
) {
call <- match.call()
# Argument names were changed in v0.2.3
calls <- names(sapply(call, deparse))[-1]
if (any("parNames" %in% calls)) {
pars <- parNames
warning(
"The 'parNames' argument is outdated as of v0.2.3. Use 'pars' instead"
)
}
if (any("choiceName" %in% calls)) {
outcome <- choiceName
warning(
"The 'choiceName' argument is outdated as of v0.2.3. Use 'outcome' instead"
)
}
if (any("choice" %in% calls)) {
outcome <- choice
warning(
"The 'choice' argument is outdated as of v0.4.0. Use 'outcome' instead"
)
}
if (any("obsIDName" %in% calls)) {
obsID <- obsIDName
warning(
"The 'obsIDName' argument is outdated as of v0.2.3. Use 'obsID' instead"
)
}
if (any("priceName" %in% calls)) {
price <- priceName
warning(
"The 'priceName' argument is outdated as of v0.2.3. Use 'price' instead"
)
}
if (any("weightsName" %in% calls)) {
weights <- weightsName
warning(
"The 'weightsName' argument is outdated as of v0.2.3. Use 'weights' ",
"instead"
)
}
if (any("clusterName" %in% calls)) {
clusterID <- clusterName
warning(
"The 'clusterName' argument is outdated as of v0.2.3. Use 'clusterID' ",
"instead"
)
}
if (any("cluster" %in% calls)) {
clusterID <- cluster
warning(
"The 'cluster' argument is outdated as of v0.2.3. Use 'clusterID' instead"
)
}
if (any("price" %in% calls)) {
scalePar <- price
warning(
"The 'price' argument is outdated as of v0.7.0. Use 'scalePar' instead"
)
}
if (any("randPrice" %in% calls)) {
randScale <- randPrice
warning(
"The 'randPrice' argument is outdated as of v0.7.0. Use 'randScale' instead"
)
}
if (any("modelSpace" %in% calls)) {
warning(
"The 'modelSpace' argument is no longer needed as of v0.7.0. and will ",
"be ignored. The model space is now determined based on the scalePar ",
"argument: if NULL (the default), the model will be in the preference ",
"space, otherwise it will be in the WTP space."
)
}
data <- as.data.frame(data) # tibbles break things
modelInputs <- getModelInputs(
data, outcome, obsID, pars, randPars, scalePar, randScale,
weights, panelID, clusterID, robust, startValBounds, startVals,
numMultiStarts, useAnalyticGrad, scaleInputs, standardDraws, drawType,
numDraws, numCores, vcov, predict, correlation, call, options
)
allModels <- runMultistart(modelInputs)
if (modelInputs$n$multiStarts > 1) {
summary <- getMultistartSummary(allModels)
model <- getBestModel(allModels, summary)
model$multistartSummary <- summary
} else {
model <- allModels[[1]]
}
model <- appendModelInfo(model, modelInputs)
if (vcov) {
model$vcov <- vcov(model)
model$se <- sqrt(diag(model$vcov))
}
if (predict) {
model$probabilities <- stats::predict(model)
model$fitted.values <- stats::fitted(model, probs = model$probabilities)
model$residuals <- stats::residuals(model, fitted = model$fitted.values)
}
message("Done!")
return(model)
}
getMultistartSummary <- function(allModels) {
summary <- data.frame(
getListVal(allModels, "logLik"),
getListVal(allModels, "iterations"),
getListVal(allModels, "status")
)
colnames(summary) <- c("Log Likelihood", "Iterations", "Exit Status")
return(summary)
}
getListVal <- function(object, val) {
return(unlist(lapply(object, function(x) x[[val]])))
}
getBestModel <- function(allModels, summary) {
summary$index <- seq(nrow(summary))
good <- summary[which(summary$`Exit Status` > 0),]
index <- good[which.max(good$`Log Likelihood`),]$index
if (length(index) == 0) {
# All NA values...none of the models converged, so just return the first
index <- 1
}
return(allModels[[index]])
}
appendModelInfo <- function(model, mi) {
fail <- model$fail
if (! fail) { model$fail <- NULL }
# Add model data
model$data <- mi$data
# Get coefficients (re-scale if needed)
coefficients <- getCoefficients(model, mi, fail)
model$coefficients <- coefficients
# Make unscaled version of model inputs to compute gradient and hessian
mi_unscaled <- mi
mi_unscaled$data_diff <- mi$data_diff_unscaled
if (mi$modelType == 'mxl') {
mi_unscaled$partials <- mi$partials_unscaled
}
model$gradient <- getGradient(coefficients, mi_unscaled, fail)
model$hessian <- getHessian(coefficients, mi_unscaled, fail)
model$nullLogLik <- getNullLogLik(coefficients, mi_unscaled, fail)
return(model)
}
getCoefficients <- function(model, mi, fail) {
parsUnscaled <- model$coefficients
names(parsUnscaled) <- mi$parNames$all
if (fail | (! mi$inputs$scaleInputs)) {
return(parsUnscaled)
}
# Re-scale coefficients
scaleFactors <- mi$scaleFactors
# Adjust any log-normal parameters
lnIDs <- mi$parIDs$ln
if (length(lnIDs) > 0) {
parsUnscaled[lnIDs] <- parsUnscaled[lnIDs] - log(scaleFactors[lnIDs])
scaleFactors[unlist(mi$parIDs$partial_lnIDs)] <- 1
}
return(parsUnscaled / scaleFactors)
}
getGradient <- function(coefficients, mi, fail) {
if (fail) {
gradient <- matrix(coefficients*NA, ncol = 1)
row.names(gradient) <- names(coefficients)
return(gradient)
}
return(-1 * mi$evalFuncs$negGradLL(coefficients, mi))
}
getHessian <- function(coefficients, mi, fail) {
parNames <- mi$parNames$all
if ((fail) | (any(is.na(coefficients)))) {
# Model failed - return a matrix of NA values
hessian <- matrix(NA, nrow = length(parNames), ncol = length(parNames))
} else {
hessian <- mi$evalFuncs$hessLL(coefficients, mi)
}
colnames(hessian) <- parNames
row.names(hessian) <- parNames
return(hessian)
}
getNullLogLik <- function(coefficients, mi, fail) {
if (fail) { return(NA) }
return(-1*mi$evalFuncs$negLL(coefficients*0, mi))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.