Nothing
###############################
## Functions for mlogit ##
###############################
##
## This file contains:
##
## mlogit()
## print.mlogit()
## summary.mlogit()
## print.summary.mlogit()
## logLikeMultinomial()
## gradientMultinomial()
## hessianMultinomial()
## logLikeMultinomial_Rcpp()
## gradientMultinomial_Rcpp()
## hessianMultinomial_Rcpp()
##' @name mlogit
##' @title Multinomial Logistic Regression
##
##' @description Solves a multinomial logistic problem using
##' Newton-Raphson method
##
##' @details The optimization is done using the \code{\link{maxNR}} function from the \code{maxLik} package. The log-likehood function, along with its gradient and hessian, are implemented as C++ functions (via the \code{RcppArmadillo} package).
## Input:
##' @param Y A matrix of the outcomes, with K columns for
##' the K groups. Row sums of the matrix should be equal
##' to one, but entries do not have to be 0/1 (but they
##' should be positive). i.e. this is a matrix of hard or
##' soft assignments to K categories. The first column is
##' used as the reference category.
##' @param X matrix of covariates for regression. Should have
##' the same number of rows (observations) as Y. Coefficients
##' for all parameters in X are computed for K-1 groups.
##' The coefficients corresponding to the first column of Y
##' are assumed to be zero.
##' @param beta starting values for the optimziation. Should be
##' given as a matrix of column vectors, each vector a
##' different starting value. If null, defaults to zeros.
##' @param add.intercept a logical indicator of whether an
##' intercept column should be added to X
##' @param betaOnly logical indicator of whether only the
##' parameter estimates beta should be returned. Otherwise,
##' beta is returned along with fitted objects. See Output.
##' @param tol.zero the tolerance threshold for considering a
##' fitted value as equal to zero. Used for warning about
##' fitted values of 0/1. Is NOT part of the optimization
##' control parameters.
##' @param verbose logical indicator that controls whether text
##' indicating progress is output to display
##' @param suppressFittedWarning indicator of whether or not
##' warnings about fitted values of 1 are returned
##' @param maxNR.print.level numeric value giving the level of
##' output produced by maxNR. see \code{?maxNR} for details.
##' Defaults to 0.
##' @param iterlim iteration limit for maxNR. Defaults to 150.
##' @param checkY indicator for whether Y should be checked to be a valid assignment matrix. Set to \code{FALSE} if using decimal values in Y.
##
##
##' @seealso \code{\link{predkmeans}}
##' @author Joshua Keller
##' @export
##' @family mlogit methods
##' @useDynLib predkmeans
##' @importFrom Rcpp sourceCpp
##
## Output:
##' @return A list containing the following:
##'
##' \item{beta}{ a p x K matrix of parameter estimates corresponding to the K columns of Y and p covariates in X}
##' \item{fitted01}{indicator of whether fitted values of 1 were present.}
##' \item{fitted}{the fitted probabilities}
##' \item{res.best}{ the best result from the maxNR fit}
##' \item{status}{ small data frame summarizing the status of the fits}
##' \item{res. all}{ a list containing the results from all maxNR fits}
##'
##' @examples
##' n <- 2000
##' X <- cbind(1,
##' matrix(rnorm(2*n), nrow=n, ncol=2),
##' rbinom(n, size=1, prob=0.3))
##' beta <- cbind(rep(0, 4),
##' c(0.5, 1, 0, -1),
##' c(0, 2, 2, 0))
##' probs <- exp(X %*% beta)
##' probs <- probs/rowSums(probs)
##' Y <- t(apply(probs, 1, function(p) rmultinom(1, 1, p)))
##' mfit <- mlogit(Y=Y, X=X, betaOnly=TRUE)
##' mfit
##
mlogit <- function(Y, X, beta=NULL, add.intercept=FALSE, betaOnly=FALSE, tol.zero=1e-8, verbose=T, suppressFittedWarning=FALSE, maxNR.print.level=0, iterlim=150, checkY=TRUE){
X <- as.matrix(X)
if (is.null(colnames(X))) colnames(X) <- paste0("X", 1:ncol(X))
# Checking Y matrix
if (!is.matrix(Y)){
ynames <- as.character(sort(unique(Y)))
Y <- model.matrix(~0 + as.factor(Y))
colnames(Y) <- ynames
}
if (checkY){
if(any(rowSums(Y)>1) || !all(Y %in% c(0, 1))){
stop("Y is not matrix of valid assignments. Please provide\na matrix of 0's and 1's, with no more than one 1 per row.\nOr use parameter 'checkY=FALSE' to override this check.")
}
}
if (is.null(colnames(Y))) colnames(Y) <- paste0("Y", 1:ncol(Y))
if (add.intercept) {
X <- cbind(1, X)
colnames(X)[1] <- "Intercept"
cat("Adding intercept to X matrix \n")
}
K <- ncol(Y)
p <- ncol(X)
if (is.null(beta)){
beta <- rep(0, times=(K-1)*p)
}
beta <- as.matrix(beta)
nInits <- ncol(beta) #Number of initial conditions to try
conv <- rep(FALSE, nInits)
convNumDeriv <- conv
res <- vector("list", nInits)
LLvalue <- rep(NA, nInits)
for (i in 1:nInits){
if (verbose){
message(paste0("Optimization using starting value ", i, "/", nInits))
}
beta.start <- beta[,i]
res[[i]] <- maxLik::maxNR(fn=function(beta) logLikeMultinomial_Rcpp(beta, Y, X, K, p), grad=function(beta) gradientMultinomial_Rcpp(beta, Y, X, K, p), hess=function(beta) hessianMultinomial_Rcpp(beta, Y, X, K, p), start= beta.start, print.level= maxNR.print.level, iterlim= iterlim)
# } else {
# res[[i]] <- maxLik::maxNR(fn=function(beta) logLikeMultinomial(beta, Y, X), grad=function(beta) gradientMultinomial(beta, Y, X), hess=function(beta) hessianMultinomial(beta, Y, X), start= beta.start, print.level= maxNR.print.level, iterlim= iterlim)
# }
hessianND <- FALSE
try(hessianND <- all(eigen(res[[i]]$hessian)$values <0), silent=T)
# Added stopping code for relative convergence (code=8)
res[[i]]$NRconv <- res[[i]]$code==1 || res[[i]]$code==2 || res[[i]]$code==8
conv[i] <- res[[i]]$NRconv && hessianND
LLvalue[i] <- res[[i]]$maximum
res[[i]]$conv <- conv[i]
}
best.ind <- which.max(LLvalue)
res.best <- res[[best.ind]]
status <- data.frame(LLvalue=LLvalue, convergence=sapply(res, function(x) x$NRconv), conv=conv)
beta <- cbind(0, matrix(res.best$estimate, nrow=p, ncol=K-1))
rownames(beta) <- colnames(X)
colnames(beta) <- colnames(Y)
# Check that fitted probabilities are 0/1
eta <- X %*% beta
# mu <- exp(eta)/rowSums(exp(eta))
mu <- apply(eta, 2, function(x) 1/rowSums(exp(eta-x)))
prob01check <- all(apply(mu>(1-tol.zero), 1, any))
if (prob01check && !suppressFittedWarning){
warning("All observations have fitted probabilites of 0 and 1")
}
if (betaOnly){
return(beta)
} else {
out <- list(beta=beta, fitted01=prob01check, fitted=mu, res.best=res.best, status=status, res.all=res)
class(out) <- "mlogit"
return(out)
}
}
##' @export
print.mlogit <- function(x, ...){
K <- ncol(x$beta)
p <- nrow(x$beta)
cat("Mlogit optimization with\n", K, "categories and\n", p, "variables\n")
# print(x$beta)
cat("Optimization has", ifelse(x$status$conv," "," not "),"converged\n", sep="")
invisible(x)
}##print.mlogit()
##' @export
summary.mlogit <- function(object, ...){
class(object) <- "summary.mlogit"
object
}## summary.mlogit()
##' @export
print.summary.mlogit <- function(x, betarowthresh=10, ...){
K <- ncol(x$beta)
p <- nrow(x$beta)
cat("Mlogit optimization with\n", K, "categories and\n", p, "variables\n")
if(x$status$conv) {
cat("Method converged with LL value of ", x$status$LLvalue, ".\n",sep="")
} else {
cat("Method did not converge. LL value at exit:", x$status$LLvalue,".\n")
}
cat("Estimated coefficients are:\n")
if (nrow(x$beta) < betarowthresh) {
print(x$beta)
} else {
print(x$beta[1:betarowthresh,])
cat("...\n")
}
}
logLikeMultinomial <- function(beta, Y, X){
# Assumes first column of Y is reference group
K <- ncol(Y)
p <- ncol(X)
beta <- matrix(beta, nrow=p, ncol=K-1)
Xbeta <- cbind(0, X %*% beta)
XbetaMax <- apply(Xbeta, 1, max)
LL <- sum(rowSums(Y * Xbeta) - (XbetaMax + log(rowSums(exp(Xbeta - XbetaMax)))))
LL
}
gradientMultinomial <- function(beta, Y, X){
# Assumes first column of Y is reference group
K <- ncol(Y)
p <- ncol(X)
beta <- matrix(beta, nrow=p, ncol=K-1)
# Compute probabilities
# eta <- exp(X %*% beta)
# prob <- eta/(1 + rowSums(eta))
logeta <- X %*% beta
prob <- apply(logeta, 2, function(x) 1/(exp(-x) + rowSums(exp(logeta-x))))
g <- as.vector(crossprod(X, Y[,-1] - prob))
g
}
hessianMultinomial <- function(beta, Y, X){
# Assumes first column of Y is reference group
K <- ncol(Y)
p <- ncol(X)
beta <- matrix(beta, nrow=p, ncol=K-1)
# Compute probabilities
# eta <- exp(X %*% beta)
# prob <- eta/(1 + rowSums(eta))
logeta <- X %*% beta
prob <- apply(logeta, 2, function(x) 1/(exp(-x) + rowSums(exp(logeta-x))))
H <- matrix(nrow=0, ncol=p*(K -1 ))
for (k1 in 1:(K-1)){
H1 <- matrix(nrow=p, ncol=0)
p1 <- prob[,k1]
for (k2 in 1:(K-1)){
if (k1==k2) { Hn <- crossprod(x=X, y= p1*(1 - prob[,k2])*X)
} else {
Hn <- crossprod(x=X, y=-p1*prob[,k2] * X)
}
H1 <- cbind(H1, Hn)
}
H <- rbind(H, H1)
}
-H
}
logLikeMultinomial_Rcpp <- function(beta, Y, X, K=ncol(Y), p=ncol(X)){
# Assumes first column of Y is reference group
# K <- ncol(Y)
# p <- ncol(X)
beta <- matrix(beta, nrow=p, ncol=K-1)
# LL <- sum(rowSums(Y[,-1] * (X %*% beta)) - log(1 + rowSums(exp(X%*% beta))))
# LL <- loglikeCpp(X, beta, Y[,-1, drop=FALSE])
LL <- loglikeCpp(X, beta, Y[, -1, drop=FALSE], n=as.integer(nrow(Y)))
LL
}
gradientMultinomial_Rcpp <- function(beta, Y, X, K=ncol(Y), p=ncol(X)){
# Assumes first column of Y is reference group
# K <- ncol(Y)
# p <- ncol(X)
beta <- matrix(beta, nrow=p, ncol=K-1)
# Compute probabilities
# eta <- exp(X %*% beta)
# prob <- eta/(1 + rowSums(eta))
## logeta <- X %*% beta
## prob <- apply(logeta, 2, function(x) 1/(exp(-x) + rowSums(exp(logeta-x))))
g <- as.vector(gradientMultinomialCpp(X=X, b=beta, y=Y[,-1, drop=FALSE], k=K))
g
}
hessianMultinomial_Rcpp <- function(beta, Y, X, K=ncol(Y), p=ncol(X)){
# Assumes first column of Y is reference group
# K <- ncol(Y)
# p <- ncol(X)
beta <- matrix(beta, nrow=p, ncol=K-1)
# H <- hessianMultinomialCpp(X=X, b=beta, y=Y[,-1, drop=FALSE], p=p,k=K)
H <- hessianMultinomialCpp(X=X, b=beta, y=Y, p=p,k=K)
-H
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.