knitr::opts_chunk$set(
message=FALSE, warning=FALSE, paged.print=TRUE, collapse = TRUE, cache = TRUE, comment = "#>"
)
library(pack); library(catdata)  # For addiction data
library(mlogit)
devtools::load_all(".")

Multinomial case - Non ordered response

Reference standard case

data(addiction)
summary(addiction)
head(addiction)
# vignette("multinomial-addiction1")
addiction$ill<-as.factor(addiction$ill)
data1 <- addiction[,c("ill","gender","university", "age")]
data2 <- na.omit(data1)

summary(data2)

dist1 <- new(ReferenceF)
(mod1 <- dist1$GLMref(
  response = "ill", explanatory_complete = c("intercept"), explanatory_proportional = c("NA"),
  distribution = "logistic", categories_order = c("0", "1", "2"), dataframe = data2
)) # -724.8664

(mod2 <- dist1$GLMref(
  response = "ill", explanatory_complete = c("intercept"), explanatory_proportional = c("NA"),
  distribution = "logistic", categories_order = c("0", "2", "1"), dataframe = data2
)) 
(mod3 <- dist1$GLMref(
  response = "ill", explanatory_complete = c("intercept"), explanatory_proportional = c("NA"),
  distribution = "logistic", categories_order = c("1", "2", "0"), dataframe = data2
)) 
(mod4 <- dist1$GLMref(
  response = "ill", explanatory_complete = c("NA"), explanatory_proportional = c("intercept"),
  distribution = "logistic", categories_order = c("0", "1", "2"), dataframe = data2
)) 
(mod5 <- dist1$GLMref(
  response = "ill", explanatory_complete = c("NA"), explanatory_proportional = c("intercept"),
  distribution = "logistic", categories_order = c("0", "2", "1"), dataframe = data2
))
(mod6 <- dist1$GLMref(
  response = "ill", explanatory_complete = c("NA"), explanatory_proportional = c("intercept"),
  distribution = "logistic", categories_order = c("1", "2", "0"), dataframe = data2
)) 

(mod7 <- dist1$GLMref(
  response = "ill", explanatory_complete = c("gender"), explanatory_proportional = c("NA"),
  distribution = "logistic", categories_order = c("0", "1", "2"), dataframe = data2
)) 
(mod8 <- dist1$GLMref(
  response = "ill", explanatory_complete = c("gender"), explanatory_proportional = c("NA"),
  distribution = "logistic", categories_order = c("0", "2", "1"), dataframe = data2
)) 
(mod8_2 <- dist1$GLMref(
  response = "ill", explanatory_complete = c("gender"), explanatory_proportional = c("NA"),
  distribution = "logistic", categories_order = c("2", "0", "1"), dataframe = data2
)) 
(mod9 <- dist1$GLMref(
  response = "ill", explanatory_complete = c("gender"), explanatory_proportional = c("NA"),
  distribution = "logistic", categories_order = c("1", "2", "0"), dataframe = data2
)) 

dist_10 <- new(ReferenceF)

(mod10 <- dist_10$GLMref(
  response = "ill", explanatory_complete = c("NA"), explanatory_proportional = c("gender"),
  distribution = "logistic", categories_order = c("0", "1", "2"), dataframe = data2
)) 
(mod11 <- dist1$GLMref(
  response = "ill", explanatory_complete = c("NA"), explanatory_proportional = c("intercept", "gender", "university"),
  distribution = "logistic", categories_order = c("0", "1", "2"), dataframe = data2
)) 
(mod12 <- dist1$GLMref(
  response = "ill", explanatory_complete = c("intercept", "gender", "university"), explanatory_proportional = c("NA"),
  distribution = "logistic", categories_order = c("0", "1", "2"), dataframe = data2
)) 
# NO LO DEJA EL OTRO
(mod13 <- dist1$GLMref(
  response = "ill", explanatory_complete = c("intercept", "gender", "university"), explanatory_proportional = c("age"),
  distribution = "logistic", categories_order = c("1", "2", "0"), dataframe = data2
)) 
# FUNCIONA
(mod13a <- dist1$GLMref(
  response = "ill", explanatory_complete = c("intercept", "age", "gender", "university"), explanatory_proportional = c("NA"),
  distribution = "logistic", categories_order = c("1", "2", "0"), dataframe = data2
)) 
(mod13a_1 <- dist1$GLMref(
  response = "ill", explanatory_complete = c("intercept", "age", "gender", "university"), explanatory_proportional = c("NA"),
  distribution = "logistic", categories_order = c("0", "2", "1"), dataframe = data2
)) 
(mod13b <- dist1$GLMref(
  response = "ill", explanatory_complete = c("gender", "university"), explanatory_proportional = c("intercept", "age"),
  distribution = "logistic", categories_order = c("1", "2", "0"), dataframe = data2
)) 

Explanatory variables as factors:

data2$gender <- as.factor(data2$gender)
data2$university <- as.factor(data2$university)
(mod7 <- dist1$GLMref(
  response = "ill", explanatory_complete = c("gender"), explanatory_proportional = c("NA"),
  distribution = "logistic", categories_order = c("0", "1", "2"), dataframe = data2
))
(mod8 <- dist1$GLMref(
  response = "ill", explanatory_complete = c("gender"), explanatory_proportional = c("NA"),
  distribution = "logistic", categories_order = c("0", "2", "1"), dataframe = data2
))
(mod9 <- dist1$GLMref(
  response = "ill", explanatory_complete = c("gender"), explanatory_proportional = c("NA"),
  distribution = "logistic", categories_order = c("1", "2", "0"), dataframe = data2
))
(mod11 <- dist1$GLMref(
  response = "ill", explanatory_complete = c("NA"), explanatory_proportional = c("intercept", "gender", "university"),
  distribution = "logistic", categories_order = c("0", "1", "2"), dataframe = data2
))
(mod12 <- dist1$GLMref(
  response = "ill", explanatory_complete = c("intercept", "gender", "university"), explanatory_proportional = c("NA"),
  distribution = "logistic", categories_order = c("0", "1", "2"), dataframe = data2
))

Category-specific

Travel Mode

Example 8.4 Tutz:

The choice of travel mode of n = 840 passengers in Australia was investigated by Greene (2003). The data are available from the R package Ecdat. The alternatives of travel mode were air, train, bus, and car, which have frequencies 0.276, 0.300, 0.142, and 0.280. Air serves as the reference category. As category-specific variables we consider travel time in vehicle (timevc) and cost, and as the global variable we consider household income (income). The estimates show that income seems to be influential for the preference of train and bus over airplane. Moreover, time in vehicle seems to matter for the preference of the travel mode. Cost turns out to be non-influential if income is in the predictor.

Load data from mlogit library:

# library(mlogit)
data(ModeChoice, package = "Ecdat")
head(ModeChoice)
travel.long <- mlogit.data(ModeChoice, choice = "mode", shape = "long", alt.levels = c("air", "train", "bus", "car"))
head(travel.long)

With PCGLM

head(travel.long)
choice <- sub(".*\\.", "", rownames(travel.long))
indv <- sub("\\..*", "", rownames(travel.long))
travel.long88 <- as.data.frame(cbind(indv, choice, travel.long))
head(travel.long88, 5)

dist7 <- new(ReferenceF)
(exp_8_3 <- dist7$GLMref_ec(
  response = "choice", actual_response = "mode",
  individuals = "indv",
  explanatory_complete = c("intercept", "hinc"),
  depend_y = c("gc", "invt"),
  distribution = "logistic", categories_order = c("train", "bus", "car", "air"), dataframe = travel.long88,
  design = "tutz"
))
Robustness of Student link function in multinomial choice models

Results from page 8: The log-likelihood obtained with the MNL is −185.91 as obtained by Louviere et al. (2000) page 157.

With PCGLM:

dist3 <- new(ReferenceF)
# (table3 <- dist3$GLMref_ec(
#   response = "choice", actual_response = "mode",
#   individuals = "indv",
#   explanatory_complete = c("intercept", "hinc", "psize"),
#   depend_y = c("gc", "ttme"),
#   distribution = "logistic", categories_order = c("air", "train", "bus", "car"), dataframe = travel.long88,
#   design = "louviere"
# ))
(table4 <- dist3$GLMref_ec(
  response = "choice", actual_response = "mode",
  individuals = "indv",
  explanatory_complete = c("intercept"),
  depend_y = c("ttme"),
  distribution = "logistic", categories_order = c("air", "train", "bus", "car"), dataframe = travel.long88,
  design = "louviere"
))
# (train_1.35 <- dist3$GLMref_ec(
#   response = "choice", actual_response = "mode",
#   individuals = "indv",
#   explanatory_complete = c("intercept", "hinc", "psize"),
#   depend_y = c("gc", "ttme"),
#   distribution = "student", categories_order = c("air", "bus", "car", "train"), dataframe = travel.long88,
#   design = "louviere"
# ))

Heating

Heating is a "horizontal" data.frame with three choice-specific variables (ic: investment cost, oc: operating cost) and some individual-specific variables (income, region, rooms)

With PCGLM:

data("Heating", package = "Ecdat")
H <- mlogit.data(Heating, shape = "wide", choice = "depvar", varying = c(3:12))
choice <- sub(".*\\.", "", rownames(H))
indv <- sub("\\..*", "", rownames(H))

dat4 <- as.data.frame(cbind(indv, choice, H))
head(dat4, 8)
dist3 <- new(ReferenceF)
(A98 <- dist3$GLMref_ec(
  response = "choice", actual_response = "depvar",
  individuals = "indv",
  explanatory_complete = c("intercept", "income"),
  depend_y = c("oc", "ic"),
  distribution = "logistic", categories_order = c("ec", "er", "gc", "gr", "hp"), dataframe = dat4,
  design = "tutz"
))


Multinomial case - Ordered response


Paper script JSS

Model Choice data just whith the selected travel option

{
  library(mlogit)
  data(ModeChoice, package = "Ecdat")
  head(ModeChoice)
  travel.long <- mlogit.data(ModeChoice, choice = "mode", shape = "long", alt.levels = c("air", "train", "bus", "car"))
  head(travel.long)
  choice <- sub(".*\\.", "", rownames(travel.long))
  indv <- sub("\\..*", "", rownames(travel.long))
  travel.long88 <- as.data.frame(cbind(indv, choice, travel.long))
}
travel_dat1 <- travel.long88[travel.long88$mode == T,]
head(travel_dat1)

Invariance under permutations

REFERENCE, LOGISTIC, COMPLETE

dist1 <- new(ReferenceF)
(l_1 <- dist1$GLMref(
  response = "choice",
  explanatory_complete = c("intercept", "hinc", "psize"),
  explanatory_proportional = c("NA"),
  distribution = "logistic",
  categories_order = c("air", "train", "bus", "car"),
  dataframe = travel_dat1
))

REFERENCE, LOGISTIC, PROPORTIONAL

(l_2 <- dist1$GLMref(
  response = "choice",
  explanatory_complete = c("NA"),
  explanatory_proportional = c("intercept", "hinc", "psize"),
  distribution = "logistic",
  categories_order = c("air", "bus", "car", "train"),
  dataframe = travel_dat1
))

REFERENCE, CAUCHIT, COMPLETE

(l_3 <- dist1$GLMref(
  response = "choice",
  explanatory_complete = c("intercept", "hinc", "psize"),
  explanatory_proportional = c("NA"),
  distribution = "cauchit",
  categories_order = c("air", "train", "bus", "car"),
  dataframe = travel_dat1
))

Then we change the reference category (bus as reference) and estimate again the three reference models:

REFERENCE, LOGISTIC, COMPLETE

(l_1prime <- dist1$GLMref(
  response = "choice",
  explanatory_complete = c("intercept", "hinc", "psize"),
  explanatory_proportional = c("NA"),
  distribution = "logistic",
  categories_order = c("air", "train", "car", "bus"),
  dataframe = travel_dat1
))

REFERENCE, LOGISTIC, PROPORTIONAL

(l_2prime <- dist1$GLMref(
  response = "choice",
  explanatory_complete = c("NA"),
  explanatory_proportional = c("intercept", "hinc", "psize"),
  distribution = "logistic",
  categories_order = c("air", "train","car", "bus"),
  dataframe = travel_dat1
))

REFERENCE, CAUCHIT, COMPLETE

(l_3prime <- dist1$GLMref(
  response = "choice",
  explanatory_complete = c("intercept", "hinc", "psize"),
  explanatory_proportional = c("NA"),
  distribution = "cauchit",
  categories_order = c("air", "train","car", "bus"),
  dataframe = travel_dat1
))

The log-likelihoods l_1 and l_1prime are equal (since the canonical model is invariant under all permutations) whereas the log-likelihoods l_2 and l_2prime are different (respectively l_3 and l_3prime).

4. Adjacent models for ordinal response

Equivalence between (adjacent, logistic, complete) and (reference, logistic, complete) models.

REFERENCE, LOGISTIC, COMPLETE

(estimation <- dist1$GLMref(
  response = "choice",
  explanatory_complete = c("intercept", "hinc", "psize"),
  explanatory_proportional = c("NA"),
  distribution = "logistic",
  categories_order = c("air", "bus", "car", "train"),
  dataframe = travel_dat1
))

ADJACENT, LOGISTIC, COMPLETE

dist2 <- new(AdjacentR)
(estimation_prime <- dist2$GLMadj(
  response = "choice",
  explanatory_complete = c("intercept", "hinc", "psize"),
  explanatory_proportional = c("NA"),
  distribution = "logistic",
  categories_order = c("air", "bus", "car", "train"),
  dataframe = travel_dat1
))

Remark that the log-likelihoods l and l_prime are equal but the parameters estimations are different

Invariance under permutations

ADJACENT, CAUCHY, COMPLETE

(estimation_1 <- dist2$GLMadj(
  response = "choice",
  explanatory_complete = c("intercept", "hinc", "psize"),
  explanatory_proportional = c("NA"),
  distribution = "cauchit",
  categories_order = c("air", "bus", "car", "train"),
  dataframe = travel_dat1
))

ADJACENT, GOMPERTZ, PROPORTIONAL

(estimation_2 <- dist2$GLMadj(
  response = "choice",
  explanatory_complete = c("NA"),
  explanatory_proportional = c("intercept", "hinc", "psize"),
  distribution = "gompertz",
  categories_order = c("air", "bus", "car", "train"),
  dataframe = travel_dat1
))

ADJACENT, CAUCHY, COMPLETE (Reverse order)

(estimation_1r <- dist2$GLMadj(
  response = "choice",
  explanatory_complete = c("intercept", "hinc", "psize"),
  explanatory_proportional = c("NA"),
  distribution = "cauchit",
  categories_order = c("train", "car", "bus", "air"),
  dataframe = travel_dat1
))

ADJACENT, GOMPERTZ, PROPORTIONAL (Reverse order)

(estimation_2r <- dist2$GLMadj(
  response = "choice",
  explanatory_complete = c("NA"),
  explanatory_proportional = c("intercept", "hinc", "psize"),
  distribution = "gompertz",
  categories_order = c("train", "car", "bus", "air"),
  dataframe = travel_dat1
))

ADJACENT, GUMBEL, PROPORTIONAL (Reverse order)

(estimation_3r <- dist2$GLMadj(
  response = "choice",
  explanatory_complete = c("NA"),
  explanatory_proportional = c("intercept", "hinc", "psize"),
  distribution = "gumbel",
  categories_order = c("train", "car", "bus", "air"),
  dataframe = travel_dat1
))

Remark that the log-likelihoods l_1 and l_1r are equal since the Cauchy distribution is symmetric whereas the log-likelihoods l_2 and l_2r are different since the Gompertz distribution is not symmetric. Moreover if the Gumbel distribution is used we the reverse order then the log-likelihoods l_2 and l_3r are equal since the Gumbel distribution is the symmetric of the Gompertz distribution. Otherwise, the parameter estimations are reversed.

5. Cumulative models for ordinal response

The equivalence between the (cumulative, Gompertz, proportional) and (sequential, Gompertz, proportional) models has been demonstrated by Läärä and Matthews (1985).



ylleonv/pack_27 documentation built on March 29, 2020, 1:02 a.m.