Code in Chapter 09: Dealing with Heterogeneity I: The Latent Class Logit Model

knitr::opts_chunk$set(echo = TRUE)
print("Ignorance is the foundation of absolute power")
library(dplyr) # A Grammar of Data Manipulation
library(ggplot2) # Create Elegant Data Visualisations Using the Grammar of Graphics
library(gmnl) # Multinomial Logit Models with Random Parameters 
library(gridExtra) # Miscellaneous Functions for "Grid" Graphics 
library(kableExtra) # Construct Complex Table with 'kable' and Pipe Syntax 
library(mlogit) # Multinomial Logit Models 
library(stargazer) # Well-Formatted Regression and Summary Statistics Tables 
library(tibble) # Simple Data Frames
library(tidyr) # Tidy Messy Data
data("RiskyTransport", 
     package = "mlogit")
all_available <- RiskyTransport %>% 
  group_by(chid) %>% 
  summarise(no_rows = length(chid), .groups = 'drop') %>% 
  filter(no_rows == 4) %>% select(chid)
RT <- inner_join(RiskyTransport, 
                 all_available, 
                 by = "chid") %>%
  drop_na()
df <- RT %>% 
  group_by(mode) %>% 
  summarize(proportion = sum(choice),
            `min (cost)` = min(cost), 
            `mean (cost)` = mean(cost), 
            `max(cost)` = max(cost),
            `min (risk)` = min(risk), 
            `mean (risk)` = mean(risk),
            `max (risk)` = max(risk),
            .groups = 'drop') %>%
  mutate(proportion = proportion/sum(proportion)) %>%
  column_to_rownames(var = "mode")
stargazer::stargazer(df, 
                     type ="latex",
                     rownames = TRUE,
                     summary = FALSE,
                     digits =2,
                     header = FALSE,
                     label = "tab:descriptive-statistics",
                     title = "Summary Statistics: Cost and Risk in Risky Transport Data Set")
RT <- RT %>% 
  mutate(`cost:dwage` = cost * dwage,
         `risk:dwage` = risk * dwage,
         dwage2 = dwage^2)
RT <- mlogit.data(RT, 
                   choice = "choice", 
                   shape = "long", 
                   alt.var = "mode",
                   id.var = "id",
                   chid.var = "chid")
mnl.rt0 <- mlogit(choice ~ cost + risk | 0, 
                data = RT)

stargazer::stargazer(mnl.rt0, 
                     header = FALSE,
                     single.row = TRUE,
                     title = "Estimation results: Base Multinomial Logit Model")
lc2 <- gmnl(choice ~ cost + risk | 0 | 0 | 0 | 1, 
           data = RT,
           model = 'lc', 
           Q = 2,
           panel = TRUE,
           method = "bhhh")
lc3 <- gmnl(choice ~ cost + risk | 0 | 0 | 0 | 1, 
           data = RT,
           model = 'lc', 
           Q = 3,
           panel = TRUE,
           method = "bhhh")
# Estimate a constants only model to calculate McFadden's _adjusted_ rho2

names(mnl.rt0$coefficients) <- c("class.1.cost", "class.1.risk")

mnl0.summary <- rownames_to_column(data.frame(summary(mnl.rt0)$CoefTable), "Variable") %>%
  transmute(Variable, Estimate, pval = `Pr...z..`)

lc2.summary <- rownames_to_column(data.frame(summary(lc2)$CoefTable), "Variable") %>%
  transmute(Variable, Estimate, pval = `Pr...z..`)

lc3.summary <- rownames_to_column(data.frame(summary(lc3)$CoefTable), "Variable") %>% 
  transmute(Variable, Estimate, pval = `Pr...z..`)

df <- full_join(mnl0.summary, lc2.summary, by = "Variable") %>% 
  full_join(lc3.summary, by = "Variable") %>%
  mutate(across(starts_with("Estimate"),
                ~ case_when(is.na(.x) ~ "-",
                            TRUE ~ as.character(round(.x, 3)))),
         across(starts_with("pval"),
                ~ case_when(is.na(.x) ~ "-",
                            TRUE ~ ifelse(.x < 0.0001,
                                          "<0.0001", 
                                          as.character(round(.x, 3))))))

kable(df, 
      "latex",
      booktabs= TRUE,
      col.names = c("Variable",
                    "Estimate",
                    "p-value",
                    "Estimate",
                    "p-value",
                    "Estimate",
                    "p-value"),
      caption = "\\label{tab:base-models}Base models: multinomial logit (MNL), latent class Q = 2 (LC2), latent class Q = 3 (LC3)") %>%
  kable_styling() %>%
  add_header_above(c(" " = 1, "MNL" = 2, "LC2" = 2, "LC3" = 2)) %>%
  footnote(general = c(paste0("Log-Likelihood: MNL = ", round(mnl.rt0$logLik[1], digits = 3),
                            "; Latent Class (Q=2) = ", round(lc2$logLik$maximum, digits = 3),
                            "; Latent Class (Q=3) = ", round(lc3$logLik$maximum, digits = 3))))
2 * length(coef(mnl.rt0)) - 2 * mnl.rt0$logLik
2 * length(coef(lc2)) - 2 * lc2$logLik$maximum
2 * length(coef(lc3)) - 2 * lc3$logLik$maximum
as.numeric(exp(((2 * length(coef(lc3)) - 2 * lc3$logLik$maximum) - 
                  (2 * length(coef(mnl.rt0)) - 2 * mnl.rt0$logLik))/2))
as.numeric(lc3$coefficients[2]) / as.numeric(lc3$coefficients[1])
as.numeric(lc3$coefficients[4]) / as.numeric(lc3$coefficients[3])
as.numeric(lc3$coefficients[6]) / as.numeric(lc3$coefficients[5])
1/3 * as.numeric(lc3$coefficients[2]) / as.numeric(lc3$coefficients[1]) +
  1/3 * as.numeric(lc3$coefficients[4]) / as.numeric(lc3$coefficients[3]) +
  1/3 * as.numeric(lc3$coefficients[6]) / as.numeric(lc3$coefficients[5])
mnl.cov <- mlogit(choice ~ cost + risk | dwage + 0, 
                data = RT)

mnl.exp <- mlogit(choice ~ cost + cost:dwage +
                   risk + risk:dwage | 0, 
                data = RT)
mnl.cov.summary <- rownames_to_column(data.frame(summary(mnl.cov)$CoefTable), "Variable") %>%
  transmute(Variable, Estimate, pval = `Pr...z..`)

mnl.exp.summary <- rownames_to_column(data.frame(summary(mnl.exp)$CoefTable), "Variable") %>%
  transmute(Variable, Estimate, pval = `Pr...z..`)

df <- full_join(mnl.cov.summary,
                mnl.exp.summary, by = "Variable") %>%
  mutate(across(starts_with("Estimate"),
                ~ case_when(is.na(.x) ~ "-",
                            TRUE ~ as.character(round(.x, 3)))),
         across(starts_with("pval"),
                ~ case_when(is.na(.x) ~ "-",
                            TRUE ~ ifelse(.x < 0.0001,
                                          "<0.0001", 
                                          as.character(round(.x, 3))))))

kable(df, 
      "latex",
      booktabs = TRUE,
      col.names = c("Variable",
                    "Estimate",
                    "p-value",
                    "Estimate",
                    "p-value"),
      caption = "\\label{tab:model-compared}Models: multinomial logit with covariates (MNL-COV) and multinomial logit with expanded coefficients (MNL-EXP)") %>%
  kable_styling() %>%
  add_header_above(c(" " = 1, "MNL-COV" = 2, "MNL-EXP" = 2)) %>%
  footnote(general = c(paste0("Log-Likelihood: MNL-COV = ", round(mnl.cov$logLik[1], digits = 3),
                            "; MNL-EXP = ", round(mnl.exp$logLik[1], digits = 3))))
as.numeric(2 * length(coef(mnl.cov)) - 2 * mnl.cov$logLik)
as.numeric(2 * length(coef(mnl.exp)) - 2 * mnl.exp$logLik)
as.numeric(exp(((2 * length(coef(lc3)) - 2 * lc3$logLik$maximum) - 
                  (2 * length(coef(mnl.cov)) - 2 * mnl.cov$logLik))/2))
lc2.cov <- gmnl(choice ~ cost + risk | 0 | 0 | 0 | dwage, 
           data = RT,
           model = 'lc', 
           Q = 2,
           panel = TRUE,
           method = "nm",
           iterlim = 1200)
summary(lc2.cov)
as.numeric(2 * length(coef(lc2.cov)) - 2 * lc2.cov$logLik$maximum)
as.numeric(2 * length(coef(lc3)) - 2 * lc3$logLik$maximum)
as.numeric(exp(((2 * length(coef(lc3)) - 2 * lc3$logLik$maximum) - 
                  (2 * length(coef(lc2.cov)) - 2 * lc2.cov$logLik$maximum))/2))
#Create a data frame for plotting:
df <- data.frame(dwage = seq(min(RT$dwage), 
                             to = max(RT$dwage),
                             by = (max(RT$dwage) - min(RT$dwage))/100))

# Use the class-membership model to calculate the membership probabilities
df <- df %>%
  mutate(p_1 = 1 - 
           exp(coef(lc2.cov)["(class)2"] + coef(lc2.cov)["dwage:class2"] * dwage)/
           (1 + exp(coef(lc2.cov)["(class)2"] + coef(lc2.cov)["dwage:class2"] * dwage)),
         p_2 = exp(coef(lc2.cov)["(class)2"] + coef(lc2.cov)["dwage:class2"] * dwage)/
           (1 + exp(coef(lc2.cov)["(class)2"] + coef(lc2.cov)["dwage:class2"] * dwage))) %>%
  # Pivot longer to put all probabilities in a single column, and label by class
  pivot_longer(cols = -dwage,
               names_to = "Class",
               values_to = "p") %>%
  mutate(Class = ifelse(Class == "p_1",
                        "Class 1",
                        "Class 2"))
# Plot
ggplot(df, aes(x = dwage)) + 
  geom_line(aes(x = dwage,
                y = p,
                color = Class))
data("TravelMode", package = "AER")


Try the discrtr package in your browser

Any scripts or data that you put into this service are public.

discrtr documentation built on March 7, 2023, 5:36 p.m.