In paezha/discrtr: A Companion Package for the Book "Discrete Choice Analysis with 'R'"

knitr::opts_chunk$set(echo = TRUE)

print("Too late to cry, too soon to laugh...")

library(discrtr) # A companion package for the book Introduction to Discrete Choice Analysis with `R` 
library(dplyr) # A Grammar of Data Manipulation
library(ggplot2) # Create Elegant Data Visualisations Using the Grammar of Graphics
library(ggridges) # Ridgeline Plots in 'ggplot2'
library(kableExtra) # Construct Complex Table with 'kable' and Pipe Syntax
library(mvord) # Multivariate Ordinal Regression Models
library(ordinal) # Regression Models for Ordinal Data
library(plyr) # Tools for Splitting, Applying and Combining Data
library(tidyr) # Tidy Messy Data

data("mc_attitudes",
     package = "discrtr")

mc_attitudes %>%
  select(Community:Travel_Alone) %>%
  summary()

df <- data.frame(x = rep(seq(-6, 6, 0.1))) %>%
  mutate(f = dlogis(x, 
                    location = 0,
                    scale = 1))

ggplot() +
  geom_area(data = df,
            aes(x = x, 
                y = f),
            color = "black",
            fill = "orange",
            alpha = 0.5)

df <- data.frame(x = rep(seq(-6, 6, 0.1))) %>%
  mutate(f = dlogis(x, 
                    location = 0,
                    scale = 1))

ggplot() +
  geom_area(data = df,
            aes(x = x, 
                y = f),
            color = "black",
            fill = "orange",
            alpha = 0.5) +
  geom_area(data = df %>%
              filter(x <= -3),
            aes(x = x, 
                y = f),
            color = "black",
            fill = "orange",
            alpha = 1) + 
  geom_vline(xintercept = -3)

ggplot() +
  geom_area(data = df,
            aes(x = x, 
                y = f),
            color = "black",
            fill = "orange",
            alpha = 0.5) +
  geom_area(data = df %>%
              filter(x >= -3 & x <= -0.5),
            aes(x = x, 
                y = f),
            color = "black",
            fill = "orange",
            alpha = 1) + 
  geom_vline(xintercept = c(-3, -0.5))

df <- data.frame(x = rep(seq(-6, 6, 0.1))) %>%
  mutate(f = dlogis(x, 
                    location = 0))

lj <- c(-2, -0.5, 1.5, 4)

ggplot() +
  geom_area(data = df,
            aes(x = x, 
                y = f),
            color = "black",
            fill = "orange",
            alpha = 0.5) +
  geom_area(data = df %>%
              filter(x >= lj[3] & x <= lj[4]),
            aes(x = x, 
                y = f),
            color = NA,
            fill = "orange",
            alpha = 1) +
  geom_vline(xintercept = lj) +
  geom_text(aes(label = c("SD", "D", "N", "A", "SA"),
                x = c(-3, lj + 0.25)), 
            y = 0.0075) +
  scale_x_continuous(breaks = lj, 
                     labels = expression(lambda[1], 
                                         lambda[2], 
                                         lambda[3], 
                                         lambda[4]))

mod_community_clm <- clm(Community ~ age + vehicle + 
                           Rate_Non_Canadian + Rate_Public, 
                         data = mc_attitudes)

summary(mod_community_clm)

mod_community_clm$convergence

# Coefficients of the utility function
beta_n <- mod_community_clm$coefficients[5:8]

# Thresholds
l_j <- mod_community_clm$coefficients[1:4]

# Profile 1: Age = 21, No vehicle, other variables at zero
p1 <- c(21, 
        0, 
        0,
        0)

# Age = 21, vehicle, other variables at zero
p2 <- c(21, 
        1, 
        0,
        0)

V_1 <- beta_n %*% p1
V_2 <- beta_n %*% p2

# Calculate the values of the logistic distribution for profile 1 in the range defined by x
dV_1 <- data.frame(x = rep(seq(-2.9, 4.4, 0.1)), 
                   profile = "Profile 1") %>%
  mutate(f = dlogis(x, 
                    location = V_1))

# Calculate the values of the logistic distribution for profile 2 in the range defined by x
dV_2 <- data.frame(x = rep(seq(-2.9, 4.4, 0.1)), 
                   profile = "Profile 2") %>%
  mutate(f = dlogis(x, 
                    location = V_2))

# Bind the distributions for the two profiles
dV <- rbind(dV_1,
            dV_2)

# Create a base plot to compare distributions
base_plot <- ggplot() +
  # Plot the thresholds
  geom_vline(xintercept = mod_community_clm$coefficients[1:4]) +
  # Label the responses
  geom_text(aes(label = c("SD", "D", "N", "A", "SA"),
                x = c(-3, l_j + 0.25)), 
            y = 0.75) +
  # Label the ticks in the x axis with the threshold coefficients
  scale_x_continuous(name = "x",
                     breaks = l_j, 
                     labels = expression(lambda[1], 
                                         lambda[2], 
                                         lambda[3], 
                                         lambda[4]))

```rComparing the distribution of hypothetical profiles 1 and 2"}

Render the plot and annotate the effect of `vehicle`

base_plot + # Plot the distributions as ridges geom_ridgeline(data = dV, aes(x = x, y = profile, fill = profile, group = profile, height = f), alpha = 0.5) + # Mark the center of the two distributions geom_vline(data = data.frame(profile = c("Profile 1", "Profile 2"), x = c(V_1, V_2)), aes(xintercept = x, color = profile, group = profile), linetype = "dashed") + # Annotate the shift in the location of the distribution due to # having access to a vehicle annotate("segment", x = V_1 + 0.3, xend = V_1, y = 1.75, yend = 1.75, arrow = arrow(), color = "black") + annotate("segment", x = V_2 - 0.3, xend = V_2, y = 1.75, yend = 1.75, arrow = arrow(), color = "black")+ annotate("text", parse = TRUE, label = "beta[v]", x = V_1 - 0.15, y = 1.75, color = "black")

```r
profiles <- data.frame(profile = c("Profile 1",
                                   "Profile 2"),
                       age = 21, 
                       vehicle = c("No", "Yes"),
                       Rate_Non_Canadian = 0,
                       Rate_Public = 0)

profiles <- cbind(profiles,
                  predict(mod_community_clm, 
                          newdata = profiles))

profiles %>%
  select(-c(age, vehicle, Rate_Non_Canadian, Rate_Public))

profiles %>%
  select(-c(age, vehicle, Rate_Non_Canadian, Rate_Public)) %>%
  # Pivot longer to gather all probabilities in a sigle column
  pivot_longer(cols = -profile,
               names_to = "Response",
               values_to = "probability") %>%
  # Code the response in order
  mutate(Response = factor(Response,
                           levels = c("fit.STRONGLY DISAGREE",
                                      "fit.DISAGREE",
                                      "fit.NEUTRAL",
                                      "fit.AGREE",
                                      "fit.STRONGLY AGREE"),
                           labels = c("SD", "D", "N", "A", "SA"),
                           ordered = TRUE)) %>%
  ggplot() +
  geom_col(aes(x = Response,
               y = probability,
               fill = profile),
           color = "black",
           position = "dodge") +
  theme(axis.text.x = element_text(angle = 90))

# Define profile 3
p3 <- c(21, 
        1,
        0,
        median(mc_attitudes$Rate_Public))

# Calculate the utility
V_3 <- beta_n %*% p3

# Calculate the values of the logistic distribution for profile 2 in the range defined by x
dV_3 <- data.frame(x = rep(seq(-2.9, 4.4, 0.1)), 
                   profile = "Profile 3") %>%
  mutate(f = dlogis(x, 
                    location = V_3))

# Bind the distributions for the two profiles
dV <- rbind(dV,
            dV_3)

```rComparing the distribution of hypothetical profiles 1, 2, and 3"}

Render the plot and add the distributions

base_plot + # Plot the distributions as ridges geom_ridgeline(data = dV, aes(x = x, y = profile, fill = profile, group = profile, height = f), alpha = 0.5) + # Mark the center of the three distributions geom_vline(data = data.frame(profile = c("Profile 1", "Profile 2", "Profile 3"), x = c(V_1, V_2, V_3)), aes(xintercept = x, color = profile, group = profile), linetype = "dashed")

```r
mod_community_clm$coefficients["Rate_Public"] * median(mc_attitudes$Rate_Public)

profiles <- data.frame(profile = c("Profile 1",
                                   "Profile 2",
                                   "Profile 3"),
                       age = 21, 
                       vehicle = c("No", 
                                   "Yes", 
                                   "Yes"),
                       Rate_Non_Canadian = 0,
                       Rate_Public = c(0, 
                                       0,
                                       median(mc_attitudes$Rate_Public)))

profiles <- cbind(profiles,
                  predict(mod_community_clm, 
                          newdata = profiles))

# Prepare data for plotting
profiles %>%
  select(-c(age, vehicle, Rate_Non_Canadian, Rate_Public)) %>%
  # Pivot longer to gather all probabilities in a single column
  pivot_longer(cols = -profile,
               names_to = "Response",
               values_to = "probability") %>%
  # Code the response in order
  mutate(Response = factor(Response,
                           levels = c("fit.STRONGLY DISAGREE",
                                      "fit.DISAGREE",
                                      "fit.NEUTRAL",
                                      "fit.AGREE",
                                      "fit.STRONGLY AGREE"),
                           labels = c("SD", "D", "N", "A", "SA"),
                           ordered = TRUE)) %>%
  # Plot
  ggplot() +
  geom_col(aes(x = Response,
               y = probability,
               fill = profile),
           color = "black",
           position = "dodge") +
  theme(axis.text.x = element_text(angle = 90))

mod_community_flex_l <- clm(Community ~ age + vehicle +
                              Rate_Non_Canadian + Rate_Public,
                            nominal = ~ gender, 
                            data = mc_attitudes)

summary(mod_community_flex_l)

```rModel with parameterized thresholds for two versions of hypothetical profile 1: woman and man"}

Coefficients of the utility function

beta_n <- mod_community_flex_l$coefficients[9:12]

Thresholds

l_j <- mod_community_flex_l$coefficients[1:8]

Profile 1: Age = 21, No vehicle, other variables at zero

p1 <- c(21, 0, 0, 0)

Calculate the utility of these two hypothetical individuals

V_1 <- beta_n %*% p1

Calculate the values of the logistic distribution for profile 1 in the range defined by x

dV_1 <- data.frame(x = rep(seq(-3.2, 4.7, 0.1))) %>% mutate(f = dlogis(x, location = V_1))

ggplot() + # Plot the distribution geom_area(data = dV_1, aes(x = x, y = f), color = "black", fill = "orange", alpha = 0.5) + # Plot the thresholds geom_vline(data = data.frame(profile = rep(c("Man", "Woman"), each = 4), l_j = c(l_j[1:4], l_j[1:4] + l_j[5:8])), aes(xintercept = l_j, color = profile)) + scale_x_continuous(name = "x", breaks = c(l_j[1:4], l_j[1:4] + l_j[5:8]), labels = expression(lambda[1][m], lambda[2][m], lambda[3][m], lambda[4][m], lambda[1][w], lambda[2][w], lambda[3][w], lambda[4][w])) + theme(axis.text.x = element_text(angle = 90))

```r
mod_community_ncs <- clm(Community ~ age + vehicle +
                           Rate_Non_Canadian +
                           Rate_Public,
                         scale = ~ visa, 
                         data = mc_attitudes)

summary(mod_community_ncs)

exp(mod_community_ncs$coefficients[9])

```rModel with parameterized scale for two versions of hypothetical profile 4: domestic and international student"}

Coefficients of the utility function

beta_n <- mod_community_ncs$coefficients[5:8]

Thresholds

l_j <- mod_community_ncs$coefficients[1:4]

Scale

s_I <- exp(mod_community_ncs$coefficients[9])

Profile 4: Age = 21, No vehicle, other variables at zero

proportion of non-Canadian residents is the in-sample median

p1 <- c(21, 0, median(mc_attitudes$Rate_Non_Canadian), 0)

Calculate the utility of these two hypothetical individuals

V_1 <- beta_n %*% p1

Calculate the values of the logistic distribution for profile 1

when student is Domestic

dV_D <- data.frame(x = rep(seq(-3.2, 4.7, 0.1)), profile = "Domestic") %>% mutate(f = dlogis(x, location = V_1))

Calculate the values of the logistic distribution for profile 1

when student is International

dV_I <- data.frame(x = rep(seq(-3.2, 4.7, 0.1)), profile = "International") %>% mutate(f = dlogis(x, location = V_1, scale = s_I))

Bind the distributions for the two profiles

dV <- rbind(dV_D, dV_I)

Render the plot and add the distributions

ggplot() + # Plot the distributions as ridges geom_ridgeline(data = dV, aes(x = x, y = profile, fill = profile, group = profile, height = f), alpha = 0.5) + # Plot the thresholds geom_vline(xintercept = mod_community_ncs$coefficients[1:4]) + # Label the responses geom_text(aes(label = c("SD", "D", "N", "A", "SA"), x = c(-3, l_j + 0.25)), y = 0.75) + # Label the ticks in the x axis with the threshold coefficients scale_x_continuous(name = "x", breaks = l_j, labels = expression(lambda[1], lambda[2], lambda[3], lambda[4]))

```r
mod_bivariate <- mvord(formula = MMO2(Community, Neighbors) ~ 0 + age + vehicle +
                         Rate_Non_Canadian +
                         Rate_Public, 
                       link = mvlogit(df = 8L),
                       # {mvord} does not like tbl or tbl_df objects:
                       # convert to plain data.frame
                       data = data.frame(mc_attitudes))
summary(mod_bivariate)

# Select an individual in the sample to fit the probabilities
n_ind <- 1

# Create a grid of values for chosen individual
fit_grid_ind <- expand.grid(Community = c("STRONGLY DISAGREE", 
                                          "DISAGREE", 
                                          "NEUTRAL", 
                                          "AGREE", 
                                          "STRONGLY AGREE"),
                            Neighbors = c("STRONGLY DISAGREE", 
                                          "DISAGREE", 
                                          "NEUTRAL", 
                                          "AGREE", 
                                          "STRONGLY AGREE"),
                            # Retrieve the attributes of the n_ind record
                            # in the table
                            age = mc_attitudes$age[n_ind],
                            vehicle = mc_attitudes$vehicle[n_ind],
                            Rate_Non_Canadian = mc_attitudes$Rate_Non_Canadian[n_ind],
                            Rate_Public = mc_attitudes$Rate_Public[n_ind])

# Join the prediction grid to the predicted probabilities
joint.probs <- data.frame(fit_grid_ind %>%
                            select(Community, Neighbors),
                          # Use `predict()` and the prediction grid
                          # to predict joint probabilities
                          joint.prob = predict(mod_bivariate, 
                                               type = "prob", 
                                               newdata = fit_grid_ind)) %>%
  # Revalue the ordinal responses for ease of presentation
  mutate(Community = revalue(Community,
                             c("STRONGLY DISAGREE" = "Community_SD",
                               "DISAGREE" = "Community_D",
                               "NEUTRAL" = "Community_N",
                               "AGREE" = "Community_A",
                               "STRONGLY AGREE" = "Community_SA")),
         Neighbors = revalue(Neighbors,
                             c("STRONGLY DISAGREE" = "Neighbors_SD",
                               "DISAGREE" = "Neighbors_D",
                               "NEUTRAL" = "Neighbors_N",
                               "AGREE" = "Neighbors_A",
                               "STRONGLY AGREE" = "Neighbors_SA"))) %>%
  pivot_wider(names_from = Neighbors, values_from = joint.prob)

joint.probs %>%
  kable("latex",
        digits = 5,
        caption = "\\label{tab:joint-probs-in-sample-individuals}
        Fitted joint probabilities for in-sample individual")

joint.probs %>% 
  select(-Community) %>% 
  sum()

pred_grid <- expand.grid(Community = c("STRONGLY DISAGREE", 
                                       "DISAGREE", 
                                       "NEUTRAL", 
                                       "AGREE", 
                                       "STRONGLY AGREE"),
                         Neighbors = c("STRONGLY DISAGREE", 
                                       "DISAGREE", 
                                       "NEUTRAL", 
                                       "AGREE", 
                                       "STRONGLY AGREE"),
                         # Age = 20 is the value for the first quartile in the sample
                         # and age = 23 is the value for the third quartile
                         age = c(20, 23), 
                         vehicle = levels(mc_attitudes$vehicle),
                         Rate_Non_Canadian = median(mc_attitudes$Rate_Non_Canadian),
                         Rate_Public = median(mc_attitudes$Rate_Public))

# Create a data frame with the prediction grid and 
# the predicted joint probabilities
pred_grid <- data.frame(pred_grid, 
                        joint.prob = predict(mod_bivariate, 
                                             type = "prob", 
                                             newdata = pred_grid)) %>%
  # Convert to factors and revalue for presentation
  mutate(age = factor(age,
                      levels = c(20, 23),
                      labels = c("Age = 20",
                                 "Age = 23")),
         vehicle = revalue(vehicle,
                           c("Yes" = "Vehicle", 
                             "No" = "No Vehicle")),
         Community = revalue(Community,
                             c("STRONGLY DISAGREE" = "SD", 
                               "DISAGREE" = "D", 
                               "NEUTRAL" = "N", 
                               "AGREE" = "A", 
                               "STRONGLY AGREE" = "SA")),
         Neighbors = revalue(Neighbors,
                             c("STRONGLY DISAGREE" = "SD", 
                               "DISAGREE" = "D", 
                               "NEUTRAL" = "N", 
                               "AGREE" = "A", 
                               "STRONGLY AGREE" = "SA")))

pred_grid %>%
  group_by(age, vehicle) %>%
  summarize(prob = sum(joint.prob),
            .groups = "drop")

pred_grid %>%  
  ggplot(aes(x = Community, y = Neighbors)) + 
  geom_tile(aes(fill = joint.prob)) +
  scale_fill_gradient(name = "joint probability",
                      low="white", 
                      high="black") +
  coord_equal() + 
  theme(legend.position = "bottom") +
  facet_grid(vehicle ~ age)

``rPlot of odds ratios by age for No Vehicle/Vehicle status"} pred_grid %>% # Pivot the table to create columns of joint probabilities # forNo VehicleandVehiclepivot_wider(names_from = vehicle, values_from = joint.prob) %>% # Calculate the ratio of the joint probability for the two # profiles mutate(or =No Vehicle`/Vehicle) %>% # Plot as tiles ggplot(aes(x = Community, y = Neighbors)) + geom_tile(aes(fill = or)) + # Use a divergent fill scale with midpoint 1 scale_fill_gradient2(name = "odds ratio (No Vehicle/Vehicle)", midpoint = 1) + coord_equal() + theme(legend.position = "bottom", legend.text = element_text(angle = 90)) + # Facet by age facet_grid(~ age)

```rPlot of odds ratios by age for Age 23/Age 20"}
pred_grid %>% 
  pivot_wider(names_from = age, values_from = joint.prob) %>%
  mutate(or = `Age = 23`/`Age = 20`) %>%
  ggplot(aes(x = Community, y = Neighbors)) + 
  geom_tile(aes(fill = or)) +
  scale_fill_gradient2(name = "odds ratio (Age 23/Age 20)",
                       midpoint = 1) +
  coord_equal() + 
  theme(legend.position = "bottom",
        legend.text = element_text(angle = 90)) +
  facet_grid(~ vehicle)

data("mc_modality",
      package = "discrtr")

paezha/discrtr documentation built on March 1, 2023, 5:25 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

paezha/discrtr
A Companion Package for the Book "Discrete Choice Analysis with 'R'"

In paezha/discrtr: A Companion Package for the Book "Discrete Choice Analysis with 'R'"

Render the plot and annotate the effect of `vehicle`

Render the plot and add the distributions

Coefficients of the utility function

Thresholds

Profile 1: Age = 21, No vehicle, other variables at zero

Calculate the utility of these two hypothetical individuals

Calculate the values of the logistic distribution for profile 1 in the range defined by x

Coefficients of the utility function

Thresholds

Scale

Profile 4: Age = 21, No vehicle, other variables at zero

proportion of non-Canadian residents is the in-sample median

Calculate the utility of these two hypothetical individuals

Calculate the values of the logistic distribution for profile 1

when student is Domestic

Calculate the values of the logistic distribution for profile 1

when student is International

Bind the distributions for the two profiles

Render the plot and add the distributions

R Package Documentation

Browse R Packages

We want your feedback!

paezha/discrtr A Companion Package for the Book "Discrete Choice Analysis with 'R'"

In paezha/discrtr: A Companion Package for the Book "Discrete Choice Analysis with 'R'"

Render the plot and annotate the effect of vehicle

Render the plot and add the distributions

Coefficients of the utility function

Thresholds

Profile 1: Age = 21, No vehicle, other variables at zero

Calculate the utility of these two hypothetical individuals

Calculate the values of the logistic distribution for profile 1 in the range defined by x

Coefficients of the utility function

Thresholds

Scale

Profile 4: Age = 21, No vehicle, other variables at zero

proportion of non-Canadian residents is the in-sample median

Calculate the utility of these two hypothetical individuals

Calculate the values of the logistic distribution for profile 1

when student is Domestic

Calculate the values of the logistic distribution for profile 1

when student is International

Bind the distributions for the two profiles

Render the plot and add the distributions

R Package Documentation

Browse R Packages

We want your feedback!

paezha/discrtr
A Companion Package for the Book "Discrete Choice Analysis with 'R'"

Render the plot and annotate the effect of `vehicle`