knitr::opts_chunk$set(echo = TRUE)
# Function `writeLines()` is similar to `print()` but allows for line breaks
writeLines("Are these the shadows of the things that Will be,
           or are they shadows of the things that May be only?")
library(dplyr) # A Grammar of Data Manipulation
library(evd) # Functions for Extreme Value Distributions 
library(ggplot2) # Create Elegant Data Visualisations Using the Grammar of Graphics
library(kableExtra) # Construct Complex Table with 'kable' and Pipe Syntax 
library(mlogit) # Multinomial Logit Models
library(tidyr) # Tidy Messy Data
data("Heating")
Proportion <- Heating %>% 
  # Group the rows by the value of `depvar`
  group_by(depvar) %>%
  # Count the number of cases of each outcome in `depvar`
  summarise(no_rows = n())

df <- data.frame(System = c("Gas Central", 
                            "Gas Room",
                            "Electric Central", 
                            "Electric Room", 
                            "Heat Pump"),
                 Installation = c(median(Heating$ic.gc), 
                                  median(Heating$ic.gr),
                                  median(Heating$ic.ec), 
                                  median(Heating$ic.er), 
                                  median(Heating$ic.hp)),
                 Operation = c(median(Heating$oc.gc),
                               median(Heating$oc.gr), 
                               median(Heating$oc.ec), 
                               median(Heating$oc.er), 
                               median(Heating$oc.hp)),
                 Proportion = Proportion$no_rows/900)

df %>%
  kable() %>%
  kable_styling()
Heating %>%
  # Select columns 13 to 16
  select(13:16) %>%
  summary()
H <- Heating %>%
  mlogit.data(shape = "wide", 
              # Name of column with the choices
              choice = "depvar", 
              # Numbers of columns with attributes that vary by alternative
              varying = c(3:12))
model1 <- mlogit(depvar ~ ic, 
                 Heating, 
                 shape = "wide", 
                 choice = "depvar", 
                 reflevel = "ec", 
                 varying = c(3:7))

stargazer::stargazer(model1, 
                     type = "latex",
                     header = FALSE,
                     single.row = TRUE,
                     title = "Estimation results: Model 1 (wide data as input)")
model1 <- mlogit(depvar ~ ic,
                 H, 
                 reflevel = "ec")

stargazer::stargazer(model1, 
                     type = "latex",
                     header = FALSE,
                     single.row = TRUE,
                     title = "Estimation results: Model 1 (long data as input)")

```rThe logit probability is not linear on the variables."}

Create a data frame for plotting

df <- data.frame(x =seq(from = -5, to = 5, by = 0.01)) %>% mutate(y = plogis(x))

Plot

logit_plot <- ggplot(data = df, aes(x, y)) + geom_line(color = "orange") + # Plot cumulative distribution function ylim(c(0, 1)) + # Set the limits of the y axis geom_hline(yintercept = 0) + # Add y axis geom_vline(xintercept = 0) # Add x axis

logit_plot + xlab(expression(paste(V[j], " - ", V[i], sep=""))) + # Label the x axis ylab(expression(paste(P[j]))) + # Label the y axis annotate("segment", x = -3.75, xend = -2.5, y = 0.024, yend = 0.024, colour = "blue", linetype = "solid") + annotate("segment", x = -2.5, xend = -2.5, y = 0.024, yend = 0.075, colour = "blue", linetype = "solid") + annotate("segment", x = 0, xend = 1.25, y = 0.5, yend = 0.5, colour = "red", linetype = "dashed") + annotate("segment", x = 1.25, xend = 1.25, y = 0.5, yend = 0.77, colour = "red", linetype = "dashed")

```r
ic_min <- Heating[1,]
ic_mean <- Heating[1,]
ic_max <- Heating[1,]
min_cost <- Heating %>% 
  select(starts_with("ic")) %>% 
  summarise(across(.cols = everything(),
                   min))
mean_cost <- Heating %>% 
  select(starts_with("ic"))%>% 
  summarise(across(.cols = everything(),
                   mean))
max_cost <- Heating %>% 
  select(starts_with("ic"))%>% 
  summarise(across(.cols = everything(),
                   max))
ic_min[3:7] <- min_cost
ic_mean[3:7] <- mean_cost
ic_max[3:7] <- max_cost
head(ic_max)
p_model1_ic_min <- predict(model1, 
                           newdata = mlogit.data(ic_min, 
                                                 shape = "wide", 
                                                 choice = "depvar", 
                                                 varying = 3:7))
p_model1_ic_mean <- predict(model1, 
                            newdata = mlogit.data(ic_mean,
                                                  shape = "wide",
                                                  choice = "depvar", 
                                                  varying = 3:7))
p_model1_ic_max <- predict(model1, 
                           newdata = mlogit.data(ic_max, 
                                                 shape = "wide", 
                                                 choice = "depvar", 
                                                 varying = 3:7))
min_cost
p_model1_ic_min
data.frame(System = c("Electric Central",
                      "Electric Room",
                      "Gas Central",
                      "Gas Room",
                      "Heat Pump"),
           ic_min = ic_min %>% 
             # Select installation costs in the same order as the probabilities
             select(ic.ec, ic.er, ic.gc, ic.gr, ic.hp) %>%
             t() %>% 
             as.numeric() %>%
             round(1),
           p_min = p_model1_ic_min %>% 
             as.numeric() %>%
             round(3),
           ic_mean = ic_mean %>%
             # Select installation costs in the same order as the probabilities
             select(ic.ec, ic.er, ic.gc, ic.gr, ic.hp) %>%
             t() %>% 
             as.numeric() %>%
             round(1),
           p_mean = p_model1_ic_mean %>% 
             as.numeric() %>%
             round(3),
           ic_max = ic_max %>% 
             # Select installation costs in the same order as the probabilities
             select(ic.ec, ic.er, ic.gc, ic.gr, ic.hp) %>%
             t() %>% 
             as.numeric()  %>%
             round(1),
           p_max = p_model1_ic_max %>% 
             as.numeric() %>%
             round(3)) %>%
  kable(col.names = c("System",
                      "Cost",
                      "Probability",
                      "Cost",
                      "Probability",
                      "Cost",
                      "Probability"),
        digits = 3) %>%
  kable_styling() %>%
  add_header_above(c(" " = 1, 
                     "Minimum Cost" = 2, 
                     "Mean Cost" = 2, 
                     "Maximum Cost" = 2))
-0.00168108 * (1 - 0.617)
-0.00168108 * (1 - 0.639)
-0.00168108 * (1 - 0.672)
-(-0.00168108 * 0.617)
-(-0.00168108 * 0.639)
-(-0.00168108 * 0.672)
-0.00168108 * 431.8 * (1 - 0.617)
-0.00168108 * 776.8 * (1 - 0.639)
-0.00168108 * 1158.9 * (1 - 0.672)
-(-0.00168108 * 431.8 * 0.617)
-(-0.00168108 * 776.8 *  0.639)
-(-0.00168108 * 1158.9 * 0.672)
effects(model1,
        # Calculate the marginal effects with respect to attribute "ic"
        covariate = "ic",
        # Type of effects to compute: relative for probability, absolute for attribute 
        type = "ra", 
        data = mlogit.data(ic_min, 
                           shape = "wide", 
                           choice = "depvar", 
                           varying = 3:7))
effects(model1,
        covariate = "ic",
        type = "ra", 
        data = mlogit.data(ic_mean, 
                           shape = "wide", 
                           choice = "depvar", 
                           varying = 3:7))
effects(model1, 
        covariate = "ic", 
        type = "ra",
        data = mlogit.data(ic_max, 
                           shape = "wide",
                           choice = "depvar", 
                           varying = 3:7))
effects(model1, 
        covariate = "ic", 
        type = "rr", 
        data = mlogit.data(ic_min, 
                           shape = "wide",
                           choice = "depvar", 
                           varying = 3:7))
effects(model1, 
        covariate = "ic", 
        type = "rr", 
        data = mlogit.data(ic_mean, 
                           shape = "wide", 
                           choice = "depvar", 
                           varying = 3:7))
effects(model1, 
        covariate = "ic", 
        type = "rr", 
        data = mlogit.data(ic_max, 
                           shape = "wide", 
                           choice = "depvar", 
                           varying = 3:7))
summary(Heating$region)
model2 <- mlogit(depvar ~ ic | region, 
                 Heating, 
                 shape = "wide", 
                 choice = "depvar", 
                 reflevel = "ec", 
                 varying = c(3:7))

stargazer::stargazer(model2, 
                     header = FALSE,
                     single.row = TRUE,
                     title = "Estimation results: Model 2")
ic_mean_region <- ic_mean %>% 
  mutate(count = 4) %>% 
  uncount(count)
ic_mean_region$region <- c("valley", 
                           "scostl",
                           "mountn", 
                           "ncostl")
head(ic_mean_region)
p_region_ic_mean <- data.frame(Region = c("valley", 
                                          "scostl", 
                                          "mountn", 
                                          "ncostl"),
                               predict(model2, 
                                       newdata = mlogit.data(ic_mean_region,
                                                             shape = "wide",
                                                             choice = "depvar", 
                                                             varying = 3:7), 
                                       outcome = FALSE))
p_region_ic_mean
data.frame (Effect = c("valley to scostl", 
                       "valley to mountn",
                       "valley to ncostl"),
            rbind (p_region_ic_mean[2, 2:6] - p_region_ic_mean[1, 2:6],
                   p_region_ic_mean[3, 2:6] - p_region_ic_mean[1, 2:6],
                   p_region_ic_mean[4, 2:6] - p_region_ic_mean[1, 2:6]))
model3 <- mlogit(depvar ~ ic + oc, 
                 Heating, 
                 shape = "wide", 
                 choice = "depvar", 
                 reflevel = "ec", 
                 varying = c(3:12))

stargazer::stargazer(model3, 
                     header = FALSE,
                     single.row = TRUE,
                     title = "Estimation results: Model 3")
H_rebate <- Heating %>% 
  mutate(ic.hp = 0.85 * ic.hp)
data.frame(Policy = c("Do nothing", "15% rebate"),
           rbind(apply(predict(model3, 
                               newdata = mlogit.data(Heating, 
                                                     shape = "wide", 
                                                     choice = "depvar", 
                                                     varying = c(3:12))),
                       2,
                       mean),
                 apply(predict(model3, 
                               newdata = mlogit.data(H_rebate,
                                                     shape = "wide",
                                                     choice = "depvar",
                                                     varying = c(3:12))),
                       2,
                       mean)))
X <- model.matrix(model3)
head(X)
alt <- index(H)$alt
Xn <- X[alt == "ec",]
head(Xn)
Xn[, "ic"] <- Xn[, "ic"] + 200
Xn[, "oc"] <- Xn[, "oc"] * 0.75
head(Xn)
chid <- index(H)$chid
head(chid, 12)
unchid <- unique(index(H)$chid)
head(unchid, 12)
rownames(Xn) <- paste(unchid, 'new', sep = ".")
chidb <- c(chid, unchid)
head(Xn)
X <- rbind(X, Xn)
X <- X[order(chidb), ]
head(X,15)
exp_Xb <- as.numeric(exp(X %*% coef(model3))) # vectors
head(exp_Xb)
# tapply does the sum of th exp_Xb for each chidb
sum_exp_Xb <- as.numeric(tapply(exp_Xb, 
                                sort(chidb),
                                sum)) 
P <- exp_Xb / sum_exp_Xb[sort(chidb)]
P <- data.frame(matrix(P, 
                       ncol = 6, 
                       byrow = TRUE))
P <- transmute(P,   
               ec = P[, 1],
               er = P[, 2], 
               gc = P[, 3], 
               gr = P[, 4],
               hp = P[, 5], 
               new = P[, 6])
summary(rowSums(P))
apply(P, 2, mean)
apply(fitted(model3,
             outcome = FALSE), 
      2, 
      mean)
p_model3 <- fitted(model3,
                   outcome = FALSE) %>%
  data.frame()
apply(p_model3, 
      2, 
      mean)
o_model3 <- p_model3 %>%
  # Group by row
  rowwise() %>%
  # Find the maximum value by row
  mutate(max_p = max(c(ec, er, gc, gr, hp))) %>%
  ungroup() %>%
  # Find the column that matches the highest probability
  transmute(outcome = case_when(max_p == ec ~ "ec",
                             max_p == er ~ "er",
                             max_p == gc ~ "gc",
                             max_p == gr ~ "gr",
                             max_p == hp ~ "hp"))
table(o_model3)


paezha/discrtr documentation built on March 1, 2023, 5:25 p.m.