# Global knitr chunk option: show the source code of every chunk by default.
knitr::opts_chunk$set(echo = TRUE)

# Figure (chunk option echo = FALSE): Example of a Conceptual Model
# (adapted from Fig. 15.1 in Greed, 2000)
knitr::include_graphics("figures/01-Figure-1a.png")

# Figure (chunk option echo = FALSE): Example of a Conceptual Model
# (adapted from Fig. 1.2 in Ortúzar and Willumsen, 2011)
knitr::include_graphics("figures/01-Figure-1b.png")
# Function `print()` displays its argument on the screen.
# `print()` is a generic method in `R`, which means that many types
# of objects can be printed, including a character string as in the
# example below:
print("Hello, Discrete Choice Analysis!")
# Function `rm()` removes objects from the _environment_,
# that is, objects currently in memory. `ls()` returns the names of
# those objects, so the argument `list = ls()` removes all of them.
# NOTE(review): this only removes objects; packages that are already
# attached stay attached.
rm(list = ls())
library(discrtr) # A companion package for the book Introduction to Discrete Choice Analysis with `R`
library(dplyr) # A Grammar of Data Manipulation
library(ggplot2) # Create Elegant Data Visualisations Using the Grammar of Graphics
library(mlogit) # Multinomial Logit Models 
library(readr) # Read Rectangular Text Data 
library(stargazer) # Well-Formatted Regression and Summary Statistics Tables
# Locate the csv file that ships with package `discrtr`, read it,
# and store the resulting table under the name `mc_mode_choice`
mc_mode_choice <- system.file(
  "extdata",
  "mc_commute.csv",
  package = "discrtr"
) %>%
  read_csv(show_col_types = FALSE)
# Show the first few rows of the table with `head()`; the
# index `[, 1:4]` restricts the display to columns 1 through 4
head(mc_mode_choice[, 1:4])
# `summary()`, like `print()`, is a generic method in `R`; applied
# to a data object it displays summary statistics of its columns
summary(mc_mode_choice)
# Table of summary statistics for columns 1 through 5.
# `stargazer()` expects a data frame, hence `as.data.frame()`.
#   type:              "text", "html", or "latex", depending on the
#                      desired output
#   header:            FALSE suppresses the package version info
#   title:             title of the table
#   omit.summary.stat: summary statistics to leave out of the output
#   font.size:         font size of the table; can be changed
stargazer(
  as.data.frame(mc_mode_choice[, 1:5]),
  type = "latex",
  header = FALSE,
  title = "Example of a table with summary statistics",
  omit.summary.stat = c("N", "median"),
  font.size = "small"
)
# Indexing allows us to choose parts of a data object.
# In this example, we extract the first element of column
# `choice` in table `mc_mode_choice` and the fourth element
# of the same column, and subtract the latter from the former
mc_mode_choice$choice[1] - mc_mode_choice$choice[4]
# `factor()` converts a variable (character or numeric) into a factor,
# i.e., a set of labels or categories. To obtain an ordinal variable,
# add the argument ordered = TRUE. By default the levels of a
# non-ordinal factor are displayed alphabetically; listing the labels
# in a different order changes the order of display _without
# necessarily making the factor ordinal_
mc_mode_choice$choice <- factor(
  mc_mode_choice$choice,
  labels = c("Cycle", "Walk", "HSR", "Car")
)
# Number of cases in each category
summary(mc_mode_choice$choice)
# Same subtraction as before the conversion. Arithmetic is not
# meaningful for factors, so `R` returns NA with a warning
mc_mode_choice$choice[1] - mc_mode_choice$choice[4]
# Summary statistics of a single column of the table
summary(mc_mode_choice$timecycle)
# Find the class of an object: `choice` was converted to a factor
# above; `timecycle` is summarized above and compared to a number
# below, so it is presumably numeric
class(mc_mode_choice$choice)
class(mc_mode_choice$timecycle)
# Index by position: row 2, column 2 of the table
mc_mode_choice[2, 2]
# Index by name: second element of column `choice`, using `$`
mc_mode_choice$choice[2]
# Same element, extracting the column with `[[ ]]` and its name
mc_mode_choice[["choice"]][2]
# Index ranges: rows 2 through 5 of columns 7 and 8
# (NOTE(review): columns 7:8 are used further down as the positions
# of `timecycle` and `timewalk` - confirm against the csv file)
mc_mode_choice[2:5, 7:8]
# Keep only the values of `timecycle` that are different from 100000
# (NOTE(review): 100000 looks like a placeholder code rather than a
# real travel time - confirm against the data documentation)
time.Cycle.clean <- with(mc_mode_choice, timecycle[timecycle != 100000])
# The result is a plain vector, not a table
class(time.Cycle.clean)
summary(time.Cycle.clean)
# The `dplyr` way: pipe the table into `select()` to keep the two
# columns of interest, then into `filter()` to keep the rows where
# neither column is 100000
time.Active.clean <- mc_mode_choice %>%
  select(timecycle, timewalk) %>%
  filter(timecycle != 100000, timewalk != 100000)
# The hard way: build the row condition by hand and select the two
# columns by their position in the table
time.Active.clean.the.hard.way <- mc_mode_choice[
  with(mc_mode_choice, timecycle != 100000 & timewalk != 100000),
  7:8
]
# Compare the summaries of the two tables
summary(time.Active.clean)
summary(time.Active.clean.the.hard.way)
summary(time.Active.clean)
# Binned counts of cycling time (blue) and walking time (yellow),
# drawn as two semi-transparent areas on the same x-axis
time.Active.clean %>%
  ggplot() +
  geom_area(
    mapping = aes(x = timecycle),
    stat = "bin", binwidth = 5,
    color = "blue", fill = "blue", alpha = 0.6
  ) +
  geom_area(
    mapping = aes(x = timewalk),
    stat = "bin", binwidth = 5,
    color = "yellow", fill = "yellow", alpha = 0.6
  )
# Create a `ggplot` object named `p` whose input is
# table `time.Active.clean`
p <- ggplot(time.Active.clean)
# Typing the name of a ggplot object renders it; this is
# the default behavior
p
# Add a geometric object of type area to the plot, with variable
# `timecycle` mapped to the x-axis. The y-axis does not need to be
# specified because it is a calculated statistic: the count of
# cases returned by stat = "bin". The binwidth controls the size
# of the bins used to count the cases at levels of the variable
# mapped to the x-axis
p +
  geom_area(
    mapping = aes(x = timecycle),
    stat = "bin",
    binwidth = 5
  )
# Same layer with some styling:
#   fill:  color of the polygon
#   color: color of the perimeter of the polygon, or of lines
#          more generally
p +
  geom_area(
    mapping = aes(x = timecycle),
    stat = "bin",
    binwidth = 5,
    fill = "blue",
    color = "black",
    alpha = 0.6
  )
# A second geometric element can be plotted on the same x-axis
# using a different variable from the same table
ggplot(time.Active.clean) +
  geom_area(
    mapping = aes(x = timecycle),
    stat = "bin", binwidth = 5,
    fill = "blue", color = "black", alpha = 0.6
  ) +
  geom_area(
    mapping = aes(x = timewalk),
    stat = "bin", binwidth = 5,
    fill = "yellow", color = "black", alpha = 0.6
  )
# Same plot, with a label for the x-axis
ggplot(time.Active.clean) +
  geom_area(
    mapping = aes(x = timecycle),
    stat = "bin", binwidth = 5,
    fill = "blue", color = "black", alpha = 0.6
  ) +
  geom_area(
    mapping = aes(x = timewalk),
    stat = "bin", binwidth = 5,
    fill = "yellow", color = "black", alpha = 0.6
  ) +
  xlab("Time (in minutes)")
# The pipe operator `%>%` passes the object on its left to the
# function on its right, where it becomes the first argument.
# Here the table is piped to `select()`, which retrieves columns
# from a data frame, and the result is piped to `summary()`
mc_mode_choice %>%
  select(choice, side_den) %>%
  summary()
# The table can also be piped to `ggplot()`, where it takes the
# place of the first argument of the function, i.e., data.
# `choice` is mapped to the x-axis and `side_den` to the y-axis,
# and a geometric object of type boxplot is added
mc_mode_choice %>%
  ggplot(mapping = aes(x = choice, y = side_den)) +
  geom_boxplot()
# Attach package `mlogit` and load data set `Mode` into memory
# (NOTE(review): `Mode` is presumably one of the data sets that
# ships with `mlogit` - confirm for the installed version)
library(mlogit)
data(Mode)


# paezha/discrtr documentation built on March 1, 2023, 5:25 p.m.