knitr::opts_chunk$set(echo = TRUE)
library(biscale) # Tools and Palettes for Bivariate Thematic Mapping library(cowplot) # Streamlined Plot Theme and Plot Annotations for 'ggplot2' library(discrtr) # A companion package for the book Introduction to Discrete Choice Analysis with `R` library(dplyr) # A Grammar of Data Manipulation library(ggalluvial) # Alluvial Plots in 'ggplot2' library(ggplot2) # Create Elegant Data Visualisations Using the Grammar of Graphics library(ggspatial) # Spatial Data Framework for ggplot2 library(kableExtra) # Construct Complex Table with 'kable' and Pipe Syntax library(sf) # Simple Features for R library(stargazer) # Well-Formatted Regression and Summary Statistics Tables library(treemapify) # Draw Treemaps in 'ggplot2' data("mc_commute_long")
# Convert the table to simple features and set the # coordinate referencing system to epsg 4326 for long-lat mc_commute_wide.sf <- mc_commute_wide %>% st_as_sf(coords = c("LONG", "LAT"), crs = 4326) # Filter out rural type urban_suburban <- urban_types %>% filter(Type == "Urban" | Type == "Suburban") %>% st_transform(4326) # Select observations that are in urban_suburban mc_commute_wide.sf <- mc_commute_wide.sf[urban_suburban, ] # Extract the bounding box of the spatial data bbox <- st_bbox(mc_commute_wide.sf) # Use the bounding box to crop the spatial data urban_suburban <- urban_suburban %>% st_crop(bbox) # Create a table with a bivariate classification # based on `street_density` and `sidewalk_density` data <- bi_class(mc_commute_wide.sf, x = street_density, y = sidewalk_density, style = "quantile", dim = 2)
# Pipe the table to function `select()` mc_commute_wide %>% # Select columns select(street_density, sidewalk_density, LAT, LONG, choice, gender, shared, family) %>% # Slice (filter) 8 rows at the head of the table slice_head(n = 8) %>% # Use `kable()` to tabluate kable("latex", booktabs = TRUE, digits = 3, caption = "\\label{tab:ch02-commute-table}First eight rows of selected variables in data frame") %>% kable_styling(latex_options = c("scale_down", "striped", "Hold"))
```rMultidimensional data visualization: map"}
map
map <- ggplot() +
# geom_sf()
is used to plot geographical geometric objects
# The input needs to be a simple features object
# Plot the spatial features in object urban_suburban
geom_sf(data = urban_suburban,
# Fill the polygons with color according to Type
aes(fill = Type),
alpha = 0.2) +
# Plot the spatial feature in data
geom_sf(data = data,
# Color the objects using the bivariate classification
# we created earlier
aes(color = bi_class),
size = 2.5,
show.legend = FALSE) +
# Plot the spatial features in data
with size = 2 and shape = 1
geom_sf(data = data,
size = 2,
shape = 1) +
# Select color palette for bivariate scale
bi_scale_color(pal = "DkViolet",
dim = 2) +
# Select color palette for filling polygons
scale_fill_brewer(palette = "Dark2") +
annotation_scale(pad_x = unit(10.5, "cm"),
pad_y = unit(5, "cm")) +
# Select theme for plot
bi_theme() +
# Modify the theme
theme(legend.position = "bottom",
legend.justification = "left",
legend.text = element_text(size = 9),
legend.title = element_blank())
legend <- bi_legend(pal = "DkViolet", dim = 2, xlab = "Higher Street Den ", ylab = "Higher Sidewalk Den ", size = 5)
ggdraw() + draw_plot(map, 0, 0, 1, 1) + draw_plot(legend, 0.65, 0.1, 0.3, 0.3)
```r # Convert data to alluvia: pipe `mc_commute_wide` to `mutate()` mc_commute_alluvia <- mc_commute_wide %>% # Code the living arrangements mutate(living_arrangements = case_when(shared == "Living in Shared Accommodations" ~ "Shared", family == "Living with Family" ~ "Family", TRUE ~ "Other")) %>% # Select three variables select(gender, living_arrangements, choice) %>% # Use `group_by()` to summarize the count of cases group_by(gender, living_arrangements, choice) %>% # Summarize the number of cases summarize(frequency = n(), .groups = "drop")
# Pipe table to `kable()` for tabulation mc_commute_alluvia %>% kable("latex", booktabs = TRUE, caption = "\\label{tab:ch02-commute-table-frequency}Table of frequencies by three categorical variables") %>% kable_styling(latex_options = c("striped", "Hold"))
```rMultidimensional data visualization: alluvial plot"}
mc_commute_alluvia
ggplot(data = mc_commute_alluvia, # Map the frequency to the y-axis aes(y = frequency, # Specify the variables to map to the x-axis (the stratum) axis1 = gender, axis2 = living_arrangements, axis4 = choice)) + # Add geometric objects of type alluvium geom_alluvium(aes(fill = gender), width = 1/12, color = "black") + # Add geometric objects of type stratum geom_stratum(width = 1/3, fill = "black", color = "grey") + # Lable the stratum geom_text(stat = "stratum", aes(label = after_stat(stratum)), color = "white", size = 3) + # Specify parameters for the scale in the x-axis scale_x_discrete(limits = c("Gender", "Living Arrangements", "Choice"), expand = c(.05, .05)) + # Select palette for alluvia scale_fill_brewer(type = "qual", palette = "Dark2") + # Select theme for plot theme_minimal()
```r # Function `print()` displays the argument on the screen. print("Is that a bar chart in your pocket, or are you just happy to see me?")
# Function `rm()` removes objects from the environment # i.e., it clears the working memory rm(list = ls())
library(discrtr) # A companion package for the book Introduction to Discrete Choice Analysis with `R` library(dplyr) # A Grammar of Data Manipulation library(ggplot2) # Create Elegant Data Visualisations Using the Grammar of Graphics library(gplots) # Various R Programming Tools for Plotting Data library(ggmosaic) # Mosaic Plots in the 'ggplot2' Framework
# Load the data set data("mc_commute_wide", package = "discrtr")
# Pipe table `mc_commute_wide` mc_commute_wide <- mc_commute_wide %>% # Function `mutate()` creates new columns in the table; mutate # the table to convert variables `child` and `vehind` to factors # with more informative labels mutate(child = factor(child, levels=c("Yes", "No"), # Give the factor categories more descriptive labels labels=c("Living with a child", "Not living with a child")), # Relabel `vehind` variable vehind = factor(vehind, levels=c("No", "Yes"), # Give the factor categories more descriptive labels labels=c("No ind. vehicle access", "Ind. vehicle access")))
summary(mc_commute_wide)
summary(mc_commute_wide$Shelters_SD)
# Pipe data frame `mc_mode_wide` to next function mc_commute_wide <- mc_commute_wide %>% # Use mutate to create a new variable # Function `case_when()` is a vectorized form of if-else statements mutate(Shelters = case_when(Shelters_SD == 1 ~ -2, Shelters_D == 1 ~ -1, Shelters_A == 1 ~ 1, Shelters_SA == 1 ~ 2, TRUE ~ 0))
mc_commute_wide <- mc_commute_wide %>% # Use `mutate()` to modify the content of an existing variable mutate(Shelters = factor(Shelters, levels = c(-2, -1, 0, 1, 2), labels = c("Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"), # The factor is an ordinal variable ordered = TRUE)) summary(mc_commute_wide$Shelters)
# Pipe table `mc_Commute_wide to `ggplot()` mc_commute_wide %>% # Create a ggplot object with the table that was piped # and map the variable `Shelters` to the x-axis ggplot(aes(x = Shelters)) + # Add a geometric object of type bar; we do not need # to specify the y-axis because the height of the bar # will be the statistic for the corresponding categorical # outcome geom_bar(color = "black", fill = "white") + # The function `labs()` adds labels to part of the plot, for instance the x and y axes labs(x = "Public transport facilities of good quality", y = "Number of respondents")
# Pipe table `mc_commute_wide` mc_commute_wide %>% # Use `group_by()` to group the table by the values # of variable `Shelters` group_by(Shelters) %>% # Summarize: calculate the number n() of cases by # category of `Shelters` summarize(n = n()) %>% # Pipe the result to `ggplot()`; map `Shelters` to the x-axis # and map the number of cases to y; to create segments map # the end of the segment to y = 0 and keep it constant on x, # this will create vertical line ggplot(aes(x = Shelters, xend = Shelters, y = n, yend = 0)) + # Add geometric featues of type point geom_point(color = "black", fill = "white", size = 6) + # Add geometric features of type segment (line segments) geom_segment(size = 1) + # Label the axes labs(x = "Public transport facilities of good quality", y = "Number of respondents")
library(treemapify) # Pipe table mc_commute_wide %>% # Group table based on `choice` group_by(choice) %>% # Count the number of cases by `choice` and pipe to `ggplot()` summarize(n = n()) %>% # Map the color of the rectangles to the variable `choice` and # their area to the number of cases ggplot(aes(fill = choice, area = n)) + # Layer geometric object of type treemap geom_treemap() + # Add labels labs(title = "Trips by mode", fill="Mode")
mc_commute_wide <- mc_commute_wide %>% # Use `mutate()` to convert variable `housing` to a factor mutate(housing = case_when(shared != "No" ~ "shared", family != "No" ~ "family", TRUE ~ "solo"), housing = factor(housing)) summary(mc_commute_wide$housing)
# Pipe table `mc_commute_wide` to `select()` mc_commute_wide %>% # Select variables `choice` and `sidewalk_density` select(choice, sidewalk_density) %>% summary()
mc_commute_wide %>% # Map `choice` to the x-axis and `sidewalk_density` to the y-axis ggplot(aes(x = choice, y = sidewalk_density)) + # Boxplots are useful for visualizing the relationship between # one categorical and one quantitative variable geom_boxplot()
mc_commute_wide %>% ggplot(aes(x = choice, y = sidewalk_density)) + geom_boxplot() + # Label the axes labs(x="Mode", # The expression function allows us to include superscripts # and subscripts in labels and titles y = expression("Sidewalk density (km/km"^2*")"))
mc_commute_wide %>% ggplot(aes(x = choice, y = sidewalk_density, # Map the color of the polygons to `choice` fill = choice)) + # Add a geometric object of type violin geom_violin(trim = FALSE) + # Add geometric object of type boxplot geom_boxplot(width = 0.1, fill = "white") + labs(x="Mode", y = expression("Sidewalk density (km/km"^2*")"), # Add a label for the fill fill = "Mode")
library(ggridges) mc_commute_wide %>% ggplot(aes(x = sidewalk_density, y = choice, # Map the color of the polygons to `choice` fill = choice)) + # Add geometric object of type ridges with jittered points geom_density_ridges(jittered_points = TRUE, bandwidth = 3.5, position = position_points_jitter(width = 0.05, height = 0), point_shape = '|', point_size = 3, point_alpha = 1, alpha = 0.7) + labs(y="Mode", x = expression("Sidewalk density (km/km"^2*")"), # Add a label for the fill fill = "Mode")
summary(mc_commute_wide$child)
# Create a table with the two variables of interest tableau <- table(mc_commute_wide$choice, mc_commute_wide$child) balloonplot(as.table(tableau), # The parameters below control the aspect of the table # Labels xlab = "Mode", ylab = "Dependent minor(s)", # Adjust maximum dot size dotsize = 3/max(strwidth(19), strheight(19)), # Symbol used for the dots dotcolor = "skyblue", text.size = 0.65, # Title of plot main = "Mode as a function of dependent minors in household", # Display the values in the cells label = TRUE, label.size = 0.80, # Scale balloons by volume (or diameter) scale.method = c("volume"), # Scale balloons relative to zero scale.range = c("absolute"), # Space for column/row labels colmar = 1, rowmar = 2, # Display zeros if present show.zeros = TRUE, # Display row and column sums show.margins = TRUE, # Display cumulative margins as cascade plots cum.margins = TRUE)
tableau <- table(mc_commute_wide$choice, mc_commute_wide$housing) balloonplot(as.table(tableau), # The parameters below control the aspect of the table # Labels xlab = "Mode", ylab = "Living arrangement", # Adjust maximum dot size dotsize = 3/max(strwidth(19), strheight(19)), # Symbol used for the dots dotcolor = "skyblue", text.size = 0.65, # Title of plot main = "Mode as a function of living arrangement", # Display the values in the cells label = TRUE, label.size = 0.80, # Scale balloons by volume (or diameter) scale.method = c("volume"), # Scale balloons relative to zero scale.range = c("absolute"), # Space for column/row labels colmar = 1, rowmar = 2, # Display zeros if present show.zeros = TRUE, # Display row and column sums show.margins = TRUE, # Display cumulative margins as cascade plots cum.margins = TRUE)
tableau <- table(mc_commute_wide$child, mc_commute_wide$housing) balloonplot(as.table(tableau), # The parameters below control the aspect of the table # Labels xlab = "Living arrangement", ylab = "Dependent minor(s)", # Adjust maximum dot size dotsize = 3/max(strwidth(19), strheight(19)), # Symbol used for the dots dotcolor = "skyblue", text.size = 0.50, # Title of plot main = "Minors in household and living arrangement", # Display the values in the cells label = TRUE, label.size = 0.80, # Scale balloons by volume (or diameter) scale.method = c("volume"), # Scale balloons relative to zero scale.range = c("absolute"), # Space for column/row labels colmar = 1, rowmar = 2, # Display zeros if present show.zeros = TRUE, # Display row and column sums show.margins = TRUE, # Display cumulative margins as cascade plots cum.margins = TRUE)
mc_commute_wide %>% ggplot() + # Add geometric object of type mosaic # Map the interaction between `choice` and `child` to the x-axis geom_mosaic(aes(x = product(choice, child), fill = choice)) + # Add labels labs(x = "Dependent minor(s)", y = "Mode", fill = "Mode")
ggplot(data = mc_commute_wide) + # Add geometric object of type mosaic # Map the interaction between `choice` and `numna` to the x-axis geom_mosaic(aes(x = product(choice, numna), # Color rectangles based on `choice` fill = choice)) + # Add labels labs(x = "Number of alternatives", y = "Mode", fill = "Mode")
# Pipe table to next function mc_commute_wide %>% # Group observations by `choice` and `housing` group_by(choice, housing) %>% # Calculate number of cases by combination of `choice` and `housing` summarize(n = n(), .groups = "drop") %>% ggplot(aes(x = choice, y = housing)) + # Add geometric object of type tile, map the color # of tiles to `n`, the number of cases geom_tile(aes(fill = n)) + # Add labels labs(x = "Mode", y = "Living arrangement", fill = "Number of respondents")
mc_commute_wide %>% ggplot(aes(x = like_active_neighborhood, fill = choice)) + geom_bar(position = "fill") + labs(y = "Proportion", x = "Like active neighborhood", fill="Mode")
mc_commute_wide %>% ggplot(aes(x = sidewalk_density, # choice is mapped to the y axis y = choice, # choice is also mapped to the fill color! fill = choice)) + # Add geometric object of type density ridges geom_density_ridges(jittered_points = TRUE, bandwidth = 3.5, position = position_points_jitter(width = 0.05, height = 0), point_shape = '|', point_size = 3, point_alpha = 1, alpha = 0.7)+ # Add labels labs(y="Mode", x = expression("Sidewalk density (km/km"^2*")"), fill = "Mode")
ggplot(data = mc_commute_wide, aes(x = sidewalk_density, y = choice, # By mapping the fill color to `vehind` # we introduce an additional data dimension to the plot fill = vehind)) + geom_density_ridges(jittered_points = TRUE, bandwidth = 3.5, position = position_points_jitter(width = 0.05, height = 0), point_shape = '|', point_size = 3, point_alpha = 1, alpha = 0.7) + labs(y="Mode", x = expression("Sidewalk density (km/km"^2*")"), fill = "Individual access to a vehicle")
ggplot(data = mc_commute_wide %>% # Group by choice and gender group_by(choice, gender) %>% # Summarize to obtain the number of responses by choice-gender combination # and the mean of sidewalk density for each group summarize(n = n(), sidewalk_density = mean(sidewalk_density), .groups = "drop"), # Map the area of the rectangles to the number of responses, the fill color # to mean sidewalk density, and group rectangles by choice aes(area = n, fill = sidewalk_density, label = gender, subgroup = choice)) + # Create main treemap geom_treemap() + # Plot borders of subgroups geom_treemap_subgroup_border(size = 5)+ # Add labels geom_treemap_subgroup_text(fontface = "bold", colour = "white", place = "topleft", size = 10, grow = FALSE) + geom_treemap_text(fontface = "italic", colour = "lightgray", place = "bottomright", size = 10, grow = FALSE) + labs(title = "Trips by Mode-Gender and sidewalk density", fill = expression("Sidewalk density (km/km"^2*")"))
# data preparation mc_commute_alluvia <- mc_commute_wide %>% mutate(living_arrangments = case_when(shared == "Living in Shared Accommodations" ~ "Shared", family == "Living with Family" ~ "Family", TRUE ~ "Other")) %>% select(gender, living_arrangments, choice) %>% group_by(gender, living_arrangments, choice) %>% summarize(frequency = n(), .groups = "drop")
# plot mc_commute_alluvia %>% ggplot(aes(y = frequency, axis1 = gender, axis2 = living_arrangments, axis4 = choice)) + geom_alluvium(aes(fill = gender), width = 1/12, color = "black") + geom_stratum(width = 1/3, fill = "black", color = "grey") + geom_text(stat = "stratum", aes(label = after_stat(stratum)), color = "white", size = 3) + scale_x_discrete(limits = c("Gender", "Living Arrangements", "Choice"), expand = c(.05, .05)) + scale_fill_brewer(type = "qual", palette = "Dark2") + theme_minimal()
mc_commute_wide %>% ggplot(aes(x = sidewalk_density, y = choice, fill = vehind)) + geom_density_ridges(jittered_points = TRUE, bandwidth = 3.5, position = position_points_jitter(width = 0.05, height = 0), point_shape = '|', point_size = 3, point_alpha = 1, alpha = 0.7) + labs(y="Mode", x = expression("Sidewalk density (km/km"^2*")"), fill = "Individual access to a vehicle") + # `facet_wrap()` creates subplots after partitioning the data set # by the variable(s) specified, in this case `vehind` facet_wrap(~ vehind)
mc_commute_wide %>% ggplot(aes(x = child, fill = choice)) + geom_bar(position = "fill") + labs(y = "Proportion", x = "", fill="Mode") + # Facet the plots based on `gender` facet_wrap(~ gender) + theme(axis.text.x = element_text(size = 7))
mc_commute_wide %>% ggplot(aes(x = gender, fill = choice)) + geom_bar(position = "fill") + labs(y = "Proportion", x = "Gender", fill="Mode") + # Facet the plots based on `child` facet_wrap(~ child)
mc_commute_wide %>% ggplot(aes(x = gender, fill = choice)) + geom_bar(position = "fill") + labs(y = "Proportion", x = "Gender", fill="Mode") + # `facet_grid()` creates a "matrix" of subplots # with the variable on the left spread on the # x-axis and the one on the right on the y-axis facet_grid(vehind ~ child)
table(mc_commute_wide$vehind, mc_commute_wide$child)
mc_commute_wide %>% ggplot() + (aes(x = gender, fill = choice)) + geom_bar(position = "fill") + labs(y = "Proportion", x = "Getting there is fun", fill = "Mode") + facet_wrap(~ getting_there_fun)
mc_commute_wide %>% ggplot(aes(x = getting_there_fun, fill = choice)) + geom_bar(position = "fill") + labs(y = "Proportion", x = "Getting there is fun", fill = "Mode") + facet_wrap(~ child)
mc_commute_wide %>% ggplot() + geom_mosaic(aes(x = product(choice, getting_there_fun), fill = choice)) + facet_wrap(~ child)+ labs(y = "Proportion", x = "Getting there is fun", fill = "Mode")
names(mc_commute_wide)[names(mc_commute_wide) == "numna"] <- "Alternatives" # Renaming variable mc_commute_wide %>% ggplot(aes(x = available.Walk)) + labs(y = "Proportion", x = "Walk is available") + geom_bar(color="black", fill="white") + facet_wrap(~ Alternatives, labeller = label_both)
mc_commute_wide %>% ggplot(aes(x = Alternatives)) + labs(y = "Proportion", x = "Number of alternatives")+ geom_bar(color = "black", fill = "white") + facet_wrap(~ available.Walk, labeller = label_both)
library(mlogit) data("Car")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.