# Attach the packages used below: tidyverse (dplyr, tidyr, forcats, ggplot2),
# ggridges, and edr (named in the plot captions as the source of the
# `rainfall` and `german_cities` datasets).
library(tidyverse)
library(ggridges)
library(edr)
# Take a quick look at the `employment` dataset.
glimpse(employment)
# Minimal line plot of `unemployed` against `year`, using ggplot2 defaults.
employment %>%
  ggplot(mapping = aes(x = year, y = unemployed)) +
  geom_line()
# Polished version of the unemployment line plot: x-axis breaks every ten
# years, y-axis labels expressed in millions (with an "M" suffix), a title,
# and `theme_minimal()`.
employment %>%
  ggplot(mapping = aes(x = year, y = unemployed)) +
  geom_line() +
  scale_x_continuous(breaks = seq(from = 1940, to = 2010, by = 10)) +
  scale_y_continuous(
    labels = scales::number_format(accuracy = 1.0, scale = 1e-6, suffix = "M")
  ) +
  labs(
    title = "Number of Working Age People Unemployed from 1940 to 2010",
    x = NULL,
    y = "Number Unemployed"
  ) +
  theme_minimal()
# Keep only the last ten rows of `employment` for a close-up view.
employ_recent <- employment %>%
  slice_tail(n = 10)

# Plot `employed` as a solid line with white-filled point markers, and overlay
# `nonagriculture` as a dashed gray line for comparison.
ggplot(employ_recent, aes(x = year, y = employed)) +
  geom_line() +
  geom_point(size = 2, shape = 21, fill = "white", stroke = 1) +
  geom_line(
    aes(x = year, y = nonagriculture),
    linetype = "dashed",
    color = "gray"
  ) +
  scale_x_continuous(breaks = seq(2000, 2010, 1), minor_breaks = NULL) +
  scale_y_continuous(
    labels = scales::number_format(suffix = "M", scale = 1e-6, accuracy = 1.0)
  ) +
  labs(
    title = "Total Employed and Those Employed in Anything but Agriculture",
    x = NULL,
    y = "Number Employed"
  ) +
  theme_minimal()
# Tidy the `employment` dataset with tidyr's `pivot_longer()` function.
# The last ten rows of `population`, `employed`, and `unemployed` are stacked
# into a `type`/`n` pair of columns, and `type` becomes a factor whose levels
# are ordered population, employed, unemployed.
employ_recent_tidy <- employment %>%
  select(year, population, employed, unemployed) %>%
  slice_tail(n = 10) %>%
  pivot_longer(
    cols = population:unemployed,
    names_to = "type",
    values_to = "n"
  ) %>%
  mutate(
    type = factor(type) %>%
      fct_relevel(c("population", "employed", "unemployed"))
  )

# Print the tidied table.
employ_recent_tidy
# Naive line plot of the tidied data. `type` is not mapped to any aesthetic
# here, so the three series are not drawn as separate lines.
employ_recent_tidy %>%
  ggplot(mapping = aes(x = year, y = n)) +
  geom_line()
# Mapping the `linetype` aesthetic to the `type` variable of
# `employ_recent_tidy` gives us three separate lines in a single
# `geom_line()` call.
ggplot(employ_recent_tidy) +
  geom_line(aes(x = year, y = n, linetype = type)) +
  scale_x_continuous(breaks = seq(2000, 2010, 1), minor_breaks = NULL) +
  scale_y_continuous(
    labels = scales::number_format(suffix = "M", scale = 1e-6, accuracy = 1.0)
  ) +
  labs(
    title = "Comparison of Total Population to Employed and Unemployed Citizens",
    x = NULL,
    y = "Number of Citizens"
  ) +
  theme_minimal()
# Line types and colors are chosen by hand with `scale_linetype_manual()`
# and `scale_color_manual()`. Both aesthetics are mapped to `type`, and the
# values are assigned in factor-level order (population, employed,
# unemployed).
ggplot(employ_recent_tidy) +
  geom_line(aes(x = year, y = n, linetype = type, color = type)) +
  scale_linetype_manual(values = c("solid", "dashed", "dotted")) +
  scale_color_manual(values = c("black", "steelblue", "red")) +
  scale_x_continuous(breaks = seq(2000, 2010, 1), minor_breaks = NULL) +
  scale_y_continuous(
    labels = scales::number_format(suffix = "M", scale = 1e-6, accuracy = 1.0)
  ) +
  labs(
    title = "Comparison of Total Population to Employed and Unemployed Citizens",
    x = NULL,
    y = "Number of Citizens"
  ) +
  theme_minimal()
# Each series gets its own panel with `facet_wrap()` (one column, free
# scales), and the legend is probably unnecessary in this arrangement (so
# it's removed).
ggplot(employ_recent_tidy) +
  geom_line(aes(x = year, y = n, linetype = type, color = type)) +
  scale_linetype_manual(values = c("solid", "dashed", "dotted")) +
  scale_color_manual(values = c("black", "steelblue", "red")) +
  scale_x_continuous(breaks = seq(2000, 2010, 1), minor_breaks = NULL) +
  scale_y_continuous(
    labels = scales::number_format(suffix = "M", scale = 1e-6, accuracy = 1.0)
  ) +
  facet_wrap(vars(type), ncol = 1, scales = "free") +
  labs(
    title = "Comparison of Total Population to Employed and Unemployed Citizens",
    x = NULL,
    y = "Number of Citizens"
  ) +
  theme_minimal() +
  theme(legend.position = "none")
# Take a quick look at the `rainfall` dataset.
glimpse(rainfall)
# A basic area plot of `r_vancouver` against `year` with `geom_area()`.
ggplot(rainfall, aes(x = year, y = r_vancouver)) +
  geom_area()
# Fixed aesthetics can be passed to `geom_area()`, like a line color, a fill
# color, and some transparency.
ggplot(rainfall, aes(x = year, y = r_vancouver)) +
  geom_area(color = "blue", fill = "lightblue", alpha = 0.4)
# A theme is applied (`theme_bw()`), and useful labels (like a title and a
# caption) are paid attention to. A 15 pt margin is added on every side of
# the plot.
ggplot(rainfall, aes(x = year, y = r_vancouver)) +
  geom_area(color = "blue", fill = "lightblue", alpha = 0.4) +
  labs(
    title = "Annual Total Precipitation for the City of Vancouver",
    caption = "Data source: the rainfall dataset from the edr package.",
    x = NULL,
    y = "Precipitation, mm"
  ) +
  theme_bw() +
  theme(plot.margin = unit(c(15, 15, 15, 15), "pt"))
# Use `geom_col()` instead of `geom_bar(stat = "identity")` to make a simple
# bar chart of total annual rainfall statistics for Vancouver.
ggplot(rainfall, aes(x = year, y = r_vancouver)) +
  geom_col()
# Tidy the `rainfall` dataset with tidyr's `pivot_longer()` function.
# Only the first three rows are kept. Every column whose name starts with
# "r" is stacked into `city` (with the "r_" prefix stripped) and `precip`.
# `city` levels follow their order of appearance (`fct_inorder()`), and
# `year` levels are put in numeric order (`fct_inseq()`).
rainfall_recent_tidy <- rainfall %>%
  slice_head(n = 3) %>%
  pivot_longer(
    cols = starts_with("r"),
    names_to = "city",
    names_prefix = "r_",
    values_to = "precip"
  ) %>%
  mutate(
    city = factor(city) %>% fct_inorder(),
    year = factor(year) %>% fct_inseq()
  )

# Print the tidied table.
rainfall_recent_tidy
# The tidied data has `city` and `precip` as new variables. Note the use of
# `position = "dodge"` to avoid stacking the bars.
ggplot(rainfall_recent_tidy, aes(x = year, y = precip, fill = city)) +
  geom_col(position = "dodge")
# The same data with the `x` values as the cities instead of the years, where
# the `fill` is now mapped to `year`.
ggplot(rainfall_recent_tidy, aes(x = city, y = precip, fill = year)) +
  geom_col(position = "dodge")
# Presentation-ready dodged bar chart of precipitation by city, one bar per
# year. The legend is hidden; instead, the three bars at the first x position
# are labelled by hand with the years via `annotate()`.
ggplot(rainfall_recent_tidy, aes(x = city, y = precip, fill = year)) +
  geom_col(position = "dodge", width = 0.7) +
  # The palette has to be passed by name: an unnamed first argument is taken
  # as the scale (legend) title rather than as the palette.
  scale_fill_brewer(palette = "Blues") +
  # Replace the lower-case column-derived city keys with display names.
  scale_x_discrete(
    labels = c(
      "vancouver" = "Vancouver",
      "calgary" = "Calgary",
      "kenora" = "Kenora",
      "toronto" = "Toronto",
      "montreal" = "Montréal",
      "halifax" = "Halifax",
      "stjohns" = "St. John's"
    )
  ) +
  coord_cartesian(ylim = c(0, 1500)) +
  # Hand-placed, rotated year labels; the x offsets (0.77, 1.00, 1.24) line
  # up with the three dodged bars around x = 1.
  annotate(
    geom = "text", x = 0.77, y = 1200,
    label = "2017", hjust = 0, angle = 90, size = 3
  ) +
  annotate(
    geom = "text", x = 1.00, y = 1350,
    label = "2018", hjust = 0, angle = 90, size = 3
  ) +
  annotate(
    geom = "text", x = 1.24, y = 960,
    label = "2019", hjust = 0, angle = 90, size = 3
  ) +
  labs(
    title = "Annual Rainfall Totals",
    subtitle = "Comparison of seven cities for 2017, 2018, and 2019",
    caption = "Data source: the rainfall dataset from the edr package.",
    x = NULL,
    y = "Precipitation, mm"
  ) +
  theme_minimal() +
  theme(
    panel.grid.major.x = element_blank(),
    legend.position = "none",
    axis.text.x = element_text(vjust = 5)
  )
# Take a quick look at the `german_cities` dataset, which we've used
# previously.
glimpse(german_cities)
# A first stacked bar plot of the `german_cities` dataset is an abject
# failure (the legend is pretty much all we see).
german_cities %>%
  ggplot(aes(x = pop_2015, y = state, fill = name)) +
  geom_col()
# Same stacked bar plot of `pop_2015` by `state`, filled by city `name`, with
# the legend switched off so the bars themselves are visible.
ggplot(
  data = german_cities,
  mapping = aes(x = pop_2015, y = state, fill = name)
) +
  geom_col(mapping = aes(fill = name)) +
  theme(legend.position = "none")
# Some preparation of the `german_cities` data is required to get a total
# population by state (`total_pop`) column and to reorder factor levels
# (improving the order of bars and the segments within).
german_cities_totals <- german_cities %>%
  group_by(state) %>%
  arrange(desc(pop_2015)) %>%
  # Grouped sum: every row receives the total for its own state.
  mutate(total_pop = sum(pop_2015)) %>%
  ungroup() %>%
  mutate(
    # States ordered by total population; cities by descending population.
    state = state %>% fct_reorder(total_pop),
    name = name %>% fct_reorder(pop_2015, .desc = TRUE)
  ) %>%
  select(-pop_2011)

# Print the prepared table.
german_cities_totals
# An improved `german_cities` stacked bar plot: bars are in descending order,
# cities are stacked by population (increasing, left to right), and the
# x-axis labels are more readable.
german_cities_totals %>%
  ggplot(aes(x = pop_2015, y = state, fill = name)) +
  geom_col() +
  coord_cartesian(xlim = c(0, 9E6)) +
  scale_x_continuous(
    labels = scales::number_format(suffix = "M", scale = 1e-6, accuracy = 1.0)
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    panel.grid.major.y = element_blank()
  )
# Build a one-row-per-state summary to use as label data for `geom_text()`:
# the most populous city in each state plus `n`, the number of cities listed
# for that state.
german_cities_largest <- german_cities_totals %>%
  group_by(state) %>%
  # `arrange()` ignores the grouping by default, so rows are sorted by
  # population overall; the first row within each state is its largest city.
  arrange(desc(pop_2015)) %>%
  mutate(n = n()) %>%
  slice(1) %>%
  # Drop the grouping once the per-state work is done so that it does not
  # leak into later operations on this table.
  ungroup() %>%
  select(-pop_2015)

# Print the summary table.
german_cities_largest
# Final annotated stacked bar plot: grayscale city segments with thin white
# outlines, the name of each state's largest city to the right of its bar,
# and the number of cities (in parentheses) to the left of the zero line.
ggplot(
  data = german_cities_totals,
  mapping = aes(x = pop_2015, y = state, fill = name)
) +
  # NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for outline
  # width; `size` is kept so older ggplot2 versions still work. Confirm the
  # minimum supported version before switching.
  geom_col(color = "white", size = 0.2) +
  scale_fill_grey(start = 0.1, end = 0.7) +
  # Largest-city label, left-aligned just past the end of each bar.
  geom_text(
    data = german_cities_largest,
    mapping = aes(y = state, x = total_pop, label = name),
    size = 3,
    hjust = 0,
    nudge_x = 1e5
  ) +
  # City count, right-aligned just before the zero line.
  geom_text(
    data = german_cities_largest,
    mapping = aes(y = state, x = 0, label = paste0("(", n, ")")),
    size = 2.5,
    hjust = 1,
    nudge_x = -1e5
  ) +
  coord_cartesian(xlim = c(0, 9e6)) +
  scale_x_continuous(
    labels = scales::number_format(accuracy = 1.0, scale = 1e-6, suffix = "M"),
    breaks = seq(from = 1e6, to = 9e6, by = 1e6),
    minor_breaks = NULL
  ) +
  geom_vline(xintercept = 0, color = "gray50") +
  labs(
    title = "Comparison of Most Populous German Cities Across All States ",
    subtitle = "The largest city in each state is provided at right.\n",
    caption = "Data source: the german_cities dataset from the edr package.",
    x = NULL,
    y = NULL
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    panel.grid.major.y = element_blank(),
    plot.title.position = "plot",
    plot.caption.position = "plot"
  )
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.