# Chunk defaults for the rendered document: collapse each chunk's source and
# output into a single block and prefix output lines with "#>".
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

# Attach each package once; the repeated library(glab.library) call was
# redundant.
library(glab.library)
library(ggplot2)

# Preview the first rows of the iris data set.
head(iris)
Set up mapping aesthetics by mapping continuous variables (columns in our dataset) to x and y axes.
# Mapping alone: this sets up the axes, but no layer is drawn yet.
ggplot(iris, aes(Sepal.Length, Sepal.Width))

# Add a point layer to get a scatter plot.
ggplot(data = iris, mapping = aes(Sepal.Length, Sepal.Width)) +
  geom_point()

# Additionally map Species to the color aesthetic.
ggplot(
  data = iris,
  mapping = aes(Sepal.Length, Sepal.Width, color = Species)
) +
  geom_point()
Set up mapping aesthetics with discrete x variable and continuous y variable.
# The same discrete-x / continuous-y mapping, drawn with four different
# layer combinations.

# Points only.
ggplot(data = iris, mapping = aes(Species, Sepal.Width, color = Species)) +
  geom_point()

# Box plots.
ggplot(data = iris, mapping = aes(Species, Sepal.Width, color = Species)) +
  geom_boxplot()

# Violin plots.
ggplot(data = iris, mapping = aes(Species, Sepal.Width, color = Species)) +
  geom_violin()

# Violins first, then points drawn on top of them.
ggplot(data = iris, mapping = aes(Species, Sepal.Width, color = Species)) +
  geom_violin() +
  geom_point()
Note that the order of the layers matters! The first ones are plotted first and new layers are added on top. (Here, only the violins are visible.)
# Layers are drawn in the order they are added: points first, violins on top.
ggplot(data = iris, mapping = aes(Species, Sepal.Width, color = Species)) +
  geom_point() +
  geom_violin()
If the points are overlapping each other, try this.
# Violins with a jittered point layer; width and height set the amount of
# horizontal and vertical jitter applied to each point.
ggplot(data = iris, mapping = aes(Species, Sepal.Width, color = Species)) +
  geom_violin() +
  geom_jitter(
    width = 0.33,
    height = 0.05
  )
Jittered points help but some points may still be overlapping, so let's use Beeswarm Plots.
# ggbeeswarm supplies geom_beeswarm().
library(ggbeeswarm)

# Violins with a beeswarm point layer on top.
ggplot(data = iris, mapping = aes(Species, Sepal.Width, color = Species)) +
  geom_violin() +
  geom_beeswarm(
    size = 1.5,
    cex = 1.5
  )
Color one specific layer of ggplot
Suppose we only want one layer to be colored, rather than all of them. Here we move mapping of color to a specific layer of the ggplot.
# The color mapping lives on the violin layer only, so the beeswarm points
# are not colored by Species.
ggplot(data = iris, mapping = aes(Species, Sepal.Width)) +
  geom_violin(mapping = aes(color = Species)) +
  geom_beeswarm(
    size = 1.5,
    cex = 1.5
  )
Set up mapping aesthetics by mapping continuous variables to x and y axes and coloring by "Treatment".
# Preview the CO2 data set.
head(CO2)

# Scatter plot of uptake against concentration, colored by Treatment.
# Stored in `gg` so later chunks can add layers to it.
gg <- ggplot(data = CO2, mapping = aes(conc, uptake, color = Treatment)) +
  geom_point()
print(gg)
Our data comes from two different sites. Let's separate the plots by the collection site using facet_wrap.
# One panel per level of Type.
gg2 <- gg +
  facet_wrap(facets = ~ Type)
print(gg2)

# Add a smoother; it inherits the color mapping from the base plot.
gg2 +
  geom_smooth()
We can try removing the color mapping in favor of a constant mapping to "black". However, this will cause an issue! The color aesthetic distinguished the two lines we want to draw per facet.
# A constant color replaces the Treatment color mapping on this layer, so the
# smoother is no longer split by Treatment (the issue described in the text).
gg2 +
  geom_smooth(color = "black")

# Restore the per-Treatment split with an explicit group aesthetic.
# se = FALSE (spelled out rather than the reassignable shorthand F) turns off
# the confidence band.
gg2 +
  geom_smooth(aes(group = Treatment), color = "black", se = FALSE)
Although it does not make sense for this data in particular, we can change the method parameter to define what kind of line is fit to the data.
# Same grouped black smoother, fitted with a linear model instead of the
# default smoothing method. se = FALSE (not the reassignable shorthand F)
# turns off the confidence band.
gg2 +
  geom_smooth(
    aes(group = Treatment),
    color = "black",
    se = FALSE,
    method = "lm"
  )
# Preview the mtcars data set.
head(mtcars)

# Base plot: one bar per gear count, filled by carburetor count.
# Note that this reassigns `gg`.
gg <- ggplot(
  data = mtcars,
  mapping = aes(x = factor(gear), fill = factor(carb))
)

# Stacked counts.
gg +
  geom_bar(position = "stack")

# Plain counts per gear, without any fill mapping.
ggplot(data = mtcars, mapping = aes(x = factor(gear))) +
  geom_bar()

# Bars scaled to equal height, showing proportions within each gear.
gg +
  geom_bar(position = "fill")

# Side-by-side bars for each carb value.
gg +
  geom_bar(position = "dodge")
If we want to plot the relative frequencies AND still dodge the fill mapping, we will need to use dplyr to calculate the values ourselves instead of leaving it up to R.
library(dplyr)

# Compute relative frequencies by hand so they can be dodged.
# summarize() drops the last grouping variable (carb), so the result is still
# grouped by gear and sum(n) is the per-gear total.
mtcars %>%
  group_by(gear, carb) %>%
  summarize(n = n()) %>%
  mutate(freq = n / sum(n)) %>%
  ggplot(aes(x = factor(gear), y = freq, fill = factor(carb))) +
  geom_bar(stat = "identity", position = "dodge")

# Scatter plot with an ellipse per color group; the ellipse layer is kept out
# of the legend. FALSE is spelled out instead of the reassignable shorthand F.
gg <- ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, color = Species)) +
  geom_point() +
  stat_ellipse(show.legend = FALSE)
print(gg)

# The same plot under three preset themes.
gg + theme_classic()
gg + theme_minimal()
gg + theme_bw()
Some other themes I like to use are theme_bw() and theme_minimal(). Link to the list of themes: https://ggplot2.tidyverse.org/reference/ggtheme.html
library(RColorBrewer)
Combine a preset theme with a custom theme (custom elements must come after the preset!), set axis limits with xlim/ylim, change the labels, and change the colors with RColorBrewer.
# Publication-style version of `gg`: preset theme first, then custom theme
# overrides, axis limits, labels and a ColorBrewer palette.
pretty_gg <- gg +
  theme_classic() +
  # Custom theme elements must come after the preset theme, or the preset
  # would overwrite them.
  theme(
    axis.text = element_text(size = 9, color = "black"),
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5, size = 10),
    axis.title = element_text(size = 9)
  ) +
  # NOTE(review): ylim() discards observations outside the limits before any
  # stat is computed; coord_cartesian(ylim = ...) zooms without dropping data.
  ylim(2, 4.25) +
  # The legend belongs to the color aesthetic (the plot maps no fill), so the
  # legend title is set through `color` rather than `fill`.
  labs(
    x = "Sepal Length",
    y = "Sepal Width",
    color = "Species",
    title = "Sepal Length vs. Sepal Width across Iris Species"
  ) +
  scale_color_brewer(palette = "Dark2")
print(pretty_gg)
Save the image as a .png using the "cairo" graphics device ("cairo" for Windows, "Xlib" or "quartz" for Mac). You may need to install the Cairo package with install.packages("Cairo") [this may also work for Mac].
# Render the styled plot to a 1200 x 1200 pixel PNG at 300 ppi.
# type = "cairo" replaces the original "Xlib", which png() does not offer on
# Windows; "cairo" is also the device the other export calls in this file use.
png(
  filename = "Iris Sepal Length vs Sepal Width.png",
  res = 300,
  type = "cairo",
  height = 1200,
  width = 1200
)
print(pretty_gg)
dev.off()
Without the Cairo device the image won't be as clear. This particularly affects curved lines and circles.
# Same export without a `type` argument, i.e. using the platform's default
# PNG device, for comparison.
png(
  filename = "Iris Sepal Length vs Sepal Width no Cairo.png",
  # won't be as clear
  res = 300,
  height = 1200,
  width = 1200
)
print(pretty_gg)
dev.off()
# Cairo export sized in inches (4 x 4 in at 300 ppi) rather than in pixels.
png(
  filename = "Iris Sepal Length vs Sepal Width.png",
  res = 300,
  type = "cairo",
  units = "in",
  height = 4,
  width = 4
)
print(pretty_gg)
dev.off()

library(ggpubr)

# The same export through ggsave(), which opens and closes the device itself.
ggsave(
  "Iris Sepal Length vs Sepal Width.png",
  pretty_gg,
  dpi = 300,
  type = "cairo",
  height = 4,
  width = 4
)

# Box plots of uptake per Type, filled by Treatment.
ggplot(data = CO2, mapping = aes(Type, uptake, fill = Treatment)) +
  geom_boxplot()
Combining Treatment and Type into a single variable allows us to use more colors if needed.
# Combine Treatment and Type into one factor so every Treatment/Type pair can
# get its own fill color. The explicit levels fix the ordering.
CO2$interaction <- factor(
  with(CO2, paste0(Treatment, Type)),
  levels = c(
    "nonchilledMississippi",
    "chilledMississippi",
    "nonchilledQuebec",
    "chilledQuebec"
  )
)
# Box plots of uptake per Type, filled by the combined Treatment/Type factor.
boxplot_gg <- ggplot(
  data = CO2,
  mapping = aes(Type, uptake, fill = interaction)
) +
  geom_boxplot(color = "black", outlier.size = 0.5) +
  # Fix the left-to-right order of the x axis.
  scale_x_discrete(limits = c("Mississippi", "Quebec")) +
  # Take entries 1, 2, 5 and 6 of the 9-color "Paired" palette, one per
  # factor level, and give each a readable legend label.
  scale_fill_manual(
    values = brewer.pal(n = 9, name = "Paired")[c(1, 2, 5, 6)],
    labels = c("control (M)", "chilled (M)", "control (Q)", "chilled (Q)")
  ) +
  # plotmath expression for the y-axis title.
  labs(
    x = "",
    y = expression(CO[2]~Uptake~(mu*mol/m^2~sec)),
    fill = "Treatment"
  ) +
  theme_classic() +
  theme(
    axis.text.x = element_text(
      size = 9, color = "black", angle = 45, hjust = 1
    ),
    axis.text.y = element_text(size = 8, color = "black")
  )

# Display the plot, then write it to disk as a 3 x 3 inch PNG.
boxplot_gg
ggsave(
  "CO2 Uptake by Location by Treatment.png",
  boxplot_gg,
  dpi = 300,
  type = "cairo",
  height = 3,
  width = 3
)

# Preview the USPersonalExpenditure data.
head(USPersonalExpenditure)
R doesn't like numeric column names, and adds an X to the beginning.
# Convert the expenditure matrix to a data frame. check.names = TRUE is the
# default, written out here: it makes the numeric column names syntactic by
# prefixing them with "X".
df <- data.frame(
  USPersonalExpenditure,
  check.names = TRUE
)
library(reshape)

# Reshape the expenditure matrix to long format.
# NOTE(review): presumably one row per matrix cell, with columns for the row
# name, the column name and the cell value, in that order. Confirm against
# reshape::melt.
USPE_melt <- melt(USPersonalExpenditure)

# Give the three columns descriptive names.
names(USPE_melt) <- c("variable", "year", "value")
Here, coord_fixed() forces the axes to the same scale. geom_label() adds text labels to the plot, but they are cut off by the plot margins.
# 1940 vs. 1945 expenditures, with each point labelled by its row name.
ggplot(data = df, mapping = aes(X1940, X1945)) +
  geom_point() +
  # geom_abline() with its defaults draws the y = x reference line.
  geom_abline() +
  geom_label(mapping = aes(label = rownames(df))) +
  coord_fixed() +
  labs(
    x = "Expenditures in 1940 (Billions of Dollars)",
    y = "Expenditures in 1945 (Billions of Dollars)"
  )
THIS ONE IS MUCH BETTER. NO OVERLAP.
# ggrepel supplies geom_label_repel().
library(ggrepel)

# Same plot, with labels that are pushed away from each other and from the
# points.
ggplot(data = df, mapping = aes(X1940, X1945)) +
  geom_point() +
  geom_abline() +
  geom_label_repel(
    mapping = aes(label = rownames(df)),
    force = 20
  ) +
  coord_fixed() +
  labs(
    x = "Expenditures in 1940 (Billions of Dollars)",
    y = "Expenditures in 1945 (Billions of Dollars)"
  )
# 1940 vs. 1945 per expenditure category, with a shared y axis.
# group = 1 joins the points within a panel into a single line even though x
# is a factor.
USPE_melt %>%
  filter(year %in% c(1940, 1945)) %>%
  ggplot(aes(x = factor(year), y = value, group = 1)) +
  geom_line() +
  geom_point() +
  facet_wrap(~ variable) +
  labs(x = "Year", y = "Total US Expenditures (Billions of Dollars)") +
  theme_bw()

# Same plot with an independent y axis per panel. The argument is spelled
# `scales` in full; the original `scale` only worked through partial argument
# matching.
USPE_melt %>%
  filter(year %in% c(1940, 1945)) %>%
  ggplot(aes(x = factor(year), y = value, group = 1)) +
  geom_line() +
  geom_point() +
  facet_wrap(~ variable, scales = "free_y") +
  labs(x = "Year", y = "Total US Expenditures (Billions of Dollars)") +
  theme_bw()

# All years, with a shared y axis.
ggplot(USPE_melt, aes(x = factor(year), y = value, group = 1)) +
  geom_line() +
  geom_point() +
  facet_wrap(~ variable) +
  labs(x = "Year", y = "Total US Expenditures (Billions of Dollars)") +
  theme_bw()
These will be for factor levels you plan on using later while plotting.
# Long format: vs, am, gear and carb stay as identifier columns; every other
# column is stacked into variable/value pairs.
mtcars_melt <- melt(mtcars, id.vars = c("vs", "am", "gear", "carb"))

# One panel per measured variable, each with its own y axis. The argument is
# spelled `scales` in full; the original `scale` only worked through partial
# argument matching.
ggplot(mtcars_melt, aes(y = value, x = factor(am))) +
  geom_violin() +
  geom_beeswarm(aes(color = factor(vs)), cex = 5) +
  facet_wrap(~ variable, scales = "free_y") +
  theme_bw() +
  # Relabel the am codes 0 and 1 on the x axis.
  scale_x_discrete(
    limits = factor(c(0, 1)),
    labels = c("Automatic", "Manual")
  ) +
  labs(
    x = "",
    y = "Value",
    color = "Engine Type",
    title = "Automatic\nvs.\nManual\nMotor Vehicle Trends"
  ) +
  # Colors and legend labels for the vs codes, in level order.
  scale_color_manual(
    values = c("black", "red"),
    labels = c("V-shaped", "Straight")
  )
If we don't free the y-axis scales, some variables can't be viewed properly.
# The same faceted plot with the default shared y axis across all panels.
ggplot(data = mtcars_melt, mapping = aes(x = factor(am), y = value)) +
  geom_violin() +
  geom_beeswarm(mapping = aes(color = factor(vs)), cex = 5) +
  facet_wrap(~ variable) +
  theme_bw() +
  # Relabel the am codes 0 and 1 on the x axis.
  scale_x_discrete(
    limits = factor(c(0, 1)),
    labels = c("Automatic", "Manual")
  ) +
  labs(
    x = "",
    y = "Value",
    color = "Engine Type",
    title = "Automatic\nvs.\nManual\nMotor Vehicle Trends"
  ) +
  # Colors and legend labels for the vs codes, in level order.
  scale_color_manual(
    values = c("black", "red"),
    labels = c("V-shaped", "Straight")
  )
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.