## Global knitr chunk options for this document, one option per line.
knitr::opts_chunk$set(
  collapse   = TRUE,
  comment    = "#>",
  warning    = FALSE,
  message    = FALSE,
  fig.path   = "Figs/",
  fig.align  = "left",
  fig.width  = 7,
  fig.height = 5
)
## Attach the packages used by the chunks below.
## Each call must be its own statement: two calls on one line without a
## newline or ";" between them do not parse.
library(JTIMLmaster)
library(ggplot2)
This report was created to practice data visualization using the `ggplot2` R package. The data set we will use is `mpg` (miles per gallon),
which includes `r paste(names(mpg), collapse = ", ")`
. Let's assume that we are interested in whether a bigger engine consumes more gas. The relationship can be positive, negative, linear, or some other shape. The variable `displ`
is the engine size in liters and `hwy`
is a car's fuel efficiency in miles per gallon (mpg).
## Scatterplots of engine size (displ) against fuel efficiency (hwy).
## Each plot is a separate statement so that every one is parsed and printed.

## Baseline scatterplot.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy))

## Map a third variable, class, to a different aesthetic in each plot.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, color = class))

ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, size = class))

ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, alpha = class))

ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, shape = class))

## Here color is given outside aes(), so it is a fixed setting applied to
## every point rather than a mapping to a variable.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy), color = "red")
## Facets: split the displ-vs-hwy scatterplot into panels.
## Each plot is a separate statement so that every one is parsed and printed.

## One panel per class, wrapped into two rows.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy)) +
  facet_wrap(~ class, nrow = 2)

## Two-variable grid: drv on the left of the formula, cyl on the right.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy)) +
  facet_grid(drv ~ cyl)

## Facet by cyl only; "." is the placeholder for the unused side.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy)) +
  facet_grid(. ~ cyl)

## Facet by drv only.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy)) +
  facet_grid(drv ~ .)
## The same data and mapping drawn with two different geoms.
## Each plot is a separate statement so that every one is parsed and printed.

## Points.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy))

## Smoothed line.
ggplot(data = mpg) +
  geom_smooth(mapping = aes(x = displ, y = hwy))

## One smoothed line per value of drv, distinguished by line type.
## drv = drive train, 4=four-wheel drive, f=front-wheel drive, r=rear-wheel drive
ggplot(data = mpg) +
  geom_smooth(mapping = aes(x = displ, y = hwy, linetype = drv))
## Grouping smooths and combining several geoms in one plot.
## Each plot is a separate statement so that every one is parsed and printed.

## A single smooth over all rows.
ggplot(data = mpg) +
  geom_smooth(mapping = aes(x = displ, y = hwy))

## group = drv: the smooth is split by drv.
ggplot(data = mpg) +
  geom_smooth(mapping = aes(x = displ, y = hwy, group = drv))

## color = drv: split by drv and colored, with the legend switched off.
ggplot(data = mpg) +
  geom_smooth(
    mapping = aes(x = displ, y = hwy, color = drv),
    show.legend = FALSE
  )

## Two layers, with the mapping repeated in each layer.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy)) +
  geom_smooth(mapping = aes(x = displ, y = hwy))

## The same two layers with the mapping declared once in ggplot().
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  geom_smooth()

## color = class is mapped in the point layer only; geom_smooth() is given
## no mapping of its own.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(color = class)) +
  geom_smooth()
## Bar charts of the diamonds data.
## Comments and statements are on separate lines: with everything on one
## line, the first "##" turned all following code into a comment.

## Bar chart of cut.
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut))

## By counts
ggplot(data = diamonds) +
  stat_count(mapping = aes(x = cut))

## By frequency
demo <- dplyr::tribble(
  ~cut,        ~freq,
  "Fair",       1610,
  "Good",       4906,
  "Very Good", 12082,
  "Premium",   13791,
  "Ideal",     21551
)
## stat = "identity" plots the freq values as given.
ggplot(data = demo) +
  geom_bar(mapping = aes(x = cut, y = freq), stat = "identity")

## By proportion
## after_stat(prop) replaces the deprecated ..prop.. notation.
## NOTE(review): requires ggplot2 >= 3.3.0 -- confirm the installed version.
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut, y = after_stat(prop), group = 1))

## Map cut to the bar outline (colour) and then to the bar fill.
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut, colour = cut))

ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut, fill = cut))

## Fill by a second variable, clarity.
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut, fill = clarity))

## position = "identity", first with semi-transparent bars, then with
## outlined bars that have no fill.
ggplot(data = diamonds, mapping = aes(x = cut, fill = clarity)) +
  geom_bar(alpha = 1/5, position = "identity")

ggplot(data = diamonds, mapping = aes(x = cut, colour = clarity)) +
  geom_bar(fill = NA, position = "identity")

## position = "fill"
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut, fill = clarity), position = "fill")

## position = "dodge"
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut, fill = clarity), position = "dodge")
## Coordinate systems.
## Each statement is on its own line so that the assignments and plots parse.

## Boxplots of hwy by class, then the same plot with the axes flipped.
ggplot(data = mpg, mapping = aes(x = class, y = hwy)) +
  geom_boxplot()

ggplot(data = mpg, mapping = aes(x = class, y = hwy)) +
  geom_boxplot() +
  coord_flip()

## Map polygons from map_data("nz"), drawn without and then with
## coord_quickmap().
## NOTE(review): map_data() presumably needs the maps package installed --
## confirm.
nz <- map_data("nz")

ggplot(nz, aes(long, lat, group = group)) +
  geom_polygon(fill = "white", colour = "black")

ggplot(nz, aes(long, lat, group = group)) +
  geom_polygon(fill = "white", colour = "black") +
  coord_quickmap()

## A bar chart stored in `bar`, then shown with flipped and with polar
## coordinates.
bar <- ggplot(data = diamonds) +
  geom_bar(
    mapping = aes(x = cut, fill = cut),
    show.legend = FALSE,
    width = 1
  ) +
  theme(aspect.ratio = 1) +
  labs(x = NULL, y = NULL)

bar + coord_flip()
bar + coord_polar()
We will use a data set called `Diabetes`,
which includes Pregnancies, Glucose, Blood pressure, Skin thickness, Insulin level, BMI, Diabetes pedigree function, Age, and Diabetes status (Outcome)
. Can you create a QC report showing the distributions of the variables and explore the data using `ggplot2`
? You can use any of the three plots that we just went through.
1) Which variables in diabetes are categorical? Which variables are continuous?
2) Which plot do you want to use for each variable's distribution?
3) What are the relationships between different variables? For example, BMI vs. Glucose, BMI vs. Blood pressure, Age vs. BMI, and so on.
4) Play with the plot that you created in 3). How about adding `color=`
by the outcome variable or by pregnancies? How about using color with a continuous variable? How are they different?
5) Use facets to split the plot from 3) by different categories.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.