# Global knitr defaults applied to every chunk in this report.
knitr::opts_chunk$set(
  # Printed output: fold source and results together, prefix results with "#>".
  collapse = TRUE,
  comment = "#>",
  # Figures: 7 x 5, left-aligned, image files written under Figs/.
  fig.width = 7,
  fig.height = 5,
  fig.align = "left",
  fig.path = "Figs/",
  # Keep warnings and messages out of the rendered report.
  warning = FALSE,
  message = FALSE
)
library(JTIMLmaster)
library(ggplot2)

Introduction

This report was created to practice data visualization with the ggplot2 R package. The data set we will use is `mpg` (miles per gallon), which includes the variables `r paste(names(mpg), collapse = ", ")`. Let's assume that we are interested in whether cars with bigger engines consume more gas. The relationship could be positive or negative, linear or some other shape. The variable `displ` is the engine size in liters, and `hwy` is the car's highway fuel efficiency in miles per gallon (mpg).

1. Scatter plot with smoothing

# Baseline scatter plot: engine size (displ) on x, fuel efficiency (hwy) on y.
ggplot(mpg) +
  geom_point(aes(displ, hwy))

# Encode vehicle class through a third aesthetic, one aesthetic per plot.
ggplot(mpg) +
  geom_point(aes(displ, hwy, color = class))

ggplot(mpg) +
  geom_point(aes(displ, hwy, size = class))

ggplot(mpg) +
  geom_point(aes(displ, hwy, alpha = class))

# NOTE(review): ggplot2's default shape palette holds six shapes; if class has
# more levels than that, the extra levels are not drawn -- confirm.
ggplot(mpg) +
  geom_point(aes(displ, hwy, shape = class))

# color is given outside aes(), so it is a fixed setting for every point
# rather than a mapping from a column of the data.
ggplot(mpg) +
  geom_point(aes(displ, hwy), color = "red")
# One panel per vehicle class, wrapped onto two rows.
ggplot(mpg) +
  geom_point(aes(displ, hwy)) +
  facet_wrap(vars(class), nrow = 2)

# Full grid: drive train (drv) down the rows, cylinder count (cyl) across the
# columns.
ggplot(mpg) +
  geom_point(aes(displ, hwy)) +
  facet_grid(rows = vars(drv), cols = vars(cyl))

# Columns only: one panel per cylinder count.
ggplot(mpg) +
  geom_point(aes(displ, hwy)) +
  facet_grid(cols = vars(cyl))

# Rows only: one panel per drive train.
ggplot(mpg) +
  geom_point(aes(displ, hwy)) +
  facet_grid(rows = vars(drv))
# The same x/y mapping drawn with two different geoms: raw points first,
# then a smoothed trend line.
ggplot(mpg) +
  geom_point(aes(displ, hwy))

ggplot(mpg) +
  geom_smooth(aes(displ, hwy))

# One smoothed line per drive train, told apart by line type.
## drv = drive train, 4=four-wheel drive, f=front-wheel drive, r=rear-wheel drive
ggplot(mpg) +
  geom_smooth(aes(displ, hwy, linetype = drv))
# A single smoothed trend across all cars.
ggplot(mpg) +
  geom_smooth(aes(displ, hwy))

# group = drv splits the data into one curve per drive train without mapping
# drv to any visible property.
ggplot(mpg) +
  geom_smooth(aes(displ, hwy, group = drv))

# color = drv also splits by drive train and colors each curve; the legend is
# switched off explicitly.
ggplot(mpg) +
  geom_smooth(aes(displ, hwy, color = drv), show.legend = FALSE)

# Two layers on one plot, with the mapping repeated inside each layer.
ggplot(mpg) +
  geom_point(aes(displ, hwy)) +
  geom_smooth(aes(displ, hwy))

# The same two layers, with the mapping declared once in ggplot() and
# inherited by both geoms.
ggplot(mpg, aes(displ, hwy)) +
  geom_point() +
  geom_smooth()

# A layer-level mapping extends the inherited one for that layer only:
# the points are colored by class, the smooth line is not.
ggplot(mpg, aes(displ, hwy)) +
  geom_point(aes(color = class)) +
  geom_smooth()

2. Bar plot

# Default bar chart: geom_bar() counts the rows in each cut and uses the
# count as the bar height.
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut))

## By counts
# Calling the counting stat directly instead of going through geom_bar().
ggplot(data = diamonds) +
  stat_count(mapping = aes(x = cut))

## By frequency
# Frequencies are already tabulated here, so stat = "identity" tells geom_bar()
# to use the freq column as the bar height instead of counting rows.
demo <- dplyr::tribble(
  ~cut,        ~freq,
  "Fair",       1610,
  "Good",       4906,
  "Very Good", 12082,
  "Premium",   13791,
  "Ideal",     21551
)
ggplot(data = demo) +
  geom_bar(mapping = aes(x = cut, y = freq), stat = "identity")

## By proportion
# after_stat(prop) replaces the deprecated ..prop.. notation for referring to
# a variable computed by the stat. group = 1 puts every cut in one group so
# each bar is a share of the whole data set.
ggplot(data = diamonds) +
  geom_bar(mapping = aes(x = cut, y = after_stat(prop), group = 1))


# colour maps cut to the bar outline; fill maps it to the bar interior.
ggplot(diamonds) +
  geom_bar(aes(cut, colour = cut))

ggplot(diamonds) +
  geom_bar(aes(cut, fill = cut))

# Filling by a second variable (clarity) with the default position adjustment.
ggplot(diamonds) +
  geom_bar(aes(cut, fill = clarity))

# position = "identity" draws every clarity bar from the baseline, so the bars
# overlap; they are made visible with transparency, or with outlines only.
ggplot(diamonds, aes(cut, fill = clarity)) +
  geom_bar(position = "identity", alpha = 1 / 5)

ggplot(diamonds, aes(cut, colour = clarity)) +
  geom_bar(position = "identity", fill = NA)

# position = "fill": stacked bars scaled to a common height.
ggplot(diamonds) +
  geom_bar(aes(cut, fill = clarity), position = "fill")


# position = "dodge": clarity bars placed side by side within each cut.
ggplot(diamonds) +
  geom_bar(aes(cut, fill = clarity), position = "dodge")

3. Boxplot and coordinate systems

# Distribution of highway mpg within each vehicle class.
ggplot(mpg, aes(class, hwy)) +
  geom_boxplot()

# The same boxplot with the axes swapped, so the boxes run horizontally.
ggplot(mpg, aes(class, hwy)) +
  geom_boxplot() +
  coord_flip()

# Outline data for the "nz" map, as returned by map_data().
nz <- map_data("nz")

# Drawn on the default coordinate system.
ggplot(data = nz, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(colour = "black", fill = "white")

# The same polygons drawn with coord_quickmap().
ggplot(data = nz, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(colour = "black", fill = "white") +
  coord_quickmap()


# Reusable bar chart of diamond cuts: full-width bars, no legend, no axis
# titles, square plotting area.
bar <- ggplot(diamonds) +
  geom_bar(aes(cut, fill = cut), width = 1, show.legend = FALSE) +
  labs(x = NULL, y = NULL) +
  theme(aspect.ratio = 1)

# The same chart under two different coordinate systems.
bar + coord_flip()
bar + coord_polar()

Exercise

We will use a data set called Diabetes, which includes Pregnancies, Glucose, Blood pressure, Skin thickness, Insulin level, BMI, Diabetes pedigree function, Age, and Diabetes status (Outcome). Can you create a QC report that shows the distributions of the variables and explores the data using ggplot2? You can use any of the three plot types that we just went through.

1) Which variables in the diabetes data are categorical, and which are continuous? 2) Which plot would you use to show each variable's distribution? 3) What are the relationships between different variables, e.g. BMI vs. Glucose, BMI vs. Blood pressure, Age vs. BMI, and so on? 4) Play with the plot that you created in 3). What happens if you map color to the Outcome variable or to Pregnancies? What about mapping color to a continuous variable? How do the results differ? 5) Use facets to split the plot from 3) by a categorical variable.



jjsayleraxio/JTIMLmaster documentation built on Nov. 4, 2019, 2:57 p.m.