knitr::opts_chunk$set(echo = TRUE)
library(ggplot2)
library(dplyr)
library(magrittr)

It looks like an aesthetic specified in the base layer becomes the default for the layer.

# stat = "sum" points with `size` mapped to Species in the base layer.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, size = Species)) +
  geom_point(stat = "sum")

BUT, if it is not specified, the default for the stat kicks in:

# Same layer, but with no `size` mapping in the base layer.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point(stat = "sum")

The problem is: how do you specify NULL now? Previously, an empty specification meant NULL, but that is no longer the case. One option is to have the reset button prompt the user for which default they want: the base layer's value or the stat's default.

Stats

Basic, statistic,

Primitives

Bar

Stat = count (default) [Bar plot]

# Bar chart of `class`, leaving geom_bar()'s stat unspecified.
ggplot(mpg, aes(x = class)) +
  geom_bar()

# Same call with stat = "count" spelled out.
ggplot(mpg, aes(x = class)) +
  geom_bar(stat = "count")

Computed variables:

Layer data

# Show the data frame computed for the count layer.
layer_data(
  ggplot(mpg, aes(x = class)) +
    geom_bar(stat = "count")
)

Default aesthetics

# Count bars with the y aesthetic mapped explicitly to the computed `count`.
ggplot(mpg, aes(x = class)) +
  geom_bar(mapping = aes(y = stat(count)), stat = "count")

Stat = bin [Histogram/Relative frequency dist]

This is essentially a histogram, but with fewer options (e.g., no binwidth allowed).

# geom_bar() driven by the "bin" stat on a continuous variable.
ggplot(mpg, aes(x = displ)) +
  geom_bar(stat = "bin")

Computed variables:

# Same layer with the number of bins set to 40.
ggplot(mpg, aes(x = displ)) +
  geom_bar(bins = 40, stat = "bin")

All stat_bin arguments should work (binwidth currently gives a warning, though it probably shouldn't).

# Pass several stat_bin arguments (bins, closed, pad) through geom_bar().
ggplot(mpg, aes(x = displ)) +
  geom_bar(stat = "bin", bins = 30, closed = "right", pad = TRUE)

Default aesthetics.

# Binned bars with y mapped explicitly to the computed `count`.
ggplot(mpg, aes(x = displ)) +
  geom_bar(mapping = aes(y = stat(count)), stat = "bin", bins = 30)

Relative frequency distribution (use stat(density))

# Binned bars with y mapped to the computed `density` instead of `count`.
ggplot(mpg, aes(x = displ)) +
  geom_bar(mapping = aes(y = stat(density)), stat = "bin", bins = 30)

Make sure the density sums (integrates) to 1. First, check out the layer data.

# Build the binned bar layer and pull out the data frame the stat computed.
tmp <- mpg %>% 
  ggplot(aes(x = displ)) + 
  geom_bar(stat = "bin", 
           aes(y = stat(count)),
           bins = 30)
ldata <- layer_data(tmp)
str(ldata)
# Riemann sum of the density: multiply each bin's density by that bin's own
# width (xmax - xmin) rather than by the width of the first bin only, so the
# check stays valid even if the bins are not all the same width.
sum(ldata$density * (ldata$xmax - ldata$xmin))

Stat = identity [Bar plot, manual]

# Mean Sepal.Length per Species, stored in `means` and then printed.
means <- iris %>%
  group_by(Species) %>%
  summarize(mu = mean(Sepal.Length))
means

# Bars whose heights are taken directly from the precomputed `mu` column.
ggplot(means, aes(x = Species, y = mu)) +
  geom_bar(stat = "identity")

geom_col also does this

# geom_col() version of the same precomputed-height bar chart.
ggplot(means, aes(x = Species, y = mu)) +
  geom_col()

Note that you can also use the stat = summary to get this automatically.

Stat = summary [Bar plot, statistics]

# Bars of mean Sepal.Length per Species, computed by the "summary" stat.
# NOTE(review): ggplot2 3.3.0+ renames `fun.y` to `fun`; confirm the target
# ggplot2 version before switching.
ggplot(iris, aes(x = Species, y = Sepal.Length)) +
  geom_bar(fun.y = "mean", stat = "summary")

What does the layer data look like?

# Keep the summary-stat bar plot in `tmp` so its layer data can be inspected.
tmp <- ggplot(iris, aes(x = Species, y = Sepal.Length)) +
  geom_bar(fun.y = "mean", stat = "summary")

# Structure of the data frame computed for that layer.
ldata <- layer_data(tmp)
str(ldata)

Ah, makes sense - the layer data has the usual aesthetics. Note that the y aesthetic for the geom_bar layer is not really inherited from the base layer; it uses the calculated stat(y) from the layer data instead. Even so, you will get an error if you try to specify the y aesthetic manually, as follows:

# Demonstration kept on purpose: per the notes above, mapping y to stat(y)
# by hand on a summary-stat layer is expected to raise an error.
ggplot(iris, aes(x = Species, y = Sepal.Length)) +
  geom_bar(
    mapping = aes(y = stat(y)),
    stat = "summary",
    fun.y = "mean"
  )

Let's try another one (sum):

# Summary-stat bars using "sum" as the summary function.
ggplot(iris, aes(x = Species, y = Sepal.Length)) +
  geom_bar(fun.y = "sum", stat = "summary")

What about counting elements using length? Should be equivalent to stat = count.

# Summary-stat bars using "length" as the summary function, i.e. the number
# of `displ` values per `class`.
ggplot(mpg, aes(x = class, y = displ)) +
  geom_bar(fun.y = "length", stat = "summary")

Yep! Only issue is y-axis label.

Line/Area

Doing these together as they are very similar.

Stat = identity (default) [Line plot]

# Line plot of unemploy against date.
economics %>%
  ggplot(aes(x = date, y = unemploy)) +
  geom_line()

# Area plot of the same mapping.
economics %>%
  ggplot(aes(x = date, y = unemploy)) +
  geom_area()

Stat = count

Useful for connecting the bars of a bar plot with a line, but this needs a constant group aesthetic (e.g. group = 1).

# Count bars overlaid with a count line; group = 1 puts all classes in a
# single group for the line layer.
ggplot(mpg, aes(x = class)) +
  geom_bar(stat = "count") +
  geom_line(mapping = aes(group = 1), stat = "count")

Again, y aesthetic is implied:

# Count bars plus a count line, with y mapped explicitly to the computed
# `count` on the line layer.
ggplot(mpg, aes(x = class)) +
  geom_bar(stat = "count") +
  geom_line(mapping = aes(y = stat(count), group = 1), stat = "count")

# Area layer with the same explicit mapping.
ggplot(mpg, aes(x = class)) +
  geom_area(mapping = aes(y = stat(count), group = 1), stat = "count")

Stat = bin

Same as stat = count, draw line for histogram. In this case, no grouping needed. Also, I added pad = TRUE to match behavior of geom_freqpoly (see below).

# Binned bars overlaid with a binned line; the line layer sets pad = TRUE.
ggplot(mpg, aes(x = displ)) +
  geom_bar(stat = "bin", bins = 30) +
  geom_line(stat = "bin", bins = 30, pad = TRUE)

This is similar to geom_freqpoly. Note that padding happens by default with geom_freqpoly and cannot be changed with the pad argument.

# Binned bars with geom_freqpoly() on top.
ggplot(mpg, aes(x = displ)) +
  geom_bar(stat = "bin", bins = 30) +
  geom_freqpoly(bins = 30)

# Binned bars with a padded, binned area layer on top.
ggplot(mpg, aes(x = displ)) +
  geom_bar(stat = "bin", bins = 30) +
  geom_area(stat = "bin", bins = 30, pad = TRUE)

# Same again, with the area made translucent and outlined in black.
ggplot(mpg, aes(x = displ)) +
  geom_bar(stat = "bin", bins = 30) +
  geom_area(
    stat = "bin",
    bins = 30,
    pad = TRUE,
    alpha = 0.3,
    colour = "black"
  )

Stat = density

# Density of carat drawn as a line.
ggplot(diamonds, aes(x = carat)) +
  geom_line(stat = "density")

# Density of carat drawn as an area.
ggplot(diamonds, aes(x = carat)) +
  geom_area(stat = "density")

# Density lines of depth coloured by cut, with x limited to 55-70.
ggplot(diamonds, aes(x = depth, colour = cut)) +
  geom_line(stat = "density") +
  xlim(55, 70)

Compare to geom_density. Note that if you use stat_density, position must be identity to replicate this plot!!

# geom_density() reference plot: depth coloured by cut, x limited to 55-70.
diamonds %>%
  ggplot(aes(x = depth, colour = cut)) +
  geom_density() +
  xlim(55, 70)

Let's try to replicate this one:

# Target plot: translucent geom_density() curves filled and coloured by cut.
diamonds %>%
  ggplot(aes(x = depth, fill = cut, colour = cut)) +
  geom_density(alpha = 0.1) +
  xlim(55, 70)

# Replication attempt using geom_area() with the density stat and
# position = "identity".
diamonds %>%
  ggplot(aes(x = depth, fill = cut, colour = cut)) +
  geom_area(stat = "density", position = "identity", alpha = 0.1) +
  xlim(55, 70)

Let's try another one:

# Density lines with y mapped to the computed `count`, stacked by cut.
diamonds %>%
  ggplot(aes(x = carat, y = stat(count), colour = cut)) +
  geom_line(position = "stack", stat = "density")

One more:

# Same mapping as the stacked version, using position = "fill".
diamonds %>%
  ggplot(aes(x = carat, y = stat(count), colour = cut)) +
  geom_line(position = "fill", stat = "density")

Better with geom_area:

# Area version of the position = "fill" plot, filled by cut with a black outline.
diamonds %>%
  ggplot(aes(x = carat, y = stat(count), fill = cut)) +
  geom_area(position = "fill", stat = "density", colour = "black")

Stat = summary

# Each plot builds a 1000-point grid on [0, 4*pi], maps it to both x and y,
# and lets the "summary" stat apply a function to the y values.

# Line with sin as the summary function.
data.frame(x = seq(from = 0, to = 4 * pi, length.out = 1000)) %>%
  ggplot(aes(x = x, y = x)) +
  geom_line(stat = "summary", fun.y = sin)

# Area with sin as the summary function.
data.frame(x = seq(from = 0, to = 4 * pi, length.out = 1000)) %>%
  ggplot(aes(x = x, y = x)) +
  geom_area(stat = "summary", fun.y = sin)

# Ribbon with sin supplied for ymin and cos for ymax.
data.frame(x = seq(from = 0, to = 4 * pi, length.out = 1000)) %>%
  ggplot(aes(x = x, y = x)) +
  geom_ribbon(stat = "summary", fun.ymin = sin, fun.ymax = cos)

Points

Stat = identity (default) [Scatter plot]

# Scatter plot with the identity stat named explicitly.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point(stat = "identity")

Stat = count

# Points from the count stat, y mapped to the computed `count`.
ggplot(mpg, aes(x = cyl)) +
  geom_point(mapping = aes(y = stat(count)), stat = "count")

# Points from the count stat, y mapped to the computed `prop`.
ggplot(mpg, aes(x = cyl)) +
  geom_point(mapping = aes(y = stat(prop)), stat = "count")

Stat = sum [geom_count]

Should be similar to geom_count

# geom_point() with the sum stat: size mapped to the computed `n`, weight
# fixed at 1.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point(mapping = aes(size = stat(n), weight = 1), stat = "sum")

# geom_count() on the same mapping, for comparison.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_count()

# Sum stat again, with size mapped to the computed `prop`.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point(mapping = aes(size = stat(prop)), stat = "sum")

Non-primitives

geom_histogram

# geom_histogram() with the binning given three ways.

# By number of bins.
ggplot(iris, aes(x = Sepal.Length)) +
  geom_histogram(bins = 40)

# By bin width.
ggplot(iris, aes(x = Sepal.Length)) +
  geom_histogram(binwidth = 0.1)

# By explicit break points.
ggplot(iris, aes(x = Sepal.Length)) +
  geom_histogram(breaks = c(4, 5, 6, 7, 8))

Geom = Bar, Stat = Bin

# geom_bar() + stat "bin" counterparts of the three histogram calls.

# By number of bins.
ggplot(iris, aes(x = Sepal.Length)) +
  geom_bar(bins = 40, stat = "bin")

# By bin width.
ggplot(iris, aes(x = Sepal.Length)) +
  geom_bar(binwidth = 0.1, stat = "bin")

# By explicit break points.
ggplot(iris, aes(x = Sepal.Length)) +
  geom_bar(breaks = c(4, 5, 6, 7, 8), stat = "bin")

Explicit aesthetic assignment:

# Binned bars on explicit breaks, with y mapped by hand to the computed `count`.
ggplot(iris, aes(x = Sepal.Length)) +
  geom_bar(
    mapping = aes(y = stat(count)),
    stat = "bin",
    breaks = c(4, 5, 6, 7, 8)
  )

geom_smooth

# Base plot stored in `p`; later snippets add layers to it.
p <- ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width))

# Reference plot: points plus geom_smooth() with method "lm".
p + geom_point() + geom_smooth(method = "lm")

geom_line w/ stat = smooth + geom_ribbon w/ stat = smooth

# Rebuild the smooth from two layers that both use stat "smooth":
# a grey ribbon and a blue line.
p +
  geom_point() +
  geom_ribbon(
    stat = "smooth",
    method = "lm",
    fill = "grey60",
    alpha = 0.4
  ) +
  geom_line(
    stat = "smooth",
    method = "lm",
    size = 1,
    colour = "#3366FF"
  )

Explicit aesthetic assignments:

# Same ribbon + line construction, with the ribbon's ymin/ymax mapped by hand
# to the computed variables of the same names.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
  geom_point() +
  geom_ribbon(
    mapping = aes(ymin = stat(ymin), ymax = stat(ymax)),
    stat = "smooth",
    method = "lm",
    fill = "grey60",
    alpha = 0.4
  ) +
  geom_line(
    stat = "smooth",
    method = "lm",
    size = 1,
    colour = "#3366FF"
  )

geom_count

# Reassign `p` to the sepal base plot and draw the geom_count() reference.
p <- ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width))
p + geom_count()

geom_point w/ stat = sum

# geom_point() with stat "sum" added to the base plot `p`.
p + geom_point(stat = "sum")

Explicit aesthetic assignments:

# Same layer, with size mapped by hand to the computed `n`.
p + geom_point(mapping = aes(size = stat(n)), stat = "sum")

geom_freqpoly

# Reassign `p` to price coloured by cut and draw the geom_freqpoly() reference.
p <- diamonds %>%
  ggplot(aes(x = price, colour = cut))
p + geom_freqpoly(binwidth = 500)

geom_line + stat = "bin" + pad = TRUE

# geom_line() with stat "bin" and pad = TRUE added to `p`.
p + geom_line(binwidth = 500, pad = TRUE, stat = "bin")

Explicit aesthetic assignment:

# Same layer, with y mapped by hand to the computed `count`.
p + geom_line(
  mapping = aes(y = stat(count)),
  stat = "bin",
  binwidth = 500,
  pad = TRUE
)

Extending:

# Binned line plus a point layer using the same stat and bin settings.
p +
  geom_line(binwidth = 500, pad = TRUE, stat = "bin") +
  geom_point(binwidth = 500, pad = TRUE, stat = "bin")

geom_boxplot

# Reassign `p` to Sepal.Length by Species and draw the geom_boxplot() reference.
p <- iris %>%
  ggplot(aes(x = Species, y = Sepal.Length))
p + geom_boxplot()

# Rebuild from other geoms: a line range using stat "boxplot" and a crossbar
# using stat "summary" with median_hilow at conf.int = 0.5.
# Extreme values are not handled yet. The commented-out geom_point() layer at
# the end is the intended approach, but it does not work because the y
# aesthetic ends up defined twice: once to compute the stat and again on
# assignment. It might work with ggplot2 3.3.0.
p +
  geom_linerange(stat = "boxplot") +
  geom_crossbar(
    stat = "summary",
    fun.data = "median_hilow",
    fun.args = list(conf.int = 0.5),
    fill = "white",
    width = 0.8,
    fatten = 2.0,
    size = 0.4
  ) # +
  # geom_point(stat = "boxplot", aes(y = after_stat(outliers)))


serenity-r/serenity.viz documentation built on Dec. 29, 2020, 4:53 a.m.