ggh4x: Hacks for 'ggplot2'

# Distribution classifier -------------------------------------------------

test_that("class_distri classifies discrete distributions from the stats package", {
  distis <- c("pois", "nbinom", "binom", "geom", "hyper", "signrank", "multinom")
  classes <- character(0)
  for (i in distis) {
    classes <- c(classes, class_distri(i))
  }
  expect_true(all(classes == "discrete"))
})

test_that("class_distri classifies continuous distributions from the stats package", {
  distis <- c("beta", "cauchy", "chisq", "exp", "f", "gamma",
              "lnorm", "norm", "t", "unif", "weibull", "logis")
  classes <- character(0)
  for (i in distis) {
    classes <- c(classes, class_distri(i))
  }
  expect_true(all(classes == "continuous"))
})



test_that("class_distri identifies unknown distributions correctly", {

  rxyz <- function(n, mean = 0, sd = 1) {rnorm(n, mean, sd)}
  rzyx <- function(n, lambda) {rpois(n, lambda)}

  test <- class_distri("xyz")
  expect_equal(test, "continuous")

  test <- class_distri("zyx")
  expect_equal(test, "discrete")
})

# Warning tester ----------------------------------------------------------

basic <- ggplot(iris, aes(x = Sepal.Length, colour = Species)) +
  geom_density()

test_that("stat_theodensity warns about hypergeometric and multinomial distributions", {
  expect_error(basic + stat_theodensity(distri = "hyper"))
  expect_error(basic + stat_theodensity(distri = "multinomial"))
})

test_that("stat_theodensity warns about discrete distributions with continuous data", {
  plot <- basic + stat_theodensity(distri = "binom")
  expect_error(ggplot_build(plot))
})

test_that("stat_theodensity skips groups with less than two datapoints", {
  # Only include 1 setosa observation
  plot <- ggplot(iris[c(1,51:150),], aes(x = Sepal.Length, colour = Species)) +
    stat_theodensity()
  expect_warning(dat <- layer_data(plot))
  tab <- as.numeric(table(dat$group))
  expect_equal(tab, c(1, 512, 512))
  expect_true(all(c(is.na(dat$y[1]),
                    is.na(dat$x[1]),
                    is.na(dat$density[1]),
                    is.na(dat$count[1]),
                    is.na(dat$n[1]),
                    is.na(dat$scaled[1]))))
  expect_false(any(c(is.na(dat$y[-1]),
                    is.na(dat$x[-1]),
                    is.na(dat$density[-1]),
                    is.na(dat$count[-1]),
                    is.na(dat$n[-1]),
                    is.na(dat$scaled[-1]))))
})


# Discrete distributions --------------------------------------------------

dat <- faithful$waiting

dat_by_distri <- function(distri) {
  g <- ggplot(faithful, aes(x = waiting)) +
    stat_theodensity(distri = distri)
  layer_data(g)
}

test_that("stat_theodensity fits poisson", {
  df <- dat_by_distri("pois")
  expect_equal(df$x, seq(min(dat), max(dat), by = 1))
  thismax <- df$x[which.max(df$y)]
  expect_lt(abs(mean(dat) - thismax), 10)
})

test_that("stat_theodensity fits geometric", {
  df <- dat_by_distri("geom")
  expect_equal(df$x, seq(min(dat), max(dat), by = 1))
})

test_that("stat_theodensity fits negative binomial", {
  df <- dat_by_distri("nbinom")
  expect_equal(df$x, seq(min(dat), max(dat), by = 1))
  thismax <- df$x[which.max(df$y)]
  expect_lt(abs(mean(dat) - thismax), 10)
})

test_that("stat_theodensity fits binomial", {
  expect_message(df <- dat_by_distri("binom"))
  expect_equal(df$x, seq(min(dat), max(dat), by = 1))
  thismax <- df$x[which.max(df$y)]
  expect_lt(abs(mean(dat) - thismax), 10)
})

test_that("stat_theodensity fits sign rank", {
  expect_error(dat_by_distri("signrank"))
})

test_that("stat_theodensity fits wilcoxon", {
  expect_error(dat_by_distri("wilcox"))
})


# continuous distributions ------------------------------------------------

test_that("stat_theodensity fits beta", {
  set.seed(1)
  d <- data.frame(x = rbeta(500, 0.5, 0.5))
  g <- ggplot(d, aes(x = x)) +
    stat_theodensity(distri = "beta")
  ld <- layer_data(g)
  expect_equal(nrow(ld), 512)

  # Beta with 0.5, 0.5 shapes should have two maxima
  expect_true(all(diff(ld$y[1:200]) < 0))
  expect_true(all(diff(ld$y[313:512]) > 0))
})

test_that("stat_theodensity fits cauchy", {
  set.seed(1)
  d <- data.frame(x = rcauchy(500, 10, 1))
  g <- ggplot(d, aes(x = x)) +
    stat_theodensity(distri = "cauchy", n = 1e4)
  ld <- layer_data(g)
  expect_equal(nrow(ld), 1e4)
  max <- ld$x[which.max(ld$y)]
  expect_lt(abs(10 - max), 0.5)
  expect_true(all(diff(ld$y[which.max(ld$y):1e4]) < 0))
  expect_true(all(diff(ld$y[1:which.max(ld$y)]) > 0))
})

test_that("stat_theodensity fits chi squared", {
  set.seed(1)
  k <- 3
  d <- data.frame(x = rchisq(500, k, 0))
  g <- ggplot(d, aes(x = x)) +
    stat_theodensity(distri = "chisq")
  ld <- layer_data(g)
  expect_equal(nrow(ld), 512)
  max <- which.max(ld$y)
  # Mode of chisq is max(k - 2, 0)
  expect_lt(abs(k - 2 - ld$x[max]), 0.5)
  expect_true(all(diff(ld$y[max:512]) < 0))
  expect_true(all(diff(ld$y[1:max]) > 0))
})

test_that("stat_theodensity fits exponential", {
  set.seed(1)
  d <- data.frame(x = rexp(500, 1))
  g <- ggplot(d, aes(x = x)) +
    stat_theodensity(distri = "exp")
  ld <- layer_data(g)
  expect_equal(nrow(ld), 512)
  expect_true(all(diff(ld$y) < 0))
})

test_that("stat_theodensity fits F-distribution", {
  set.seed(1)
  d1 <- 10
  d2 <- 30
  d <- data.frame(x = rf(500, d1, d2))
  g <- ggplot(d, aes(x = x)) +
    stat_theodensity(distri = "f", start.arg = list(df1 = 4, df2 = 2))
  ld <- layer_data(g)
  expect_equal(nrow(ld), 512)
  max <- which.max(ld$y)
  expmode <- ((d1 - 2)/d1) * (d2 / (d2 + 2))
  expect_lt(abs(median(d$x) - expmode), 0.5)
  expect_true(all(diff(ld$y[max:512]) < 0))
  expect_true(all(diff(ld$y[1:max]) > 0))
})

test_that("stat_theodensity fits gamma", {
  set.seed(1)
  d <- data.frame(x = rgamma(500, 2, 2))
  g <- ggplot(d, aes(x = x)) +
    stat_theodensity(distri = "gamma")
  ld <- layer_data(g)
  expect_equal(nrow(ld), 512)
  max <- which.max(ld$y)
  expect_lt(abs(median(d$x) - ld$x[max]), 0.5)
  expect_true(all(diff(ld$y[max:512]) < 0))
  expect_true(all(diff(ld$y[1:max]) > 0))
})

test_that("stat_theodensity fits lognormal", {
  set.seed(1)
  d <- data.frame(x = rlnorm(500, 2, 1))
  g <- ggplot(d, aes(x = x)) +
    stat_theodensity(distri = "lnorm")
  ld <- layer_data(g)
  expect_equal(nrow(ld), 512)
  max <- which.max(ld$y)
  expect_lt(abs(mean(log(d$x)) - 2), 0.5)
  expect_true(all(diff(ld$y[max:512]) < 0))
  expect_true(all(diff(ld$y[1:max]) > 0))
})

test_that("stat_theodensity fits normal", {
  set.seed(1)
  d <- data.frame(x = rnorm(500, 2, 2))
  g <- ggplot(d, aes(x = x)) +
    stat_theodensity(distri = "norm")
  ld <- layer_data(g)
  expect_equal(nrow(ld), 512)
  max <- which.max(ld$y)
  expect_lt(abs(median(d$x) - ld$x[max]), 0.5)
  expect_true(all(diff(ld$y[max:512]) < 0))
  expect_true(all(diff(ld$y[1:max]) > 0))
})

test_that("stat_theodensity fits student t", {
  set.seed(1)
  d <- data.frame(x = rt(500, 1.5))
  g <- ggplot(d, aes(x = x)) +
    stat_theodensity(distri = "t", start.arg = list(df = 20))
  ld <- layer_data(g)
  expect_equal(nrow(ld), 512)
  max <- which.max(ld$y)
  expect_lt(abs(median(d$x) - ld$x[max]), 0.5)
  expect_true(all(diff(ld$y[max:512]) < 0))
  expect_true(all(diff(ld$y[1:max]) > 0))
})

test_that("stat_theodensity fits uniform", {
  set.seed(1)
  d <- data.frame(x = rgamma(500, 2, 2))
  g <- ggplot(d, aes(x = x)) +
    stat_theodensity(distri = "unif")
  ld <- layer_data(g)
  expect_equal(nrow(ld), 512)
  expect_true(all(diff(ld$y) == 0))
})

test_that("stat_theodensity fits weibull", {
  set.seed(1)
  d <- data.frame(x = rweibull(500, 2, 2))
  g <- ggplot(d, aes(x = x)) +
    stat_theodensity(distri = "weibull")
  ld <- layer_data(g)
  expect_equal(nrow(ld), 512)
  max <- which.max(ld$y)
  expect_lt(abs(median(d$x) - ld$x[max]), 0.5)
  expect_true(all(diff(ld$y[max:512]) < 0))
  expect_true(all(diff(ld$y[1:max]) > 0))
})

test_that("stat_theodensity fits logistic", {
  set.seed(1)
  d <- data.frame(x = rlogis(500, 2, 2))
  g <- ggplot(d, aes(x = x)) +
    stat_theodensity(distri = "logis")
  ld <- layer_data(g)
  expect_equal(nrow(ld), 512)
  max <- which.max(ld$y)
  expect_lt(abs(median(d$x) - ld$x[max]), 0.5)
  expect_true(all(diff(ld$y[max:512]) < 0))
  expect_true(all(diff(ld$y[1:max]) > 0))
})