tests/testthat/test.stat_sample_slabinterval.R

# Tests for sample distribution plots
#
# Author: mjskay
###############################################################################

suppressWarnings(suppressPackageStartupMessages({
  library(dplyr)
  library(tidyr)
}))


mapped_discrete = getFromNamespace("mapped_discrete", "ggplot2")

test_that("gradientinterval works", {
  skip_if_no_vdiffr()


  set.seed(1234)
  p = tribble(
    ~dist,  ~x,
    "norm", rnorm(100),
    "t",    rt(100, 3)
  ) %>%
    unnest(x) %>%
    ggplot() +
    scale_slab_alpha_continuous(range = c(0,1))

  vdiffr::expect_doppelganger("gradientinterval with two groups",
    p + stat_gradientinterval(aes(x = dist, y = x), n = 15, fill_type = "segments")
  )
  vdiffr::expect_doppelganger("gradientintervalh with two groups",
    p + stat_gradientinterval(aes(y = dist, x = x), n = 15, fill_type = "segments")
  )

})

test_that("fill_type = 'gradient' works", {
  skip_if_no_vdiffr()
  skip_if_no_gradient()


  set.seed(1234)
  p = tribble(
    ~dist,  ~x,
    "norm", rnorm(100),
    "t",    rt(100, 3)
  ) %>%
    unnest(x) %>%
    ggplot() +
    scale_slab_alpha_continuous(range = c(0,1))

  vdiffr::expect_doppelganger("fill_type = gradient with two groups",
    p + stat_gradientinterval(aes(x = dist, y = x), n = 15, fill_type = "gradient"),
    writer = write_svg_with_gradient
  )
  vdiffr::expect_doppelganger("fill_type = auto with two groups, h",
    p + stat_gradientinterval(aes(y = dist, x = x), n = 15, fill_type = "auto"),
    writer = write_svg_with_gradient
  )

})

test_that("histinterval outline works", {
  skip_if_no_vdiffr()


  set.seed(1234)
  p = tribble(
    ~dist,  ~x,
    "norm", rnorm(100),
    "t",    rt(100, 3)
  ) %>%
    unnest(x) %>%
    ggplot()

  vdiffr::expect_doppelganger("histinterval with outline",
    p + stat_histinterval(aes(x = dist, y = x), slab_color = "black")
  )
  vdiffr::expect_doppelganger("histintervalh with outline",
    p + stat_histinterval(aes(y = dist, x = x), slab_color = "black", breaks = seq(-5, 7, length.out = 20))
  )
  vdiffr::expect_doppelganger("histinterval with outlines bw bars",
    p + stat_histinterval(aes(x = dist, y = x), slab_color = "black", outline_bars = TRUE)
  )
})

test_that("slab outline works", {
  skip_if_no_vdiffr()
  skip_if_sensitive_to_density()


  set.seed(1234)
  p = tribble(
    ~dist,  ~x,
    "norm", rnorm(100),
    "t",    rt(100, 3)
  ) %>%
    unnest(x) %>%
    ggplot()

  vdiffr::expect_doppelganger("slab with outline",
    p + stat_slab(aes(x = dist, y = x), n = 20, slab_color = "black")
  )

})

test_that("scale transformation works", {
  skip_if_no_vdiffr()


  p_log = data.frame(x = 10^c(-1, -0.55, -0.35, -0.15, -0.05, -0.01, 0.01, 0.05, 0.15, 0.35, 0.55, 1)) %>%
    ggplot(aes(y = 0, x = x)) +
    scale_x_log10(breaks = 10^seq(-2,2), limits = 10^c(-2,2))

  vdiffr::expect_doppelganger("ccdfintervalh log scale transform",
    p_log + stat_ccdfinterval(point_interval = mean_hdi, n = 100, .width = .5, scale = 1, slab_color = "black") +
      geom_point()
  )

  vdiffr::expect_doppelganger("cdfintervalh log scale transform",
    p_log + stat_cdfinterval(point_interval = mean_hdi, n = 100, .width = .5, scale = 1, slab_color = "black") +
      geom_point()
  )

  vdiffr::expect_doppelganger("histintervalh log scale transform",
    p_log + stat_histinterval(point_interval = median_qi, .width = .5)
  )

})

test_that("scale transformation works on halfeye", {
  skip_if_no_vdiffr()
  skip_if_sensitive_to_density()


  p_log = data.frame(x = 10^c(-1, -0.55, -0.35, -0.15, -0.05, -0.01, 0.01, 0.05, 0.15, 0.35, 0.55, 1)) %>%
    ggplot(aes(y = 0, x = x)) +
    scale_x_log10(breaks = 10^seq(-1,1))

  vdiffr::expect_doppelganger("halfeyeh log scale tri",
    p_log +
      stat_halfeye(
        point_interval = mode_hdci, n = 20,
        density = density_unbounded(kernel = "tri"),
        .width = .5
      ) +
      geom_point(data = data.frame(x = 10^c(-1, 1)))
  )

  vdiffr::expect_doppelganger("halfeyeh log scale tri no trim",
    p_log +
      stat_halfeye(
        point_interval = mode_hdci, n = 20,
        density = density_unbounded(kernel = "tri"), trim = FALSE,
        .width = .5
      ) +
      geom_point(data = data.frame(x = 10^c(-1, 1)))
  )

})

test_that("pdf and cdf aesthetics work", {
  skip_if_no_vdiffr()
  skip_if_sensitive_to_density()


  p = data.frame(
    x = c("a", "b"),
    y = qnorm(ppoints(100), c(1, 2), 2),
    stringsAsFactors = FALSE
  ) %>%
    ggplot(aes(x = x, y = y))

  vdiffr::expect_doppelganger("pdf and cdf on a sample slabinterval",
    p + stat_sample_slabinterval(aes(fill = x, thickness = after_stat(pdf), slab_alpha = after_stat(cdf)), n = 15)
  )
})

test_that("constant distributions work", {
  skip_if_no_vdiffr()


  # constant dist when n != 1
  p = data.frame(
    x = c("constant = 1", "constant = 2", "constant = 3"),
    y = rep(c(0, 1, 2), times = 10),
    stringsAsFactors = FALSE
  ) %>%
    ggplot(aes(x = x, y = y))

  vdiffr::expect_doppelganger("constant dist on halfeye",
    p + stat_halfeye(n = 15, slab_color = "blue")
  )

  vdiffr::expect_doppelganger("constant dist on histinterval",
    p + stat_histinterval(n = 15, slab_color = "blue")
  )

  vdiffr::expect_doppelganger("constant dist on ccdf",
    p + stat_ccdfinterval(trim = FALSE)
  )

  # constant dist when n = 1
  p = tibble(
    x = c("constant = 1", "constant = 2", "constant = 3"),
    y = c(0, 1, 2)
  ) %>%
    ggplot(aes(x = x, y = y))

  vdiffr::expect_doppelganger("constant dist on halfeye with n = 1",
    p + stat_sample_slabinterval(n = 15, slab_color = "blue")
  )

  vdiffr::expect_doppelganger("constant dist on ccdf with n = 1",
    p + stat_ccdfinterval()
  )
})

test_that("side and justification can vary", {
  skip_if_no_vdiffr()

  vdiffr::expect_doppelganger("varying side and just",
    mtcars %>%
      ggplot(aes(x = mpg, y = cyl,
        side = case_when(cyl == 4 ~ "top", cyl == 6 ~ "both", cyl == 8 ~ "bottom"),
        justification = case_when(cyl == 4 ~ 0, cyl == 6 ~ 0.5, cyl == 8 ~ 1),
        scale = case_when(cyl == 4 ~ 0.5, cyl == 6 ~ 1, cyl == 8 ~ 0.5)
      )) +
      stat_histinterval(orientation = "horizontal", normalize = "groups", n = 15)
  )
})

test_that("n is calculated correctly", {
  skip_if_no_vdiffr()

  set.seed(1234)
  df = data.frame(
    g = c("a", "a", "a", "b", "c"),
    x = rnorm(15, c(1, 1, 1, 2, 3)),
    stringsAsFactors = FALSE
  )

  ld = layer_data(
    df %>%
      ggplot(aes(x = x, y = g, thickness = after_stat(pdf*n), fill = after_stat(n))) +
      stat_sample_slabinterval(n = 2)
  )

  expect_equal(
    ld[ld$datatype == "slab", c("y","group","n")],
    data.frame(y = c(1,1,2,2,3,3), group = c(1,1,2,2,3,3), n = c(9,9,3,3,3,3)),
    ignore_attr = c("row.names", "class")
  )

  expect_equal(
    ld[ld$datatype == "interval", c("y","group","n")],
    data.frame(y = c(1,1,2,2,3,3), group = c(1,1,2,2,3,3), n = NA_real_),
    ignore_attr = c("row.names", "class")
  )
})


# missing data is handled correctly ---------------------------------------

test_that("NAs are handled correctly", {
  skip_if_no_vdiffr()

  p = data.frame(x = c(1:5000, NA)) %>%
    ggplot(aes(x = x, y = "a"))

  expect_warning(
    vdiffr::expect_doppelganger("NAs with na.rm = FALSE",
      p + stat_cdfinterval(na.rm = FALSE, n = 5)
    ),
    "Removed 1 row"
  )

  vdiffr::expect_doppelganger("NAs with na.rm = TRUE",
    p + stat_cdfinterval(na.rm = TRUE, n = 5)
  )
})


# trim and expand ---------------------------------------------------------

test_that("trim and expand work", {
  skip_if_no_vdiffr()
  skip_if_sensitive_to_density()

  set.seed(1234)
  df = data.frame(
    g = c("a", "a", "a", "b", "c"),
    x = rnorm(120, c(1, 1, 1, 2, 3)),
    stringsAsFactors = FALSE
  )

  vdiffr::expect_doppelganger("untrimmed and expanded",
    df %>%
      ggplot(aes(x = x, y = g)) +
      stat_sample_slabinterval(n = 15, slab_color = "black", expand = TRUE, trim = FALSE)
  )
})

test_that("expand can take length two vector", {

  df = tibble(
    g = c("a", "a", "b", "b"),
    x = c(1, 2, 2, 3)
  )

  p = df %>%
    ggplot(aes(x = x, y = g)) +
    lims(x = c(0, 4))

  ld = layer_data(p + stat_ccdfinterval(expand = c(TRUE, TRUE)))
  expect_equal(min(ld$x), 0)
  expect_equal(max(ld$x), 4)

  ld = layer_data(p + stat_ccdfinterval(
    expand = c(TRUE, FALSE),
    density = density_bounded(bounder = "range")
  ))
  expect_equal(min(ld$x), 0)
  expect_equal(max(ld$x), 3)

  ld = layer_data(p + stat_ccdfinterval(
    expand = c(FALSE, TRUE),
    density = density_bounded(bounder = "range")
  ))
  expect_equal(min(ld$x), 1)
  expect_equal(max(ld$x), 4)

  ld = layer_data(p + stat_ccdfinterval(
    expand = c(FALSE, FALSE),
    density = density_bounded(bounder = "range")
  ))
  expect_equal(min(ld$x), 1)
  expect_equal(max(ld$x), 3)
})


# discrete distributions --------------------------------------------------

test_that("characters work", {
  p = ggplot_build(
    ggplot() +
      stat_slabinterval(aes(x = c("a","a","a","b","b","c"), group = NA))
  )

  slab_ref = data.frame(
    thickness = c(3,3,3,3,3,3, 2,2,2,2,2,2, 1,1,1,1,1,1)/6,
    pdf = c(3,3,3,3,3,3, 2,2,2,2,2,2, 1,1,1,1,1,1)/6,
    cdf = c(0,0,0, 3,3,3,3,3,3, 5,5,5,5,5,5, 6,6,6)/6,
    f = c(3,3,3,3,3,3, 2,2,2,2,2,2, 1,1,1,1,1,1)/6,
    n = 6,
    datatype = "slab",
    .width = c(NA,NA, .66,.66,.66,.66,.66,.66,.66,.66, .95,.95,.95,.95, NA,NA,NA,NA),
    stringsAsFactors = FALSE
  )
  slab_ref$x = mapped_discrete(c(.5,.5, 1,1, 1.5,1.5,1.5,1.5, 2,2, 2.5,2.5,2.5,2.5, 3,3, 3.5,3.5))
  expect_equal(p$data[[1]][p$data[[1]]$datatype == "slab", names(slab_ref)], slab_ref)

  interval_ref = data.frame(
    datatype = "interval",
    .width = c(0.66, 0.95),
    stringsAsFactors = FALSE
  )
  interval_ref$xmin = mapped_discrete(c(1, 1))
  interval_ref$xmax = mapped_discrete(c(2.15, 2.875))
  interval_ref$x = mapped_discrete(c(1.5, 1.5))
  attr(interval_ref, "row.names") = c(19L, 20L)
  expect_equal(p$data[[1]][p$data[[1]]$datatype == "interval", names(interval_ref)], interval_ref)
})

test_that("logical conditions at bin edges on histograms work", {
  p = ggplot() +
    stat_slab(
      aes(x = c(1,1,2,2,2,3,3,3,3), fill = after_stat(x > 1.5)),
      density = "histogram",
      breaks = breaks_fixed(width = 1),
      align = "center"
    ) +
    scale_fill_manual(values = c("red", "blue"))

  ref = data.frame(
    x = c(0.5, 0.5, 1, 1, 1.5, 1.5, 1.5, 1.5, 2, 2, 2.5, 2.5, 2.5, 2.5,  3, 3, 3.5, 3.5),
    fill = c(rep("red", 7), rep("blue", 11)),
    stringsAsFactors = FALSE
  )
  expect_equal(layer_data(p)[,c("x", "fill")], ref)

  # with outline
  p = ggplot() +
    stat_slab(
      aes(x = c(1,1,2,2,2,3,3,3,3), fill = after_stat(x > 1.5)),
      density = density_histogram(),
      breaks = breaks_fixed(width = 1),
      align = "center",
      outline_bars = TRUE,
      color = "black"
    ) +
    scale_fill_manual(values = c("red", "blue"))

  ref = data.frame(
    x = c(0.5, 0.5, 0.5, 1, 1, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 2, 2, 2.5,  2.5, 2.5, 2.5, 2.5, 2.5, 3, 3, 3.5, 3.5, 3.5),
    fill = c(rep("red", 10), rep("blue", 14)),
    stringsAsFactors = FALSE
  )
  expect_equal(layer_data(p)[,c("x", "fill")], ref)
})


# weights -----------------------------------------------------------------

test_that("sample weights work", {
  df = data.frame(
    x = 1:9/10,
    w = 0:8
  )

  p = ggplot(df, aes(x, weight = w))

  ld_slab = layer_data(p + stat_slab(n = 9, density = "unbounded", .width = .5))
  expect_equal(
    ld_slab$pdf,
    density(df$x, weights = df$w/36, bw = bandwidth_dpi(df$x), n = 9, cut = 0)$y
  )
  expect_equal(ld_slab$cdf, cumsum(df$w/36))
  expect_equal(ld_slab$.width, c(rep(NA, 5), rep(0.5, 3), NA))

  ld_interval = layer_data(p + stat_pointinterval(.width = .5))
  expect_equal(ld_interval$x, 0.7)
  expect_equal(ld_interval$xmin, weighted_quantile(df$x, 0.25, weights = df$w, names = FALSE))
  expect_equal(ld_interval$xmax, weighted_quantile(df$x, 0.75, weights = df$w, names = FALSE))
  ld_interval_mean = layer_data(p + stat_pointinterval(.width = .5, point_interval = mean_qi))
  expect_equal(ld_interval_mean$x, 2/3)
})


# deprecated params -------------------------------------------------------

test_that("slab_type throws appropriate warnings and errors", {
  expect_warning(
    expect_warning(
      layer_data(ggplot() + stat_slab(aes(1:5), slab_type = "xx")),
      "slab_type.*is deprecated"
    ),
    'Unknown `slab_type`: "xx"'
  )
})

Try the ggdist package in your browser

Any scripts or data that you put into this service are public.

ggdist documentation built on July 4, 2024, 9:08 a.m.