# In vincenzocoia/distplyr: Manipulation of Univariate Distributions

# Loading in all relevant packages and distplyr
library(devtools)
library(tidyverse)
load_all()

# Fix the RNG seed so that every simulated sample below is reproducible
set.seed(2021)

# ---- invert(): continuous case ----

# Base distribution, and the distribution returned by invert() for it
d1 <- dst_norm(2, 5)
d2 <- invert(d1)

# Visual check 1: the cdf reported for d2 should track the empirical cdf of a
# sample drawn from d2.
d2_points <- realise(d2, n = 10000)
emp <- dst_empirical(d2_points)
cdfs <- enframe_cdf(d2, at = seq(-5, 5, length.out = 1000), fn_name = "d2")
cdfs <- mutate(cdfs, emp = eval_cdf(emp, at = .arg))
cdfs <- pivot_longer(
  cdfs,
  c(d2, emp),
  names_to = "distribution",
  values_to = "cdf"
)
ggplot(cdfs, aes(.arg, cdf, group = distribution, colour = distribution)) +
  geom_line() +
  theme_minimal()

# Visual check 2: the density reported for d2 (blue) against a kernel density
# estimate of the simulated sample (orange), both restricted to [-5, 5].
d2_density <- enframe_density(
  d2,
  at = seq(-5, 5, length.out = 1000),
  fn_name = "d2"
)
ggplot(d2_density) +
  geom_line(aes(.arg, d2), colour = "blue") +
  geom_density(
    data = tibble(d2_points = d2_points),
    mapping = aes(x = d2_points),
    colour = "orange"
  ) +
  xlim(-5, 5) +
  theme_minimal()


# ---- invert(): discrete case ----

# Shifted Poisson as the base distribution, then the result of invert() on it
d1 <- dst_pois(6) + 1
d2 <- invert(d1)
d2_points <- realise(d2, n = 5000)
emp <- dst_empirical(d2_points)

# NOTE (carried over from the original author): enframing the pmf of a
# Poisson-based distribution reportedly does not work and keeps returning 0.
# The pmf is evaluated at the reciprocals 1, 1/2, ..., 1/50.
pmf <- enframe_pmf(d2, at = 1 / seq_len(50), fn_name = "d2")
pmf <- mutate(pmf, emp = eval_pmf(emp, at = .arg, strict = FALSE))
pmf <- pivot_longer(
  pmf,
  c(d2, emp),
  names_to = "distribution",
  values_to = "pmf"
)
ggplot(pmf, aes(.arg, pmf, group = distribution, colour = distribution)) +
  geom_point() +
  theme_minimal()

# ---- Shift (distribution + constant): continuous case ----

# Base distribution and the same distribution shifted by 1
d1 <- dst_norm(2, 5)
d2 <- d1 + 1

# Visual check 1: cdf reported for d2 against the empirical cdf of a sample
# drawn from d2.
d2_points <- realise(d2, n = 10000)
emp <- dst_empirical(d2_points)
cdfs <- enframe_cdf(d2, at = seq(-15, 15, length.out = 10000), fn_name = "d2")
cdfs <- mutate(cdfs, emp = eval_cdf(emp, at = .arg))
cdfs <- pivot_longer(
  cdfs,
  c(d2, emp),
  names_to = "distribution",
  values_to = "cdf"
)
ggplot(cdfs, aes(.arg, cdf, group = distribution, colour = distribution)) +
  geom_line() +
  theme_minimal()

# Visual check 2: density reported for d2 (orange) against a kernel density
# estimate of the sample (blue). Worth eyeballing carefully, because changing
# the parameters changes the shape a lot.
d2_density <- enframe_density(
  d2,
  at = seq(-15, 15, length.out = 10000),
  fn_name = "d2"
)
ggplot(d2_density) +
  geom_line(aes(.arg, d2), colour = "orange") +
  geom_density(
    data = tibble(d2_points = d2_points),
    mapping = aes(x = d2_points),
    colour = "blue"
  ) +
  xlim(-15, 15) +
  theme_minimal()


# ---- Shift (distribution + constant): discrete case ----

# Poisson shifted by 1, plus an empirical distribution built from a sample
d2 <- dst_pois(6) + 1
d2_points <- realise(d2, n = 5000)
emp <- dst_empirical(d2_points)

# NOTE (carried over from the original author): enframing the pmf of a
# Poisson-based distribution reportedly does not work and keeps returning 0.
# The pmf is evaluated at the integers 1..50.
pmf <- enframe_pmf(d2, at = seq_len(50), fn_name = "d2")
pmf <- mutate(pmf, emp = eval_pmf(emp, at = .arg, strict = FALSE))
pmf <- pivot_longer(
  pmf,
  c(d2, emp),
  names_to = "distribution",
  values_to = "pmf"
)

ggplot(pmf, aes(.arg, pmf, group = distribution, colour = distribution)) +
  geom_point() +
  theme_minimal()

# ---- Scale (distribution * constant): continuous case ----
# (The two heading lines of this section used to be bare prose, which is a
# syntax error in an R script; they are comments now.)

# Making test distribution and scaling said distribution
d1 <- dst_norm(2, 2)
d2 <- d1 * 3

# Visual check 1: cdf reported for d2 against the empirical cdf of a sample
# drawn from d2.
d2_points <- realise(d2, n = 10000)
emp <- dst_empirical(d2_points)
cdfs <- enframe_cdf(
  d2,
  at = seq(-15, 25, length.out = 10000),
  fn_name = "d2"
) %>%
  mutate(emp = eval_cdf(emp, at = .arg)) %>%
  pivot_longer(c(d2, emp), names_to = "distribution", values_to = "cdf")
ggplot(cdfs, aes(.arg, cdf)) +
  geom_line(aes(group = distribution, colour = distribution)) +
  theme_minimal()

# Visual check 2: density reported for d2 (orange) against a kernel density
# estimate of the sample (blue). Worth eyeballing carefully, because changing
# the parameters changes the shape a lot.
enframe_density(
  d2,
  at = seq(-15, 25, length.out = 10000),
  fn_name = "d2"
) %>%
  ggplot() +
  geom_line(aes(.arg, d2), color = "orange") +
  theme_minimal() +
  geom_density(
    data = tibble(d2_points = d2_points),
    mapping = aes(x = d2_points),
    color = "blue"
  ) +
  xlim(-15, 25)

# ---- Scale and shift: discrete case ----

# Poisson scaled by 2 and shifted by 1. A Poisson variable is a non-negative
# integer, so d2 only takes the values 1, 3, 5, ...
d2 <- dst_pois(2) * 2 + 1

# Pass the sample size explicitly, as the other Poisson checks in this script
# do; the original call omitted `n`, which left the empirical distribution
# with far too few points to compare against.
d2_points <- realise(d2, n = 5000)
emp <- dst_empirical(d2_points)

# Evaluate the pmf where d2 can actually put mass. The previous grid,
# -seq(1, 50), was entirely negative, so the theoretical pmf was 0 at every
# grid point ("keep getting 0").
pmf <- enframe_pmf(d2, at = seq(1, 50), fn_name = "d2") %>%
  mutate(emp = eval_pmf(emp, at = .arg, strict = FALSE)) %>%
  pivot_longer(c(d2, emp), names_to = "distribution", values_to = "pmf")

# Show the long-format table for a quick numeric inspection
pmf

ggplot(pmf, aes(.arg, pmf)) +
  geom_point(aes(group = distribution, colour = distribution)) +
  theme_minimal()

# ---- Negation (-distribution): continuous case ----

# Base distribution and its negation
d1 <- dst_norm(2, 5)
d2 <- -d1

# Visual check 1: cdf reported for d2 against the empirical cdf of a sample
# drawn from d2.
d2_points <- realise(d2, n = 10000)
emp <- dst_empirical(d2_points)
cdfs <- enframe_cdf(d2, at = seq(-15, 15, length.out = 10000), fn_name = "d2")
cdfs <- mutate(cdfs, emp = eval_cdf(emp, at = .arg))
cdfs <- pivot_longer(
  cdfs,
  c(d2, emp),
  names_to = "distribution",
  values_to = "cdf"
)
ggplot(cdfs, aes(.arg, cdf, group = distribution, colour = distribution)) +
  geom_line() +
  theme_minimal()

# Visual check 2: density reported for d2 (orange) against a kernel density
# estimate of the sample (blue). Worth eyeballing carefully, because changing
# the parameters changes the shape a lot.
d2_density <- enframe_density(
  d2,
  at = seq(-15, 15, length.out = 10000),
  fn_name = "d2"
)
ggplot(d2_density) +
  geom_line(aes(.arg, d2), colour = "orange") +
  geom_density(
    data = tibble(d2_points = d2_points),
    mapping = aes(x = d2_points),
    colour = "blue"
  ) +
  xlim(-15, 15) +
  theme_minimal()


# ---- Negation (-distribution): discrete case ----

# Negated Poisson. A Poisson variable is a non-negative integer, so d2 only
# takes the values 0, -1, -2, ...
d2 <- -dst_pois(2)
d2_points <- realise(d2, n = 50)
emp <- dst_empirical(d2_points)

# Evaluate the pmf where d2 can actually put mass. The previous grid,
# seq(1, 50), was strictly positive, so the theoretical pmf was 0 at every
# grid point ("keep getting 0").
pmf <- enframe_pmf(d2, at = -seq(0, 50), fn_name = "d2") %>%
  mutate(emp = eval_pmf(emp, at = .arg, strict = FALSE)) %>%
  pivot_longer(c(d2, emp), names_to = "distribution", values_to = "pmf")

ggplot(pmf, aes(.arg, pmf)) +
  geom_point(aes(group = distribution, colour = distribution)) +
  theme_minimal()

# ---- Combined transformation: scale, shift, then invert() ----

# Base distribution; d2 chains a multiplication by -2, an addition of 6 and
# a call to invert()
d1 <- dst_norm(2, 5)
d2 <- invert((-2 * d1) + 6)

# Visual check 1: cdf reported for d2 against the empirical cdf of a sample
# drawn from d2.
d2_points <- realise(d2, n = 10000)
emp <- dst_empirical(d2_points)
cdfs <- enframe_cdf(d2, at = seq(-15, 15, length.out = 10000), fn_name = "d2")
cdfs <- mutate(cdfs, emp = eval_cdf(emp, at = .arg))
cdfs <- pivot_longer(
  cdfs,
  c(d2, emp),
  names_to = "distribution",
  values_to = "cdf"
)
ggplot(cdfs, aes(.arg, cdf, group = distribution, colour = distribution)) +
  geom_line() +
  theme_minimal()

# Visual check 2: density reported for d2 (orange) against a kernel density
# estimate of the sample (blue). The density is evaluated on [-15, 15] but
# the plot is limited to [-1, 1].
d2_density <- enframe_density(
  d2,
  at = seq(-15, 15, length.out = 10000),
  fn_name = "d2"
)
ggplot(d2_density) +
  geom_line(aes(.arg, d2), colour = "orange") +
  geom_density(
    data = tibble(d2_points = d2_points),
    mapping = aes(x = d2_points),
    colour = "blue"
  ) +
  xlim(-1, 1) +
  theme_minimal()

# ---- Shift (distribution + constant): GPD case ----

# Base GPD and the same distribution shifted by 2
d1 <- dst_gpd(-2, 5, 2)
d2 <- d1 + 2

# Visual check 1: cdf reported for d2 against the empirical cdf of a sample
# drawn from d2.
d2_points <- realise(d2, n = 100000)
emp <- dst_empirical(d2_points)
cdfs <- enframe_cdf(d2, at = seq(-15, 15, length.out = 100000), fn_name = "d2")
cdfs <- mutate(cdfs, emp = eval_cdf(emp, at = .arg))
cdfs <- pivot_longer(
  cdfs,
  c(d2, emp),
  names_to = "distribution",
  values_to = "cdf"
)
ggplot(cdfs, aes(.arg, cdf, group = distribution, colour = distribution)) +
  geom_line() +
  theme_minimal()

# Visual check 2: density reported for d2 (orange) against a kernel density
# estimate of the sample (blue), both restricted to [-15, 15].
d2_density <- enframe_density(
  d2,
  at = seq(-15, 15, length.out = 10000),
  fn_name = "d2"
)
ggplot(d2_density) +
  geom_line(aes(.arg, d2), colour = "orange") +
  geom_density(
    data = tibble(d2_points = d2_points),
    mapping = aes(x = d2_points),
    colour = "blue"
  ) +
  xlim(-15, 15) +
  theme_minimal()

# ---- invert(): GPD case ----

# Base GPD and the distribution returned by invert() for it
d1 <- dst_gpd(-2, 5, 4)
d2 <- invert(d1)

# Visual check 1: cdf reported for d2 against the empirical cdf of a sample
# drawn from d2.
d2_points <- realise(d2, n = 100000)
emp <- dst_empirical(d2_points)
cdfs <- enframe_cdf(d2, at = seq(-15, 15, length.out = 100000), fn_name = "d2")
cdfs <- mutate(cdfs, emp = eval_cdf(emp, at = .arg))
cdfs <- pivot_longer(
  cdfs,
  c(d2, emp),
  names_to = "distribution",
  values_to = "cdf"
)
ggplot(cdfs, aes(.arg, cdf, group = distribution, colour = distribution)) +
  geom_line() +
  theme_minimal()

# Visual check 2: density reported for d2 (orange) against a kernel density
# estimate of the sample (blue). The density is evaluated on [-15, 15] but
# the plot is limited to [-1, 1].
d2_density <- enframe_density(
  d2,
  at = seq(-15, 15, length.out = 10000),
  fn_name = "d2"
)
ggplot(d2_density) +
  geom_line(aes(.arg, d2), colour = "orange") +
  geom_density(
    data = tibble(d2_points = d2_points),
    mapping = aes(x = d2_points),
    colour = "blue"
  ) +
  xlim(-1, 1) +
  theme_minimal()