rvnorm: The Variety Normal Distribution
In dkahle/algstat: Algebraic Statistics

rvnorm

R Documentation

The Variety Normal Distribution

Description

Un-normalized density function and random generation for the variety normal distribution with mean equal to poly and "standard deviation" equal to sd. Please see details for caveats.

Usage

rvnorm(
  n,
  poly,
  sd,
  output = "simple",
  chains = 4L,
  warmup = floor(n/2),
  keep_warmup = FALSE,
  thin = 1L,
  inject_direct = FALSE,
  verbose = FALSE,
  cores = getOption("mc.cores", 1L),
  normalized = TRUE,
  w,
  vars,
  numerator,
  denominator,
  refresh,
  code_only = FALSE,
  ...
)

Arguments

`n`	The number of draws desired from each chain after warmup.
`poly`	An mpoly object.
`sd`	The "standard deviation" component of the normal kernel.
`output`	`"simple"`, `"tibble"`, `"stanfit"`.
`chains`	The number of chains to run for the random number generation, see `stan()`.
`warmup`	Number of warmup iterations in `stan()`.
`keep_warmup`	If `TRUE`, the MCMC warmup steps are included in the output.
`thin`	`stan()` `thin` parameter.
`inject_direct`	Directly specify printed polynomial to string inject into the stan code. Requires you specify `vars`, `numerator`, and `denominator`.
`verbose`	`TRUE` or `FALSE`; determines level of messaging.
`cores`	The number of CPU cores to distribute the chains across, see `stan()`.
`normalized`	If `TRUE`, the polynomial is gradient-normalized. This is highly recommended.
`w`	A named list of box constraints for vectors to be passed to Stan, see examples. A If a single number, a box window (-w,w) is applied to all variables.
`vars`	A character vector of the indeterminates in the distribution.
`numerator, denominator`	A character(1) containing the printed numerator of the variety normal distribution.
`refresh`	The `refresh` argument of `stan()`, which governs how much information is provided to the user while sampling.
`code_only`	If `TRUE`, will only formulate and return Stan code.
`...`	Additional parameters to pass to `stan()`.

Details

If the variety you are interested in is connected, this strategy should work well out of the box. If it isn't, you'll likely need to rely on running multiple chains, and it is very likely, if not probable, that the sampling will be biased to one or more of those components and down-sample others. Question: what is the relative likelihood of each component, or an equal unit of length, on different components? How does this generalize to more varieties of varying dimensions?

Value

Either (1) matrix whose rows are the individual draws from the distribution, (2) a tbl_df object with the draws along with additional information, or (3) an object of class stanfit.

Author(s)

David Kahle

Examples


## Not run:  runs rstan

library("tidyverse")
rstan::rstan_options(auto_write = TRUE) # helps avoid recompiles

## basic usage
########################################

# single polynomial
p <- mp("x^2 + y^2 - 1")
samps <- rvnorm(2000, p, sd = .1)
head(samps)
str(samps) # 2000 * (4 chains)
plot(samps, asp = 1)

# returning a data frame
(samps <- rvnorm(2000, p, sd = .1, output = "tibble"))
ggplot(samps, aes(x, y)) + geom_point(size = .5) + coord_equal()

ggplot(samps, aes(x, y, color = g)) +
  geom_point(size = .5) +
  scale_color_gradient2() +
  coord_equal()
  
ggplot(samps, aes(x, y)) + 
  stat_density2d(
    aes(fill = stat(density)), 
    geom = "raster", contour = FALSE
   ) + 
  coord_equal()
  
library("ggdensity")
ggvariety(p) + coord_equal()
ggplot(samps, aes(x, y)) + 
  geom_hdr() +
  coord_equal()



# more than one polynomial, # vars > # eqns, underdetermined system
p <- mp(c("x^2 + y^2 + z^2 - 1", "z"))
(samps <- rvnorm(500, p, sd = .1, output = "tibble"))
ggplot(samps, aes(x, y)) + geom_point(size = .5) + coord_equal()

ggplot(samps, aes(x, y, color = `g[1]`)) + geom_point() +
  scale_color_gradient2(mid = "gray80") + coord_equal()

ggplot(samps, aes(x, y, color = `g[2]`)) + geom_point() +
  scale_color_gradient2(mid = "gray80") + coord_equal()

ggplot(samps, aes(x, z, color = `g[1]`)) + geom_point() +
  scale_color_gradient2(mid = "gray80") + coord_equal()



# more than one polynomial, # vars < # eqns, overdetermined system
p <- mp(c("3 x", "3 y", "2 x + 2 y", "3 (x^2 + y)", "3 (x^2 - y)"))
(samps <- rvnorm(500, p, sd = .1, output = "tibble"))

samps %>% 
  select(x, y, starts_with("g")) %>% 
  pivot_longer(starts_with("g"), "equation", "value") %>% 
  ggplot(aes(x, y, color = value)) + geom_point() +
    scale_color_gradient2(mid = "gray80") + coord_equal() +
    facet_wrap(~ equation)
    

## using refresh to get more info
########################################

rvnorm(2000, p, sd = .1, "tibble", verbose = TRUE)
rvnorm(2000, p, sd = .1, "tibble", refresh = 500)
rvnorm(2000, p, sd = .1, "tibble", refresh = 0) # default
rvnorm(2000, p, sd = .1, "tibble", refresh = -1)



## many chains in parallel
########################################

options(mc.cores = parallel::detectCores())
p <- mp("x^2 + (4 y)^2 - 1")
(samps <- rvnorm(1e4, p, sd = .01, "tibble", verbose = TRUE, chains = 8))
ggplot(samps, aes(x, y)) + geom_bin2d(binwidth = .01*c(1,1)) + coord_equal()
# decrease sd to get more uniform sampling


## windowing for unbounded varieties
########################################
# windowing is needed for unbounded varieties
# in the following, look at the parameters block

p <- mp("x y - 1") # unbounded variety, 1 poly
p <- mp(c("x y - 1", "y - x")) # 2 polys

rvnorm(1e3, p, sd = .01, "tibble", code_only = TRUE)
rvnorm(1e3, p, sd = .01, "tibble", code_only = TRUE, w = 1.15)

window <- list("x" = c(-1.5, 1.25), "y" = c(-2, 1.5))
rvnorm(1e3, p, sd = .01, "tibble", code_only = TRUE, w = window)

window <- list("x" = c(-1.5, 1.5))
rvnorm(1e3, p, sd = .01, "tibble", code_only = TRUE, w = window)



## the importance of normalizing
########################################
# one of the effects of the normalizing is to stabilize variances, making
# them roughly equivalent globally over the variety.

# lemniscate of bernoulli
p <- mp("(x^2 + y^2)^2 - 2 (x^2 - y^2)")

# normalized, good
(samps <- rvnorm(2000, p, .05, "tibble"))
ggplot(samps, aes(x, y)) + geom_point(size = .5) + coord_equal()
ggplot(samps, aes(x, y)) + geom_bin2d(binwidth = .05*c(1,1)) + coord_equal()

# unnormalized, bad
(samps <- rvnorm(2000, p, .05, "tibble", normalized = FALSE))
ggplot(samps, aes(x, y)) + geom_point(size = .5) + coord_equal()
ggplot(samps, aes(x, y)) + geom_bin2d(binwidth = .05*c(1,1)) + coord_equal()



## semi-algebraic sets
########################################
# inside the semialgebraic set x^2 + y^2 <= 1
# this is the same as x^2 + y^2 - 1 <= 0, so that
# x^2 + y^2 - 1 + s^2 == 0 for some slack variable s
# this is the projection of the sphere into the xy-plane.

p <- mp("1 - (x^2 + y^2) - s^2")
samps <- rvnorm(1e4, p, sd = .1, "tibble", chains = 8, refresh = 1e3)
ggplot(samps, aes(x, y)) + geom_bin2d(binwidth = .05*c(1,1)) + coord_equal()

ggplot(sample_n(samps, 2e3), aes(x, y, color = s)) +
  geom_point(size = .5) +
  scale_color_gradient2() +
  coord_equal()

# alternative representation
# x^2 + y^2 - 1 <= 0 iff s^2 (x^2 + y^2 - 1) + 1 == 0
# note that it's gradient is more complicated.
p <- mp("s^2 (x^2 + y^2 - 1) + 1")
samps <- rvnorm(1e4, p, sd = .1, "tibble", chains = 8, w = 2, refresh = 1e3)
ggplot(samps, aes(x, y)) + geom_bin2d(binwidth = .05*c(1,1)) + coord_equal()


## keeping the warmup / the importance of multiple chains
########################################

p <- mp("((x + 1.5)^2 + y^2 - 1) ((x - 1.5)^2 + y^2 - 1)")
ggvariety(p, xlim = c(-3,3)) + coord_equal()

# notice the migration of chains initialized away from the distribution
# (it helps to make the graphic large on your screen)
samps <- rvnorm(500, p, sd = .05, "tibble", chains = 8, keep_warmup = TRUE)
ggplot(samps, aes(x, y, color = iter)) +
  geom_point(size = 1, alpha = .5) + geom_path(alpha = .2) +
  coord_equal() + facet_wrap(~ factor(chain))
  
samps <- rvnorm(2500, p, sd = .05, "tibble", chains = 8, keep_warmup = TRUE)
ggplot(samps, aes(x, y)) + geom_bin2d(binwidth = .05*c(1,1)) + 
  coord_equal() + facet_wrap(~ factor(chain))


## ideal-variety correspondence considerations
########################################

p <- mp("x^2 + y^2 - 1")

samps_1 <- rvnorm(250, p^1, sd = .1, output = "tibble", chains = 8)
samps_2 <- rvnorm(250, p^2, sd = .1, output = "tibble", chains = 8)
# samps_3 <- rvnorm(250, p^3, sd = .1, output = "tibble", chains = 8)
# samps_4 <- rvnorm(250, p^4, sd = .1, output = "tibble", chains = 8)
samps <- bind_rows(mget(apropos("samps_[1-4]")))
samps$power <- rep(seq_along(apropos("samps_[1-4]")), each = 2000)

ggplot(samps, aes(x, y, color = g < 0)) +
  geom_point(size = .5) +
  coord_equal(xlim = c(-3,3), ylim = c(-3,3)) +
  facet_wrap(~ power)


## neat examples
########################################
# an implicit Lissajous region, view in separate window large

# x = cos(m t + p)
# y = sin(n t + q)
(p <- lissajous(3, 2,  -pi/2, 0))
(p <- lissajous(4, 3,  -pi/2, 0))
(p <- lissajous(5, 4,  -pi/2, 0))
(p <- lissajous(3, 3,  0, 0))
(p <- lissajous(5, 5,  0, 0))
(p <- lissajous(7, 7,  0, 0))
ggvariety(p, n = 201) + coord_equal()

p <- plug(p, "x", mp(".5 x"))
p <- plug(p, "y", mp(".5 y"))

# algebraic set
samps <- rvnorm(5e3, p, sd = .01, "tibble", chains = 8, refresh = 100)
ggplot(samps, aes(x, y, color = factor(chain))) +
  geom_point(size = .5) + coord_equal()
ggplot(samps, aes(x, y)) + geom_bin2d(binwidth = .02*c(1,1)) + coord_equal()
  
ggplot(samps, aes(x, y, color = factor(chain))) +
  geom_point(size = .5) + coord_equal() +
  facet_wrap(~ factor(chain))

# semi-algebraic set
samps_normd <- rvnorm(1e4, p + mp("s^2"), sd = .01, "tibble", chains = 8,
  normalized = TRUE, refresh = 100
)
samps_unormd <- rvnorm(1e4, p + mp("s^2"), sd = .01, "tibble", chains = 8,
  normalized = FALSE, refresh = 100
)

bind_rows(
  samps_normd  %>% mutate(normd = TRUE),
  samps_unormd %>% mutate(normd = FALSE)
) %>%
  ggplot(aes(x, y)) +
    geom_bin2d(binwidth = .05*c(1,1)) + 
    facet_grid(normd ~ chain) +
    coord_equal()


ggplot(samps_normd, aes(x, y)) + 
  geom_bin2d(binwidth = .05*c(1,1)) + coord_equal()


## End(Not run)

dkahle/algstat documentation built on May 23, 2023, 12:29 a.m.