vignettes/Points-Inside-Outside-Ellipse.R

## ----echo = FALSE-------------------------------------------------------------
knitr::opts_chunk$set(collapse = TRUE, comment = "#>", 
                      fig.width = 6, fig.height = 5)


## ----gendata, echo = TRUE-----------------------------------------------------

library(dplyr)
library(magrittr)
library(purrr)
library(SIBER)
library(ggplot2)

# set the random seed generator so we get consistent results each time 
# we run this code.
set.seed(3)

# n random numbers
n <- 100

# some random multivariate data
Y <- generateSiberGroup(n.obs = n)


## ----warpdata, echo = TRUE----------------------------------------------------

# plot this example data with column 2 by column 1
# plot(Y[,2] ~ Y[,1], type = "n", asp = 1,
#      xlim = c(-10, 10),
#      ylim = c(-10, 10))

plot(Y[,2] ~ Y[,1], type = "n", asp = 1)

# add an ellipse, in this case a 95% ellipse
mu <- colMeans(Y) # centre of the ellipse
Sigma <- cov(Y) # covariance matrix of the ellipse

# percentile of the ellipse
p <- 0.95 

# draw the ellipse
tmp <- addEllipse(mu, Sigma, p.interval = p, col = "red", lty = 2)

# Determine which of the samples are inside the ellipse
Z_samp <- pointsToEllipsoid(Y, Sigma, mu) # convert to circle space
inside_samp <- ellipseInOut(Z_samp, p = p) # test if inside

# inside points are marked TRUE which corresponds to 1 in numeric terms, and 
# outside marked FALSE which corresponds to 0. 
# So below I calculate (1 + !inside_test) which makes 1 correspond to inside 
# and coloured black, and 2 correspond to outside and coloured red.
# and plot them with colour coding for whether they are inside or outside
points(Y[,2] ~ Y[,1], col = 1 + !inside_samp)


## ----manually-test-points, eval = TRUE----------------------------------------
# Define a matrix of 5 data points to test against our ellipse.
# For ease of interpretation of this code, I have built this matrix by 
# specifying each row on a separate line and do this by adding the option 
# byrow = FALSE (by default R fills down the rows first of a matrix).
test_these <- matrix(c(-2,  2,
                        0,  0,
                       -5,  2,
                        1, -2,
                        4,  0),
                     byrow = TRUE,
                     ncol = 2, nrow = 5)

# transform these points onto ellipsoid coordinates
Z_test <- pointsToEllipsoid(test_these, Sigma, mu)

# determine whther or not these points are inside or outside the ellipse drawn 
# with same p as above (percentile).
inside_test <- ellipseInOut(Z_test, p = p)

# inside points are marked TRUE which corresponds to 1 in numeric terms, and 
# outside marked FALSE which corresponds to 0. 
# So below I calculate (1 + !inside_test) which makes 1 correspond to inside 
# and coloured black, and 2 correspond to outside and coloured red.
# and plot them with colour coding for whether they are inside or outside
plot(test_these[,2] ~ test_these[,1],
       col = 1 + !inside_test,
       pch = "*",
       cex = 2, 
     xlim = c(-6, 6), 
     ylim = c(-6, 6))

# and add the ellipse same as the one above
tmp <- addEllipse(mu, Sigma, p.interval = p, col = "red", lty = 2)

## -----------------------------------------------------------------------------

# a wrapper function to simulate a population, fit the ellipses and 
# determine the proportion of samples inside.

testEllipse <- function(n, p) {
  
  # generate the samples
  Y <- generateSiberGroup(n.obs = n)
  
  # sample moments
  mu <- colMeans(Y) # centre of the ellipse
  Sigma <- cov(Y) # covariance matrix of the ellipse
  
  # Determine which of the samples are inside the ellipse
  Z_samp <- pointsToEllipsoid(Y, Sigma, mu) # convert to circle space
  
  # test if inside
  inside_samp <- ellipseInOut(Z_samp, p = p)
  
  # return propotion inside
  return(sum(inside_samp) / length(inside_samp))

  
}


## -----------------------------------------------------------------------------

# define the prediciton interval to use
prediction_interval <- 0.95

# specify a range of n
do_these_n <- c(5, 10, 15, 20, 50, 100, 320, 1000)

# how many replicates per n to simulate
reps_per_n <- 200

# replicate a range of n and sort on n
simExperiment <- tibble(n = rep(do_these_n, reps_per_n ),
                        p = prediction_interval) %>% arrange(n)


## -----------------------------------------------------------------------------

simExperiment %<>% mutate(p_inside = pmap(list(n = n, p = p), testEllipse) %>% unlist)

# and summarise the results
summaries_simExperiment <- simExperiment %>% group_by(n) %>% 
  summarise(mu_p_inside = mean(p_inside),
            min_p_inside = min(p_inside),
            max_p_inside = max(p_inside))


## -----------------------------------------------------------------------------
# and plot p_inside against n
g3 <- ggplot(data = simExperiment, mapping = aes(x = n, y = p_inside)) + 
  geom_jitter() + 
  scale_x_log10() + 
  geom_point(data = summaries_simExperiment, mapping = aes(y = mu_p_inside),
             color = "red") + 
  geom_path(data = summaries_simExperiment, mapping = aes(y = mu_p_inside),
             color = "red")

print(g3)


## ----highdim, eval = TRUE-----------------------------------------------------
# set the random seed generator so we get consistent results each time 
# we run this code.
set.seed(2)

# n random numbers
n_d <- 10^3

# number of dimensions
d <- 3

# vector of d means between -1 and +1
mu_d <- stats::runif(d, -1, +1)
  
# a (d x d) covariance matrix
# pull a precision matrix from the wishart distribution and invert it to 
# get the corresponding covariance matrix.
sigma_d <- solve(matrix(stats::rWishart(1, d, diag(d)), d, d))

# n-dimensional multivariate random numbers for this test
Y_d <- mnormt::rmnorm(n_d, mu_d, sigma_d)  

# sample mean and covariance matrix
mu_samp_d <- colMeans(Y_d) # centre of the ellipse
Sigma_samp_d <- cov(Y_d) # covariance matrix of the ellipse

# percentile of the ellipsoid to test
p <- 0.95 

# here i am just going to test whether my actual data points are inside
# or outside the 95% ellipsoid but you could replace with your own 
# data points as above

# transform these points onto ellipsoid coordinates
Z_d <- pointsToEllipsoid(Y_d, Sigma_samp_d, mu_d)

# determine whther or not these points are inside or outside the ellipse drawn 
# with same p as above (percentile).
inside_d <- ellipseInOut(Z_d, p = p)

# how many of our points are inside our ellipse?
p_d_inside <- sum(inside_d) / length(inside_d)
AndrewLJackson/SIBER documentation built on Oct. 21, 2023, 8:09 a.m.