Nothing
## ----echo = FALSE-------------------------------------------------------------
knitr::opts_chunk$set(collapse = TRUE, comment = "#>",
fig.width = 6, fig.height = 5)
## ----gendata, echo = TRUE-----------------------------------------------------
library(dplyr)
library(magrittr)
library(purrr)
library(SIBER)
library(ggplot2)
# set the random seed generator so we get consistent results each time
# we run this code.
set.seed(3)
# n random numbers
n <- 100
# some random multivariate data
Y <- generateSiberGroup(n.obs = n)
## ----warpdata, echo = TRUE----------------------------------------------------
# plot this example data with column 2 by column 1
# plot(Y[,2] ~ Y[,1], type = "n", asp = 1,
# xlim = c(-10, 10),
# ylim = c(-10, 10))
plot(Y[,2] ~ Y[,1], type = "n", asp = 1)
# add an ellipse, in this case a 95% ellipse
mu <- colMeans(Y) # centre of the ellipse
Sigma <- cov(Y) # covariance matrix of the ellipse
# percentile of the ellipse
p <- 0.95
# draw the ellipse
tmp <- addEllipse(mu, Sigma, p.interval = p, col = "red", lty = 2)
# Determine which of the samples are inside the ellipse
Z_samp <- pointsToEllipsoid(Y, Sigma, mu) # convert to circle space
inside_samp <- ellipseInOut(Z_samp, p = p) # test if inside
# inside points are marked TRUE which corresponds to 1 in numeric terms, and
# outside marked FALSE which corresponds to 0.
# So below I calculate (1 + !inside_test) which makes 1 correspond to inside
# and coloured black, and 2 correspond to outside and coloured red.
# and plot them with colour coding for whether they are inside or outside
points(Y[,2] ~ Y[,1], col = 1 + !inside_samp)
## ----manually-test-points, eval = TRUE----------------------------------------
# Define a matrix of 5 data points to test against our ellipse.
# For ease of interpretation of this code, I have built this matrix by
# specifying each row on a separate line and do this by adding the option
# byrow = FALSE (by default R fills down the rows first of a matrix).
test_these <- matrix(c(-2, 2,
0, 0,
-5, 2,
1, -2,
4, 0),
byrow = TRUE,
ncol = 2, nrow = 5)
# transform these points onto ellipsoid coordinates
Z_test <- pointsToEllipsoid(test_these, Sigma, mu)
# determine whther or not these points are inside or outside the ellipse drawn
# with same p as above (percentile).
inside_test <- ellipseInOut(Z_test, p = p)
# inside points are marked TRUE which corresponds to 1 in numeric terms, and
# outside marked FALSE which corresponds to 0.
# So below I calculate (1 + !inside_test) which makes 1 correspond to inside
# and coloured black, and 2 correspond to outside and coloured red.
# and plot them with colour coding for whether they are inside or outside
plot(test_these[,2] ~ test_these[,1],
col = 1 + !inside_test,
pch = "*",
cex = 2,
xlim = c(-6, 6),
ylim = c(-6, 6))
# and add the ellipse same as the one above
tmp <- addEllipse(mu, Sigma, p.interval = p, col = "red", lty = 2)
## -----------------------------------------------------------------------------
# a wrapper function to simulate a population, fit the ellipses and
# determine the proportion of samples inside.
testEllipse <- function(n, p) {
# generate the samples
Y <- generateSiberGroup(n.obs = n)
# sample moments
mu <- colMeans(Y) # centre of the ellipse
Sigma <- cov(Y) # covariance matrix of the ellipse
# Determine which of the samples are inside the ellipse
Z_samp <- pointsToEllipsoid(Y, Sigma, mu) # convert to circle space
# test if inside
inside_samp <- ellipseInOut(Z_samp, p = p)
# return propotion inside
return(sum(inside_samp) / length(inside_samp))
}
## -----------------------------------------------------------------------------
# define the prediciton interval to use
prediction_interval <- 0.95
# specify a range of n
do_these_n <- c(5, 10, 15, 20, 50, 100, 320, 1000)
# how many replicates per n to simulate
reps_per_n <- 200
# replicate a range of n and sort on n
simExperiment <- tibble(n = rep(do_these_n, reps_per_n ),
p = prediction_interval) %>% arrange(n)
## -----------------------------------------------------------------------------
simExperiment %<>% mutate(p_inside = pmap(list(n = n, p = p), testEllipse) %>% unlist)
# and summarise the results
summaries_simExperiment <- simExperiment %>% group_by(n) %>%
summarise(mu_p_inside = mean(p_inside),
min_p_inside = min(p_inside),
max_p_inside = max(p_inside))
## -----------------------------------------------------------------------------
# and plot p_inside against n
g3 <- ggplot(data = simExperiment, mapping = aes(x = n, y = p_inside)) +
geom_jitter() +
scale_x_log10() +
geom_point(data = summaries_simExperiment, mapping = aes(y = mu_p_inside),
color = "red") +
geom_path(data = summaries_simExperiment, mapping = aes(y = mu_p_inside),
color = "red")
print(g3)
## ----highdim, eval = TRUE-----------------------------------------------------
# set the random seed generator so we get consistent results each time
# we run this code.
set.seed(2)
# n random numbers
n_d <- 10^3
# number of dimensions
d <- 3
# vector of d means between -1 and +1
mu_d <- stats::runif(d, -1, +1)
# a (d x d) covariance matrix
# pull a precision matrix from the wishart distribution and invert it to
# get the corresponding covariance matrix.
sigma_d <- solve(matrix(stats::rWishart(1, d, diag(d)), d, d))
# n-dimensional multivariate random numbers for this test
Y_d <- mnormt::rmnorm(n_d, mu_d, sigma_d)
# sample mean and covariance matrix
mu_samp_d <- colMeans(Y_d) # centre of the ellipse
Sigma_samp_d <- cov(Y_d) # covariance matrix of the ellipse
# percentile of the ellipsoid to test
p <- 0.95
# here i am just going to test whether my actual data points are inside
# or outside the 95% ellipsoid but you could replace with your own
# data points as above
# transform these points onto ellipsoid coordinates
Z_d <- pointsToEllipsoid(Y_d, Sigma_samp_d, mu_d)
# determine whther or not these points are inside or outside the ellipse drawn
# with same p as above (percentile).
inside_d <- ellipseInOut(Z_d, p = p)
# how many of our points are inside our ellipse?
p_d_inside <- sum(inside_d) / length(inside_d)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.