library(learnr) knitr::opts_chunk$set(echo = TRUE, eval = TRUE)
We generate the data, after setting the seed. We are going to sample from a multivariate normal distribution. We need to define $\mu$ and $\Sigma$, mean vector and covariance matrix. Moreover, we need to set $n$, the number of samples.
# Loading libraries --------------------- library(mvtnorm) # Setting the seed ---------------------- set.seed(26102016) # Setting mean, variance, samples ------- mu <- c(1,2) sig <- matrix(data = c(1, 1, 1, 4), nrow = 2) n <- 100 # Sampling the data --------------------- X <- rmvnorm(n, mu, sig)
We plot the data
plot(X, asp=1, xlab='Var1', ylab='Var2', pch=19)
And add the sample mean
plot(X, asp=1, xlab='Var1', ylab='Var2', pch=19) points(colMeans(X)[1], colMeans(X)[2], col='red', pch=19,lwd=3)
We then plot the projection on the x and y axes and compute the variance.
plot(X, asp=1, xlab='Var1', ylab='Var2', pch=19) points(colMeans(X)[1], colMeans(X)[2], col='red', pch=19,lwd=3) abline(h=colMeans(X)[2], lty=2, col='grey') points(X[,1], rep(colMeans(X)[2], n), col='red')
var(X[,1])
plot(X, asp=1, xlab='Var1', ylab='Var2', pch=19) points(colMeans(X)[1], colMeans(X)[2], col='red', pch=19,lwd=3) abline(v=colMeans(X)[1], lty=2, col='grey') points(rep(colMeans(X)[1], n), X[,2], col='red')
var(X[,2])
We can evaluate the variability of the dataset along any direction of the plane.
For instance, consider the direction with angle $\theta$ with the x axis: $\theta = \frac{\pi}{6}$
# Set theta ------------------------------ theta <- pi / 6 # Plot points ---------------------------- plot(X, asp=1, xlab='Var1', ylab='Var2', pch=19) abline(a = colMeans(X)[2] - tan(theta)*colMeans(X)[1], b = tan(theta), lty=2) a <- c(cos(theta), sin(theta)) proj30=(a)%*%(t(X)-colMeans(X)) points(colMeans(X)[1]+cos(theta)*proj30, colMeans(X)[2]+sin(theta)*proj30, col='red')
Try to change the value of $\theta$ and see what happens!
plot_direction <- function(X, theta) { tan_theta <- tan(theta) column_means <- colMeans(X) plot(X, asp = 1, xlab = 'Var1', ylab = 'Var2', pch = 19) abline(a = column_means[2] - tan_theta * column_means[1], b = tan_theta, lty = 2) a <- c(cos(theta), sin(theta)) projection <- a %*% (t(X) - column_means) points(column_means[1] + cos(theta) * projection, column_means[2] + sin(theta) * projection, col='red') }
theta <- pi * 1/6 plot_direction(X = X, theta = theta)
We now compute the variance along all the directions.
theta <- seq(from = 0, to = 2*pi, by = 2*pi/3600)
And we compute the variances.
N <- length(theta) a <- matrix(data = c(cos(theta), sin(theta)), nrow = N, ncol = 2, byrow = FALSE) Var <- apply(X = a, MARGIN = 1, FUN = function(x) cov(X %*% x))
We now want to see how the variances change according to the direction.
(max_var <- max(Var)) (max_theta <- theta[which.max(Var)]) (max_a <- c(cos(max_theta), sin(max_theta))) (min_var <- min(Var)) (min_theta <- theta[which.min(Var)]) (min_a <- c(cos(min_theta), sin(min_theta)))
par(mfrow=c(1,2)) plot(X, asp=1, xlab='Var 1', ylab='Var 2',pch=20) abline(a = colMeans(X)[2] - tan(max_theta)*colMeans(X)[1], b = tan(max_theta), lty = 4, col = 'navyblue', lwd = 2) abline(a = colMeans(X)[2] - tan(min_theta)*colMeans(X)[1], b = tan(min_theta), lty = 4, col = 'red', lwd = 2) plot(theta, Var, type = 'l', col='dark grey', lwd = 2,ylab='Variance') points(max_theta, max_var, pch=16, col='navyblue') points(min_theta, min_var, pch=16, col='red')
We now check what we have found against the actual values. We compute the loads (loads
) and the variances (var_pc
).
(loads <- eigen(cov(X))$vectors) (var_pc <- eigen(cov(X))$values)
# Set the graph ----------------------------------------------- par(mfrow=c(1,2)) # Plot the points --------------------------------------------- plot(X, asp=1, xlab='Var 1', ylab='Var 2',pch=20) # Compute the column means of X ------------------------------- M <- colMeans(X) # Plot directions of maximum and minimum variance ------------- abline(a = M[2] - loads[2,1]/loads[1,1]*M[1], b = loads[2,1]/loads[1,1], lty = 2, col = 'dark blue', lwd = 2) abline(a = M[2] - loads[2,2]/loads[1,2]*M[1], b = loads[2,2]/loads[1,2], lty = 2, col = 'dark red', lwd = 2) abline(a = M[2] - tan(max_theta)*M[1], b = tan(max_theta), lty = 4, col = 'navyblue', lwd = 2) abline(a = M[2] - tan(min_theta)*M[1], b = tan(min_theta), lty = 4, col = 'red', lwd = 2) # Plot the possible angles plot(theta, Var, type = 'l', col='dark grey', lwd = 2,ylab='Variance') # Plot the angles of maximum and minimum variances ------------ points(max_theta, max_var, pch=20, col='navyblue') points(min_theta, min_var, pch=20, col='red') points(atan(loads[2,1]/loads[1,1]), max_var, pch=3, col='dark blue') points(atan(loads[2,2]/loads[1,2])+pi, min_var, pch=3, col='dark red')
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.