```{r, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = TRUE
)
quick_eval <- FALSE
```
```{r}
library(MASS)        # multivariate normal utilities
library(mvtnorm)     # rmvnorm() and dmvnorm()
library(latex2exp)   # TeX() for math in plot labels
```
Let $F_n$ be the empirical distribution function of i.i.d. random variables $X_1, X_2, X_3,...$ with distribution function $F$. Define the centered and scaled version of $F_n$ by
\begin{equation}
  G_n(x) = \sqrt{n}\left(F_n(x) - F(x)\right),
  (\#eq:donskerseq)
\end{equation}
indexed by $x \in \mathbb{R}$. For fixed $x$, the classical central limit theorem tells us that $G_n(x)$ converges in distribution to a Gaussian random variable with mean zero and variance $F(x)(1 - F(x))$ as the sample size $n$ grows. Donsker's theorem strengthens this: viewed as a process in $x$, $G_n$ converges in distribution to a mean-zero Gaussian process $G$ whose covariance kernel $K$ is given by
\begin{equation}
  K(s, t) = E[G(s)G(t)] = \min\{F(s), F(t)\} - F(s)F(t).
  (\#eq:donskerkernel)
\end{equation}
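To get a feel for this limit before looking at data, the short sketch below (an illustration added here, with an arbitrary grid and number of paths) draws a few sample paths of the mean-zero Gaussian process with kernel \@ref(eq:donskerkernel), taking $F$ to be the standard normal CDF and using `rmvnorm()` from the mvtnorm package for the finite-dimensional draw.

```{r}
# A few sample paths of the limiting Gaussian process on a grid,
# with F = pnorm, drawn from its finite-dimensional distribution.
grid <- seq(-3, 3, length.out = 100)
p <- pnorm(grid)
K <- outer(p, p, FUN = "pmin") - outer(p, p)  # min{F(s), F(t)} - F(s)F(t)
paths <- rmvnorm(5, sigma = K)
matplot(grid, t(paths), type = "l", lty = 1,
        xlab = "x", ylab = "G(x)")
```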
As a first example, let $X \sim N(0, 1)$.
First, let's look at some realizations of the empirical CDF $F_n$ based on samples of size $n = 1000$.
```{r}
N <- 50    # number of replicate samples
n <- 1000  # sample size

# Each row of 'storage' is one sample of n standard normal draws
storage <- matrix(rnorm(N * n), nrow = N, ncol = n)

# Random line colours, excluding "white" (the first entry of colors())
colors <- sample(colors()[-1], N)

plot(NA, NA, xlim = c(-3, 3), ylim = c(-0.05, 1.05), xlab = "x", ylab = "F(x)")
for (i in 1:N) {
  # Step-function plot of the empirical CDF of the i-th sample
  lines(sort(storage[i, ]), (1:n) / n, type = "s", col = colors[i])
}
```
Next, let's center and scale these empirical CDFs as in \@ref(eq:donskerseq), computing $G_n(x) = \sqrt{n}(F_n(x) - F(x))$ at the sample points.
```{r}
# G_n evaluated at the sample points: sqrt(n) * (F_n(x) - F(x)), with F = pnorm
storage2 <- matrix(NA, nrow = N, ncol = n)
for (i in 1:N) {
  storage2[i, ] <- sqrt(n) *
    (sapply(storage[i, ], function(x) sum(storage[i, ] <= x)) / n - pnorm(storage[i, ]))
}
```
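As an aside, the inner `sapply()` call just evaluates the empirical CDF at the sample points, so the same transformation can be written with base R's `ecdf()`; `storage2_alt` below is only an equivalent rewrite introduced here, not a change to the computation above.

```{r}
# Equivalent computation of G_n at the sample points via ecdf()
storage2_alt <- matrix(NA, nrow = N, ncol = n)
for (i in 1:N) {
  Fn <- ecdf(storage[i, ])  # empirical CDF of the i-th sample
  storage2_alt[i, ] <- sqrt(n) * (Fn(storage[i, ]) - pnorm(storage[i, ]))
}
all.equal(storage2, storage2_alt)
```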
And now we can visualize these transformed paths.
```{r}
plot(NA, NA, xlim = c(-3, 3), ylim = c(-1.5, 1.5),
     xlab = "x", ylab = TeX("$\\sqrt{n}(F_n(x) - F(x))$"))
for (i in 1:N) {
  # Plot each transformed path against the sorted sample points
  ord <- order(storage[i, ])
  lines(storage[i, ord], storage2[i, ord], type = "l", col = colors[i])
}
```
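For reference, we can redraw these paths together with the pointwise $\pm 2$ standard deviation band implied by the variance $F(x)(1 - F(x))$ quoted above, with $F = \Phi$; the band is an illustrative addition, not part of the original figure.

```{r}
# Transformed paths with a pointwise +/- 2 SD band, where the pointwise
# variance of the limit is F(x)(1 - F(x)) with F = pnorm
plot(NA, NA, xlim = c(-3, 3), ylim = c(-1.5, 1.5),
     xlab = "x", ylab = TeX("$\\sqrt{n}(F_n(x) - F(x))$"))
for (i in 1:N) {
  ord <- order(storage[i, ])
  lines(storage[i, ord], storage2[i, ord], col = colors[i])
}
xs <- seq(-3, 3, length.out = 200)
lines(xs,  2 * sqrt(pnorm(xs) * (1 - pnorm(xs))), lty = 2, lwd = 2)
lines(xs, -2 * sqrt(pnorm(xs) * (1 - pnorm(xs))), lty = 2, lwd = 2)
```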
Finally, we can evaluate the log density of each transformed path under the limiting Gaussian process, using the covariance kernel \@ref(eq:donskerkernel) evaluated at the sample points.
```{r}
# Covariance kernel from Donsker's theorem: min{F(s), F(t)} - F(s)F(t)
covMat <- function(p) {
  outer(p, p, FUN = "pmin") - outer(p, p)
}

denStore <- rep(NA, N)
for (i in 1:N) {
  ord <- order(storage[i, ])
  # Kernel at the sorted sample points; the largest point is dropped
  # to keep the covariance matrix away from singularity
  K <- covMat(pnorm(storage[i, ord]))[-n, -n]
  # test <- rmvnorm(1, sigma = K)
  # dmvnorm(test, sigma = K, log = TRUE)
  denStore[i] <- dmvnorm(storage2[i, ord][-n], sigma = K, log = TRUE)
}
denStore
```
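The commented-out `rmvnorm()` call above hints at a calibration check: draw reference paths directly from the limiting Gaussian process and compute the same log density for them. The sketch below is an illustrative addition; `refDen` and the use of `method = "chol"` are choices made here, not part of the original code.

```{r}
# Log densities of reference paths drawn from the limiting process itself,
# computed with the same covariance matrices as above (illustrative check)
refDen <- rep(NA, N)
for (i in 1:N) {
  ord <- order(storage[i, ])
  K <- covMat(pnorm(storage[i, ord]))[-n, -n]
  sim <- rmvnorm(1, sigma = K, method = "chol")  # "chol" is faster than the default here
  refDen[i] <- dmvnorm(sim, sigma = K, log = TRUE)
}
summary(denStore - refDen)
```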
Next, consider the discrete uniform case, $X \sim \text{Unif}(\{1, 2, 3, \dots, 100\})$.
First, let's look at some realizations of the empirical CDF $F_n$ based on samples of size $n = 1000$.
```{r}
N <- 10    # number of replicate samples
n <- 1000  # sample size

# Each row is one sample of n draws from the discrete uniform on {1, ..., 100}
storage <- matrix(sample(1:100, N * n, replace = TRUE), nrow = N, ncol = n)

plot(NA, NA, xlim = c(0, 101), ylim = c(-0.05, 1.05), xlab = "x", ylab = "F(x)")
for (i in 1:N) {
  # Empirical CDF evaluated on the grid 1, ..., 100
  FX <- sapply(1:100, function(x) sum(storage[i, ] <= x)) / n
  lines(1:100, FX, type = "s", col = colors[i])
}
```
Next, let's center and scale these empirical CDFs as in \@ref(eq:donskerseq); here the true CDF is $F(x) = x/100$ on the grid $1, \dots, 100$.
```{r}
# G_n on the grid: sqrt(n) * (F_n(x) - F(x)), with F(x) = x/100
storage2 <- matrix(NA, nrow = N, ncol = 100)
for (i in 1:N) {
  storage2[i, ] <- sqrt(n) *
    sapply(1:100, function(x) sum(storage[i, ] <= x) / n - x / 100)
}
```
And now we can visualize them.
```{r}
plot(NA, NA, xlim = c(0, 101), ylim = c(-1.5, 1.5),
     xlab = "x", ylab = TeX("$\\sqrt{n}(F_n(x) - F(x))$"))
for (i in 1:N) {
  lines(1:100, storage2[i, ], type = "l", col = colors[i])
}
```
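Again, a pointwise $\pm 2$ standard deviation band, now with $F(x) = x/100$, gives a rough visual reference; as before, the band is an added illustration rather than part of the original plot.

```{r}
# Same plot with a pointwise +/- 2 SD band; here F(x) = x/100 on the grid
plot(NA, NA, xlim = c(0, 101), ylim = c(-1.5, 1.5),
     xlab = "x", ylab = TeX("$\\sqrt{n}(F_n(x) - F(x))$"))
for (i in 1:N) {
  lines(1:100, storage2[i, ], col = colors[i])
}
Fx <- (1:100) / 100
lines(1:100,  2 * sqrt(Fx * (1 - Fx)), lty = 2, lwd = 2)
lines(1:100, -2 * sqrt(Fx * (1 - Fx)), lty = 2, lwd = 2)
```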
Again, we evaluate the log density of each transformed path under the limiting Gaussian process, now with the covariance kernel \@ref(eq:donskerkernel) evaluated on the grid.
```{r}
covMat <- function(p) {
  outer(p, p, FUN = "pmin") - outer(p, p)
}

# Kernel on the grid with F(x) = x/100; the last point is dropped because
# F(100) = 1 makes that row and column of the kernel identically zero
K <- covMat((1:100) / 100)[-100, -100]
# test <- rmvnorm(1, mean = rep(0, 99), sigma = K)
# dmvnorm(test, sigma = K)

denStore <- rep(NA, N)
for (i in 1:N) {
  denStore[i] <- dmvnorm(storage2[i, -100], sigma = K, log = TRUE)
}
denStore
```
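As in the normal example, one illustrative way to read these numbers is against the log densities of paths drawn from the limiting process itself; `refDen` below is again a name introduced here rather than part of the original analysis.

```{r}
# Reference log densities for the uniform case (illustrative check)
refDen <- dmvnorm(rmvnorm(N, sigma = K), sigma = K, log = TRUE)
summary(denStore - refDen)
```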