Nothing
#' Illustration of the central limit theorem for sampling from the
#' uniform distribution
#'
#' @param nsize Sample size, n. Its default value is 10.
#' @param nrep Number of replications. How many samples of size \code{nsize}
#' should be taken, default value is 10000.
#' @return A vector of means of the replicated samples. The function also
#' has the side effect of drawing a histogram of the sample means and
#' two superimposed density functions: one estimated from the data using
#' the \code{density} function and the other is the density of the CLT
#' approximated normal distribution. The better the CLT approximation, the
#' closer are the two superimposed densities.
#' @examples
#' a <- see_the_clt_for_uniform()
#' old.par <- par(no.readonly = TRUE)
#' par(mfrow=c(2, 3))
#' a1 <- see_the_clt_for_uniform(nsize=1)
#' a2 <- see_the_clt_for_uniform(nsize=2)
#' a3 <- see_the_clt_for_uniform(nsize=5)
#' a4 <- see_the_clt_for_uniform(nsize=10)
#' a5 <- see_the_clt_for_uniform(nsize=20)
#' a6 <- see_the_clt_for_uniform(nsize=50)
#' par(old.par)
#' ybars <- see_the_clt_for_uniform(nsize=12)
#' zbars <- (ybars - mean(ybars))/sd(ybars)
#' k <- 100
#' u <- seq(from=min(zbars), to= max(zbars), length=k)
#' ecdf <- rep(NA, k)
#' for(i in 1:k) ecdf[i] <- length(zbars[zbars<u[i]])/length(zbars)
#' tcdf <- pnorm(u)
#' plot(u, tcdf, type="l", col="red", lwd=4, xlab="", ylab="cdf")
#' lines(u, ecdf, lty=2, col="darkgreen", lwd=4)
#' symb <- c("cdf of sample means", "cdf of N(0, 1)")
#' legend(x=-3.5, y=0.4, legend = symb, lty = c(2, 1),
#' col = c("darkgreen","red"), bty="n")
#'
#' @export
see_the_clt_for_uniform <- function(nsize = 10, nrep=10000) {
## set number of replicate samples
## First generate sample*nrep many U(0,1)
x <- runif(nsize * nrep)
## Put the random values in a matrix form with nsample rows and nrep columns.
## Each column is then a replicate
y <- matrix(x, ncol=nrep)
## Now find mean for each column
zbar <- apply(y, 2, mean) ## 2 for columns
## Would like to overlay a density
u <- seq(from=min(zbar), to= max(zbar), length=100)
v <- dnorm(u, mean=0.5, sd=sqrt(1/(12*nsize)))
v1 <- density(zbar)
hist(zbar, nclass=20, prob=TRUE, xlab="sample mean", col="grey", xlim=range(c(min(zbar)-0.05, max(zbar)+0.05)),
ylim=range(v, v1$y), main=paste("Histogram of", nrep, "sample means from samples of size", nsize))
lines(density(zbar), lty=2, col="darkgreen", lwd=2) # add a density estimate with defaults
## Now the normal distribution density
lines(u, v, lty=1, col="red", lwd=2)
symb <- c("data density", "CLT density")
legend(x=mean(zbar), y=max(v)-0.5, legend = symb, lty = c(2, 1), col = c("darkgreen","red"), bty="n")
zbar
}
#' Illustration of the CLT for samples from the Bernoulli distribution
#'
#' @inheritParams see_the_clt_for_uniform
#' @param prob True probability of success for the Bernoulli trials
#' @return A vector of means of the replicated samples.
#' It also has the side effect of drawing a histogram of
#' the standardized sample means and a superimposed density function
#' of the standard normal distribution. The better the CLT approximation,
#' the closer are the superimposed density and the histogram.
#' @examples
#' a <- see_the_clt_for_Bernoulli()
#' old.par <- par(no.readonly = TRUE)
#' par(mfrow=c(2, 3))
#' a30 <- see_the_clt_for_Bernoulli(nsize=30)
#' a50 <- see_the_clt_for_Bernoulli(nsize=50)
#' a100 <- see_the_clt_for_Bernoulli(nsize=100)
#' a500 <- see_the_clt_for_Bernoulli(nsize=500)
#' a1000 <- see_the_clt_for_Bernoulli(nsize=1000)
#' a5000 <- see_the_clt_for_Bernoulli(nsize=5000)
#' par(old.par)
#' @export
see_the_clt_for_Bernoulli <- function(nsize = 10, nrep=10000, prob=0.8) {
## set number of replicate samples
#
## First generate sample*nrep many U(0,1)
## library(extraDistr)
x <- extraDistr::rbern(nsize * nrep, prob=prob)
## Put the random values in a matrix form with nsample rows and nrep columns.
## Each column is then a replicate
y <- matrix(x, ncol=nrep)
## Now find mean for each column
ybar <- apply(y, 2, mean) ## 2 for columns
zbar <- (ybar - mean(ybar))/sd(ybar)
## Would like to overlay a density
u <- seq(from=min(zbar), to= max(zbar), length=100)
# v <- dnorm(u, mean=prob, sd=sqrt((prob * (1-prob)/nsize)))
v <- dnorm(u)
v1 <- density(zbar)
hist(zbar, prob=TRUE, xlab="sample mean", col="grey", xlim=range(c(min(zbar)-0.05, max(zbar)+0.05)),
ylim=range(v, v1$y), main=paste("Histogram of", nrep, "sample means from samples of size", nsize))
# lines(density(zbar), lty=2, col="darkgreen", lwd=2) # add a density estimate with defaults
## Now the normal distribution density
lines(u, v, lty=1, col="red", lwd=2)
# symb <- c("data density", "CLT density")
#legend(x=mean(zbar), y=max(v)-0.5, legend = symb, lty = c(2, 1), col = c("darkgreen","red"), bty="n")
ybar
}
###
#' Illustration of the weak law of large numbers for sampling from the
#' uniform distribution
#' @param nsize Sample size, n. Its default value is 10.
#' @param nrep Number of replications. How many samples of size \code{nsize}
#' should be taken, default value is 10000.
#' @return A list giving the x values and the density estimates y, from the
#' generated random samples. The function also draws the empirical density on
#' the current graphics device.
#' @examples
#' a1 <- see_the_wlln_for_uniform(nsize=1, nrep=50000)
#' a2 <- see_the_wlln_for_uniform(nsize=10, nrep=50000)
#' a3 <- see_the_wlln_for_uniform(nsize=50, nrep=50000)
#' a4 <- see_the_wlln_for_uniform(nsize=100, nrep=50000)
#' plot(a4, type="l", lwd=2, ylim=range(c(a1$y, a2$y, a3$y, a4$y)), col=1,
#' lty=1, xlab="mean", ylab="density estimates")
#' lines(a3, type="l", lwd=2, col=2, lty=2)
#' lines(a2, type="l", lwd=2, col=3, lty=3)
#' lines(a1, type="l", lwd=2, col=4, lty=4)
#' symb <- c("n=1", "n=10", "n=50", "n=100")
#' legend(x=0.37, y=11.5, legend = symb, lty =4:1, col = 4:1)
#' @export
see_the_wlln_for_uniform <- function(nsize = 10, nrep=1000) {
## set number of replicate samples
## First generate sample*nrep many U(0,1)
x <- runif(nsize * nrep)
## Put the random values in a matrix form with nsample rows and nrep columns.
## Each column is then a replicate
y <- matrix(x, ncol=nrep)
## Now find mean for each column
zbar <- apply(y, 2, mean) ## 2 for columns
## Would like to overlay a density
u <- seq(from=min(zbar), to= max(zbar), length=100)
v <- dnorm(u, mean=0.5, sd=sqrt(1/(12*nsize)))
v1 <- density(zbar)
hist(zbar, nclass=20, prob=TRUE, xlab="sample mean", col="grey",
xlim=range(c(min(zbar)-0.05, max(zbar)+0.05)),
ylim=range(v, v1$y), main=paste("Histogram of", nrep,
"sample means from samples of size", nsize))
lines(density(zbar), lty=2, col="darkgreen", lwd=2) #
# add a density estimate with defaults
cbind.data.frame(x=v1$x, y=v1$y)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.