# ================================== corr_sim =================================
#' Sampling distribution of the correlation coefficient movie
#'
#' A movie to illustrate how the sampling distribution of the product moment
#' sample correlation coefficient \eqn{r} depends on the sample size \eqn{n}
#' and on the true correlation \eqn{\rho}.
#'
#' @param n An integer scalar. The initial value of the sample size.
#' Must not be less than 2.
#' @param rho A numeric scalar. The initial value of the true correlation
#' \eqn{\rho}. Must be in [-1, 1].
#' @param panel_plot A logical parameter that determines whether the plot
#' is placed inside the panel (\code{TRUE}) or in the standard graphics
#' window (\code{FALSE}). If the plot is to be placed inside the panel
#' then the tkrplot library is required.
#' @param hscale,vscale Numeric scalars. Scaling parameters for the size
#' of the plot when \code{panel_plot = TRUE}. The default values are 1.4 on
#' Unix platforms and 2 on Windows platforms.
#' @param delta_n An integer scalar. The amount by which the value of the
#' sample size is increased/decreased after one click of the +/- button.
#' @param delta_rho A numeric scalar. The amount by which the value of
#' rho is increased/decreased after one click of the +/- button.
#' @param ... Additional arguments to the rpanel functions
#' \code{\link[rpanel]{rp.button}} and
#' \code{\link[rpanel]{rp.doublebutton}}, not including \code{panel},
#' \code{variable}, \code{title}, \code{step}, \code{action}, \code{initval},
#' \code{range}.
#' @details Random samples of size \eqn{n} are simulated from a
#' \href{https://en.wikipedia.org/wiki/Multivariate_normal_distribution}{bivariate normal distribution}
#' in which each of the variables has a mean of 0 and a variance of 1 and
#' the correlation \eqn{\rho} between the variables is chosen by the user.
#'
#' The movie contains two plots. On the top is a scatter plot of the
#' simulated sample, illustrating the strength of the association between
#' the simulated values of the variables.
#' A new sample is produced by clicking "simulate another sample.
#' For each simulated sample the sample (product moment) correlation
#' coefficient \eqn{r} is calculated and displayed in the title of the top
#' plot.
#'
#' The values of the sample correlation coefficients are stored and are
#' plotted in a histogram in the bottom plot. A rug displays the individual
#' values, with the most recent value coloured red. As we accumulate a large
#' number of values in this histogram the shape of the sampling
#' distribution of \eqn{r} emerges. The exact p.d.f. of \eqn{r} is
#' superimposed on this histogram, as is the value of \eqn{\rho}.
#'
#' The bottom plot can be changed in two ways:
#' (i) a radio button can be pressed to replace the histogram and pdf with
#' a plot of the empirical c.d.f. and exact cdf;
#' (ii) the variable can be changed from \eqn{\rho} to Fisher's
#' z-transformation \eqn{F(\rho) = arctanh(\rho) = [ln(1+\rho) - ln(1-\rho)]/2}.
#' For sufficiently large values of \eqn{n}, \eqn{F(\rho)} has approximately
#' a normal distribution with mean \eqn{\rho} and variance
#' \eqn{1 / (n - 3)}.
#'
#' The values of the sample size \eqn{n} or true correlation coefficient
#' \eqn{\rho} can be changed using the respective +/- buttons.
#' If one of these is changed then the bottom plot is
#' reset using the sample correlation coefficient of the first sample
#' simulated using the new combination of \eqn{n} and \eqn{\rho}.
#' @return Nothing is returned, only the animation is produced.
#' @seealso \code{\link{movies}}: a user-friendly menu panel.
#' @seealso \code{\link{smovie}}: general information about smovie.
#' @examples
#' correlation(rho = 0.8)
#' correlation(n = 10)
#' @export
correlation <- function(n = 30, rho = 0, panel_plot = TRUE, hscale = NA,
vscale = hscale, delta_n = 1, delta_rho = 0.1, ...) {
# Check that SuppDists is installed
if (!requireNamespace("SuppDists", quietly = TRUE)) {
stop("the SuppDists package is needed. Please install it.",
call. = FALSE)
}
temp <- set_scales(hscale, vscale)
hscale <- temp$hscale
vscale <- temp$vscale
if (n < 2) {
stop("n must not be less than 2")
}
n <- round(n, 0)
if (rho < -1 || rho > 1) {
stop("rho must be in [-1, 1]")
}
delta_n <- round(delta_n)
nseed <- nseed_init <- 47
rho_init <- rho
nsim <- nsim_init <- n
rvals <- NULL
vals <- NULL
fisher_z <- FALSE
pdf_or_cdf <- "pdf"
#
now_time <- strsplit(substr(date(), 12, 19), ":")[[1]]
now_time <- paste(now_time[1], now_time[2], now_time[3], sep = "")
# Set a unique panel name to enable saving of objects to the correct panel
my_panelname <- paste("correlation_", now_time, sep = "")
nseed_old <- nseed_init
rho_old <- rho_init
nsim_old <- nsim_init
fisher_z_old <- fisher_z
old_pdf_or_cdf <- pdf_or_cdf
#
corr_sim_panel <- rpanel::rp.control("correlation", panelname = my_panelname,
nsim = nsim_init, rho = rho_init,
nseed = nseed_init, fisher_z = FALSE,
pdf_or_cdf = "pdf",
nseed_old = nseed_old,
rho_old = rho_old, nsim_old = nsim_old,
fisher_z_old = fisher_z,
old_pdf_or_cdf = pdf_or_cdf,
vals = vals, rvals = rvals)
#
redraw_plot <- NULL
panel_redraw <- function(panel) {
rpanel::rp.tkrreplot(panel = panel, name = redraw_plot)
# rp.tkrreplot() doesn't update the panel automatically, so do it manually
# Get ...
panel$vals <- rpanel::rp.var.get(my_panelname, "vals")
panel$rvals <- rpanel::rp.var.get(my_panelname, "rvals")
panel$nseed_old <- rpanel::rp.var.get(my_panelname, "nseed_old")
panel$rho_old <- rpanel::rp.var.get(my_panelname, "rho_old")
panel$nsim_old <- rpanel::rp.var.get(my_panelname, "nsim_old")
panel$fisher_z_old <- rpanel::rp.var.get(my_panelname, "fisher_z_old")
panel$old_pdf_or_cdf <- rpanel::rp.var.get(my_panelname, "old_pdf_or_cdf")
# Put ...
rpanel::rp.control.put(my_panelname, panel)
return(panel)
}
if (panel_plot & !requireNamespace("tkrplot", quietly = TRUE)) {
warning("tkrplot is not available so panel_plot has been set to FALSE.")
panel_plot <- FALSE
}
if (panel_plot) {
# Set a seed and then reset it before the panel is redrawn so that only
# one arrow and sample maximum appears in the first plot
rpanel::rp.tkrplot(panel = corr_sim_panel, name = redraw_plot,
plotfun = corr_sim_movie_plot, pos = "right",
hscale = hscale, vscale = vscale, background = "white")
action <- panel_redraw
} else {
action <- corr_sim_movie_plot
}
#
# Create buttons for movie
dlist <- list(...)
# If the user hasn't set either repeatdelay or repeatinterval then set them
# to the default values in rp.doublebutton (100 milliseconds)
if (is.null(dlist$repeatdelay) & is.null(dlist$repeatinterval)) {
rpanel::rp.button(panel = corr_sim_panel, action = action,
title = "simulate another sample",
repeatdelay = 100, repeatinterval = 100, ...)
} else {
rpanel::rp.button(panel = corr_sim_panel, action = action,
title = "simulate another sample",
...)
}
rpanel::rp.doublebutton(corr_sim_panel, nsim, delta_n, range = c(2, 1000),
repeatinterval = 20, initval = n,
title = "sample size, n",
action = action, showvalue = TRUE, ...)
rpanel::rp.doublebutton(corr_sim_panel, rho, delta_rho, range = c(-1, 1),
repeatinterval = 20, initval = rho_init,
title = "correlation, rho", action = action,
showvalue = TRUE, ...)
rpanel::rp.checkbox(panel = corr_sim_panel, fisher_z,
labels = "Fisher z-transformation",
action = action)
rpanel::rp.radiogroup(panel= corr_sim_panel, pdf_or_cdf, c("pdf", "cdf"),
title = "pdf or cdf in bottom plot",
action = action)
if (!panel_plot) {
rpanel::rp.do(panel = corr_sim_panel, action = action)
}
return(invisible())
}
# Function to be called by corr_sim_movie().
corr_sim_movie_plot <- function(panel){
oldpar <- graphics::par(mfrow = c(1, 1), bty = "l", las = 1,
oma = c(0, 0, 0, 0))
on.exit(graphics::par(oldpar))
# To please R CMD check
rho <- nsim <- fisher_z <- pdf_or_cdf <- nseed <- NULL
panel <- within(panel, {
if (rho != rho_old | nsim != nsim_old) {
rvals <- NULL
}
# Colour for the histograms and empirical cdfs
my_col <- 8
cond1 <- (fisher_z_old & fisher_z) | (!fisher_z_old & !fisher_z)
cond2 <- (rho == rho_old & nsim == nsim_old) | nsim != nsim_old
cond3 <- (old_pdf_or_cdf == pdf_or_cdf)
if (cond1 & cond2 & cond3){
vals <- matrix(stats::rnorm(2 * nsim), ncol = 2, nrow = nsim,
byrow = TRUE)
}
x1 <- vals[, 1]
x2 <- vals[, 2]
y1 <- rho * x1 + sqrt(1 - rho ^ 2) * x2
sim_vals <- cbind(x1, y1)
nf <- graphics::layout(mat = matrix(c(0, 2, 2, 0,
1, 1, 1, 1), nrow = 2, byrow = TRUE))
if ((fisher_z_old & fisher_z) | (!fisher_z_old & !fisher_z)) {
new_rval <- stats::cor(sim_vals)[1, 2]
rvals <- c(rvals, new_rval)
} else {
new_rval <- rvals[length(rvals)]
}
graphics::par(mar = c(4, 4.2, 1, 1))
atanh_rho <- atanh(rho)
# Calculate the true density (under sampling from a BV normal)
if (!fisher_z | (fisher_z & nsim < 3)) {
if (nsim <= 3) {
ep <- 1e-2
# Use the normal approximation to avoid numerical issues
z_range <- stats::qnorm(p = c(0.01, 0.99), mean = atanh_rho,
sd = 1 / sqrt(4 - 3))
r_range <- tanh(z_range)
} else {
# Use the normal approximation to avoid numerical issues
z_range <- stats::qnorm(p = c(0.01, 0.99), mean = atanh_rho,
sd = 1 / sqrt(nsim - 3))
r_range <- tanh(z_range)
}
r_range <- range(r_range, rvals)
r_vec <- seq(from = r_range[1], to = r_range[2], len = 101)
if (abs(rho) < 1 & nsim > 2) {
if (pdf_or_cdf == "pdf") {
true_pdf_vec <- SuppDists::dPearson(x = r_vec, N = nsim, rho = rho)
my_ylim <- c(0, max(true_pdf_vec) * 1.25)
} else {
true_cdf_vec <- SuppDists::pPearson(q = r_vec, N = nsim, rho = rho)
my_ylim <- 0:1
}
} else {
if (pdf_or_cdf == "pdf") {
my_ylim <- NULL
} else {
my_ylim <- 0:1
}
}
if (pdf_or_cdf == "pdf") {
if (abs(rho) < 1) {
graphics::hist(rvals, freq = FALSE, col = my_col, xlim = r_range,
main = "", axes = FALSE, ylim = my_ylim, xlab = "r",
ylab = "pdf", cex.lab = 1.5)
} else {
graphics::plot(1, type = "h", xlim = c(-1, 1), ylim = c(0, 1),
lwd = 2, xlab = "r", ylab = "pdf", cex.lab = 1.5,
axes = FALSE)
graphics::segments(rho, 0, rho, 1, lty = 1, lwd = 2, col = "blue")
leg_pos <- ifelse(rho > 0, "topleft", "topright")
graphics::legend(leg_pos, legend = expression(rho), col = "blue", lty = 1,
lwd = 2, box.lty = 0, cex = 1.5)
}
} else {
ecdf_rvals <- stats::ecdf(rvals)
leg_pos <- "topleft"
if (abs(rho) < 1) {
graphics::plot(ecdf_rvals, col = my_col, las = 1, main = "",
axes = FALSE, xlab = "r", ylab = "cdf", xpd = TRUE,
xlim = r_range, ylim = my_ylim, col.01line = 0,
cex.lab = 1.5, cex = 1.5)
graphics::legend(leg_pos,
legend = c("exact cdf", "empirical cdf"),
col = c("black", 8), lty = c(1, -1), lwd = 2,
pch = c(-1, 16), box.lty = 0, cex = 1.5)
} else {
graphics::plot(c(-1, 1, 1, 1.2), c(0, 0, 1, 1), xlim = c(-1, 1),
ylim = c(0, 1), type = "l", lwd = 2, axes = FALSE,
xlab = "r", ylab = "cdf", cex.lab = 1.5, cex = 1.5)
graphics::legend(leg_pos, legend = "exact cdf", col = "black",
lty = 1, lwd = 2, box.lty = 0, cex = 1.5)
}
}
if (!fisher_z | (fisher_z & abs(rho) < 1)) {
graphics::axis(2)
graphics::rug(rvals, line = 0.5, ticksize = 0.05)
graphics::rug(new_rval, line = 0.5, ticksize = 0.05, col = "red",
lwd = 2)
}
# Add the true density function, but only if |rho| is not equal to 1
if (abs(rho) < 1 & nsim > 2) {
if (pdf_or_cdf == "pdf") {
graphics::lines(r_vec, true_pdf_vec, lwd = 2, col = "black")
leg_pos <- ifelse(rho > 0, "topleft", "topright")
graphics::legend(leg_pos, legend = c("exact pdf", expression(rho)),
col = c("black", "blue"), lty = c(1, 1), lwd = 2,
box.lty = 0, cex = 1.5)
rtop <- SuppDists::dPearson(x = rho, N = nsim, rho = rho)
graphics::segments(rho, 0, rho, rtop, lty = 1, lwd = 2, col = "blue")
} else {
graphics::lines(r_vec, true_cdf_vec, lwd = 2, col = "black")
leg_pos <- "topleft"
graphics::legend(leg_pos,
legend = c("exact cdf", "empirical cdf"),
col = c("black", 8), lty = c(1, -1), lwd = 2,
pch = c(-1, 16), box.lty = 0, cex = 1.5)
}
}
if (!fisher_z | (fisher_z & abs(rho) < 1)) {
graphics::axis(1, line = 0.5)
graphics::axis(1, line = 0.5, at = c(-1, 1), labels = c("-1.0", "1.0"))
}
} else if (nsim > 2) {
zvals <- atanh(rvals)
z_range <- range(zvals)
if (abs(rho) < 1 & nsim > 3) {
# Use the normal approximation to avoid numerical issues
z_range_2 <- stats::qnorm(p = c(0.01, 0.99), mean = atanh_rho,
sd = 1 / sqrt(nsim - 3))
z_range <- range(z_range, z_range_2)
z_vec <- seq(from = z_range[1], to = z_range[2], len = 101)
if (pdf_or_cdf == "pdf") {
true_pdf_vec <- dFcorr(x = z_vec, N = nsim, rho = rho)
approx_pdf_vec <- stats::dnorm(x = z_vec, mean = atanh_rho,
sd = 1 / sqrt(nsim - 3))
my_ylim <- c(0, max(true_pdf_vec) * 1.25)
} else {
true_cdf_vec <- pFcorr(q = z_vec, N = nsim, rho = rho)
approx_cdf_vec <- stats::pnorm(q = z_vec, mean = atanh_rho,
sd = 1 / sqrt(nsim - 3))
my_ylim <- c(0, 1)
}
} else {
if (pdf_or_cdf == "pdf") {
my_ylim <- NULL
} else {
my_ylim <- 0:1
}
}
new_zval <- atanh(new_rval)
if (pdf_or_cdf == "pdf") {
if (abs(rho) < 1) {
graphics::hist(zvals, freq = FALSE, col = 8, main = "", axes = FALSE,
ylim = my_ylim, xlab = "arctanh(r)", cex.lab = 1.5,
xlim = z_range, ylab = "pdf")
} else {
graphics::plot(1, type = "n", xlim = c(-1, 1), ylim = c(0, 1),
lwd = 2, xlab = "r", ylab = "pdf",
axes = FALSE, ann = FALSE)
graphics::axis(2, col = 0, col.ticks = 0, col.axis = "white")
graphics::axis(1, line = 0.5, col = 0, col.ticks = 0,
col.axis = "white")
cex_val <- 2
if (rho == 1) {
graphics::text(0, 0.5,
"Fisher z value is always +Inf when rho = +1",
cex = cex_val)
} else {
graphics::text(0, 0.5,
"Fisher z value is always -Inf when rho = -1",
cex = cex_val)
}
}
} else {
ecdf_zvals <- stats::ecdf(zvals)
if (abs(rho) < 1) {
graphics::plot(ecdf_zvals, col = my_col, las = 1, main = "",
axes = FALSE, xlab = "arctanh(r)", ylab = "cdf",
xpd = TRUE, xlim = z_range, ylim = my_ylim,
col.01line = 0, cex.lab = 1.5, cex = 1.5)
} else {
graphics::plot(1, type = "n", xlim = c(-1, 1), ylim = c(0, 1),
lwd = 2, xlab = "r", ylab = "pdf",
axes = FALSE, ann = FALSE)
graphics::axis(2, col = 0, col.ticks = 0, col.axis = "white")
graphics::axis(1, line = 0.5, col = 0, col.ticks = 0,
col.axis = "white")
cex_val <- 2
if (rho == 1) {
graphics::text(0, 0.5,
"Fisher z value is always +Inf when rho = +1",
cex = cex_val)
} else {
graphics::text(0, 0.5,
"Fisher z value is always -Inf when rho = -1",
cex = cex_val)
}
}
}
if (!fisher_z | (fisher_z & abs(rho) < 1)) {
graphics::axis(2)
graphics::rug(zvals, line = 0.5, ticksize = 0.05)
graphics::rug(new_zval, line = 0.5, ticksize = 0.05, col = "red", lwd = 2)
}
# Add the true density function, but only if |rho| is not equal to 1
if (abs(rho) < 1 & nsim > 3) {
if (pdf_or_cdf == "pdf") {
graphics::lines(z_vec, true_pdf_vec, lwd = 2, col = "black")
graphics::lines(z_vec, approx_pdf_vec, lwd = 2, col = "red", lty = 2)
leg_pos <- ifelse(rho > 0, "topleft", "topright")
graphics::legend(leg_pos, legend = c("exact", "approximate",
expression(arctanh(rho))),
col = c("black", "red", "blue"), lty = c(1, 2, 1),
lwd = 2, box.lty = 0, cex = 1.5)
graphics::segments(atanh_rho, 0, atanh_rho, dFcorr(atanh_rho, nsim,
rho),
lty = 1, lwd = 2, col = "blue")
} else {
graphics::lines(z_vec, true_cdf_vec, lwd = 2, col = "black")
graphics::lines(z_vec, approx_cdf_vec, lwd = 2, col = "red", lty = 2)
leg_pos <- "topleft"
graphics::legend(leg_pos,
legend = c("exact", "approximate", "empirical cdf"),
col = c("black", "red", 8), lty = c(1, 2, -1),
lwd = 2, pch = c(-1, -1, 16), box.lty = 0, cex = 1.5)
}
} else {
graphics::abline(v = atanh_rho, lty = 2, lwd = 2, col = "blue")
}
if (!fisher_z | (fisher_z & abs(rho) < 1)) {
graphics::axis(1, line = 0.5)
graphics::axis(1, line = 0.5, at = c(-1e10, 1e10))
}
}
nseed_old <- nseed
rho_old <- rho
nsim_old <- nsim
fisher_z_old <- fisher_z
old_pdf_or_cdf <- pdf_or_cdf
graphics::par(mar = c(3, 3, 2, 1))
graphics::plot(sim_vals, pch = 16, xlab = "x", ylab = "y",
xlim = c(-3.5, 3.5), ylim = c(-3.5, 3.5))
rhoval <- round(rho, 2)
rprint <- sprintf('%.2f', abs(stats::cor(sim_vals)[1, 2]))
rval <- stats::cor(sim_vals)[1, 2]
if (rval > 0 ) {
ttxt <- substitute(paste(rho == rhoval, " , ", r == +rprint, " , ",
n == nsim),
list(rhoval = rhoval, rprint = rprint, nsim = nsim))
} else {
ttxt <- substitute(paste(rho == rhoval, " , ", r == -rprint, " , ",
n == nsim),
list(rhoval = rhoval, rprint = rprint, nsim = nsim))
}
graphics::title(main = ttxt, cex.main = 1.5)
})
return(invisible(panel))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.