#' Play God: Simulate a Univariate Regression
#'
#' Making inferences from data can be hard. Most of the time, this is because we don't know the true population parameters. But, if we simulate the data ourself, this is no longer a problem. This function simulates data, then fits and plots a univariate regression. Try changing the different parameters to see what happens. This will help to develop your instincts before you analyse real data.
#' @param a True intercept value. Defaults to 0.
#' @param b True slope value. Defaults to 1.
#' @param e True error variance. Defaults to 1.
#' @author Jack Bailey jack.bailey@manchester.ac.uk
#' @export
#' @examples
#' # Run the three following examples and keep an eye on the R2
#'
#' play_god(e = 1)
#' play_god(e = 3)
#' play_god(e = 5)
play_god <- function(a = .5, b = 1, e = .5){
# Stop function if n < 5
if(e == 0){
# Stop function if error variance is 0
cat(bold("Error: ") %+% "Please set " %+% bold("e") %+% " to be greater than 0.\n\n")
} else if(length(a) > 1){
cat(bold("Error: ") %+% "Please use only one value for " %+% bold("a") %+% ".\n\n")
} else if(length(b) > 1){
cat(bold("Error: ") %+% "Please use only one value for " %+% bold("b") %+% ".\n\n")
} else if(length(e) > 1){
cat(bold("Error: ") %+% "Please use only one value for " %+% bold("e") %+% ".\n\n")
}
# Simulate data
n <- 250
X <- cbind(1, rnorm(n, 0, 1.5))
epsilon <- rnorm(n, 0, e)
beta <- c(a, b)
y <- X %*% beta + epsilon
# Estimate betas
b_hat <- solve(t(X) %*% X) %*% t(X) %*% y
# Calculate residuals
resid <- y - X %*% b_hat
# Estimate sigma^2
s2_hat <- (t(resid) %*% resid)/(nrow(X) - ncol(X))
# Estimate variance-covariance matrix of estimated betas
vcov_b_hat <- c(s2_hat) * solve(t(X) %*% X)
# Compute t-values
t_val <- b_hat/sqrt(diag(vcov_b_hat))
# Compute p-values
p_val <- 2*pt(-abs(t_val),df = n-4)
# Compute r-squared
rsq <- cor(X[,2], y)^2
# Create p-value asterisk function
pvalue <- function(pval){
if(pval >= 0 & pval < .001){
return("***")
} else if(pval >= .001 & pval < .01){
return("**")
} else if(pval >= .01 & pval < .05){
return("*")
} else {
return("")
}
}
# Create plot
gg <- data.frame(x = X[,2], y = y)
plot <-
ggplot(gg, aes(x = x, y = y)) +
geom_point(alpha = .5, col = "#999999") +
stat_smooth(method = "lm", col = "#660099", fill = "#999999", alpha = .3) +
coord_cartesian(xlim = -1:5,
ylim = -1:5) +
xlab("X-axis") +
ylab("Y-axis") +
scale_x_continuous(breaks = -1:5) +
scale_y_continuous(breaks = -1:5) +
geom_hline(yintercept = 0, color = "#999999", size = .5) +
geom_vline(xintercept = 0, color = "#999999", size = .5) +
annotate(
"segment",
x = 0,
xend = 0,
y = 0,
yend = b_hat[1],
size = 1.5,
color = "#FFCC33") +
annotate(
"segment",
x = 1,
xend = 2,
y = b_hat[1] + b_hat[2],
yend = b_hat[1] + b_hat[2],
size = 1.5,
color = "#FFCC33") +
annotate(
"segment",
x = 2,
xend = 2,
y = b_hat[1] + b_hat[2],
yend = b_hat[1] + 2*b_hat[2],
size = 1.5,
color = "#FFCC33") +
annotate(
"text",
label = paste0("a = ", round(b_hat[1], 2), pvalue(p_val[1])),
x = .2,
y = .5*b_hat[1],
hjust = 0) +
annotate(
"text",
label = paste0("b = ", round(b_hat[2], 2), pvalue(p_val[2])),
x = 2.3, y = b_hat[1] + 1.5*b_hat[2],
# horizontal justification = 0 sets x position to left edge of text
hjust = 0) +
annotate("text",
x = 4.5,
y = -.5,
label = paste0("R2 = ", round(rsq, 2))) +
annotate(
"point",
x = 0,
y = 0,
size = 3,
color = "#FFCC33") +
annotate(
"point",
x = 0,
y = b_hat[1],
size = 3,
color = "#FFCC33") +
annotate(
"point",
x = 1,
y = b_hat[1] + b_hat[2],
size = 3,
color = "#FFCC33") +
annotate(
"point",
x = 2,
y = b_hat[1] + 2*b_hat[2],
size = 3,
color = "#FFCC33") +
theme_bw() +
theme(axis.line = element_blank(),
axis.ticks = element_blank(),
panel.border = element_blank())
# Return plot and table
return(plot)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.