R/play_god.R

#' Play God: Simulate a Univariate Regression
#'
#' Making inferences from data can be hard. Most of the time, this is because we don't know the true population parameters. But, if we simulate the data ourself, this is no longer a problem. This function simulates data, then fits and plots a univariate regression. Try changing the different parameters to see what happens. This will help to develop your instincts before you analyse real data.
#' @param a True intercept value. Defaults to 0.
#' @param b True slope value. Defaults to 1.
#' @param e True error variance. Defaults to 1.
#' @author Jack Bailey jack.bailey@manchester.ac.uk
#' @export
#' @examples
#' # Run the three following examples and keep an eye on the R2
#'
#' play_god(e = 1)
#' play_god(e = 3)
#' play_god(e = 5)

play_god <- function(a = .5, b = 1, e = .5){

  # Stop function if n < 5
  if(e == 0){
    # Stop function if error variance is 0
    cat(bold("Error: ") %+% "Please set " %+% bold("e") %+% " to be greater than 0.\n\n")
  } else if(length(a) > 1){
    cat(bold("Error: ") %+% "Please use only one value for " %+% bold("a") %+% ".\n\n")
  } else if(length(b) > 1){
    cat(bold("Error: ") %+% "Please use only one value for " %+% bold("b") %+% ".\n\n")
  } else if(length(e) > 1){
    cat(bold("Error: ") %+% "Please use only one value for " %+% bold("e") %+% ".\n\n")
  }

  # Simulate data
  n <- 250
  X <- cbind(1, rnorm(n, 0, 1.5))
  epsilon <- rnorm(n, 0, e)
  beta <- c(a, b)
  y <- X %*% beta + epsilon

  # Estimate betas
  b_hat <- solve(t(X) %*% X) %*% t(X) %*% y

  # Calculate residuals
  resid <- y - X %*% b_hat

  # Estimate sigma^2
  s2_hat <- (t(resid) %*% resid)/(nrow(X) - ncol(X))

  # Estimate variance-covariance matrix of estimated betas
  vcov_b_hat <- c(s2_hat) * solve(t(X) %*% X)

  # Compute t-values
  t_val <- b_hat/sqrt(diag(vcov_b_hat))

  # Compute p-values
  p_val <- 2*pt(-abs(t_val),df = n-4)

  # Compute r-squared
  rsq <- cor(X[,2], y)^2

  # Create p-value asterisk function
  pvalue <- function(pval){

    if(pval >= 0 & pval < .001){
      return("***")
    } else if(pval >= .001 & pval < .01){
      return("**")
    } else if(pval >= .01 & pval < .05){
      return("*")
    } else {
      return("")
    }
  }

  # Create plot
  gg <- data.frame(x = X[,2], y = y)

  plot <-
    ggplot(gg, aes(x = x, y = y)) +
    geom_point(alpha = .5, col = "#999999") +
    stat_smooth(method = "lm", col = "#660099", fill = "#999999", alpha = .3) +
    coord_cartesian(xlim = -1:5,
                    ylim = -1:5) +
    xlab("X-axis") +
    ylab("Y-axis") +
    scale_x_continuous(breaks = -1:5) +
    scale_y_continuous(breaks = -1:5) +
    geom_hline(yintercept = 0, color = "#999999", size = .5) +
    geom_vline(xintercept = 0, color = "#999999", size = .5) +
    annotate(
      "segment",
      x = 0,
      xend = 0,
      y = 0,
      yend = b_hat[1],
      size = 1.5,
      color = "#FFCC33") +
    annotate(
      "segment",
      x = 1,
      xend = 2,
      y = b_hat[1] + b_hat[2],
      yend = b_hat[1] + b_hat[2],
      size = 1.5,
      color = "#FFCC33") +
    annotate(
      "segment",
      x = 2,
      xend = 2,
      y = b_hat[1] + b_hat[2],
      yend = b_hat[1] + 2*b_hat[2],
      size = 1.5,
      color = "#FFCC33") +
    annotate(
      "text",
      label = paste0("a = ", round(b_hat[1], 2), pvalue(p_val[1])),
      x = .2,
      y = .5*b_hat[1],
      hjust = 0) +
    annotate(
      "text",
      label = paste0("b = ", round(b_hat[2], 2), pvalue(p_val[2])),
      x = 2.3, y = b_hat[1] + 1.5*b_hat[2],
      # horizontal justification = 0 sets x position to left edge of text
      hjust = 0)  +
    annotate("text",
             x = 4.5,
             y = -.5,
             label = paste0("R2 = ", round(rsq, 2))) +
    annotate(
      "point",
      x = 0,
      y = 0,
      size = 3,
      color = "#FFCC33") +
    annotate(
      "point",
      x = 0,
      y = b_hat[1],
      size = 3,
      color = "#FFCC33") +
    annotate(
      "point",
      x = 1,
      y = b_hat[1] + b_hat[2],
      size = 3,
      color = "#FFCC33") +
    annotate(
      "point",
      x = 2,
      y = b_hat[1] + 2*b_hat[2],
      size = 3,
      color = "#FFCC33") +
    theme_bw() +
    theme(axis.line = element_blank(),
          axis.ticks = element_blank(),
          panel.border = element_blank())

  # Return plot and table
  return(plot)

}
jackobailey/IDA documentation built on May 7, 2019, 6:59 p.m.