Plots

This document is for testing purposes only.

plot(dist ~ speed, data = cars,
     xlab = "Speed (in Miles Per Hour)",
     ylab = "Stopping Distance (in Feet)",
     main = "Stopping Distance vs Speed",
     pch  = 20,
     cex  = 2,
     col  = "grey")
generate_data = function(int = 1,
                         slope = 2,
                         sigma = 5,
                         n_obs = 15,
                         x_min = 0,
                         x_max = 10) {
  x = seq(x_min, x_max, length.out = n_obs)
  y = int + slope * x + rnorm(n_obs, 0, sigma)
  fit = lm(y ~ x)
  y_hat = fitted(fit)
  y_bar = rep(mean(y), n_obs)
  data.frame(x, y, y_hat, y_bar)
}
set.seed(2)
reg_data = generate_data(sigma = 2)
par(mfrow = c(1, 2))
plot(reg_data$x, reg_data$y, main = "", 
     xlab = "x", ylab = "y", pch = 20, cex = 2, col = "grey")
abline(h = mean(reg_data$y), col = "grey", lwd = 2)
plot(reg_data$x, reg_data$y, main = "", 
     xlab = "x", ylab = "y", pch = 20, cex = 2, col = "grey")
fit = lm(y ~ x, data = reg_data)
abline(fit, col = "grey", lwd = 2)
set.seed(2)
reg_data = generate_data(sigma = 2)
plot(reg_data$x, reg_data$y, main = "", 
     xlab = "x", ylab = "y", pch = 20, cex = 2, col = "grey")
abline(h = mean(reg_data$y), col = "grey", lwd = 2)
fit = lm(y ~ x, data = reg_data)
abline(fit, col = "grey", lwd = 2)
# define grid of x values
x = seq(-4, 4, length = 100)

# plot curve for t with 10 df
plot(x, dt(x, df = 10), type = "l", lty = 1, lwd = 2,
     xlab = "", ylab = "", main = "")
par(mfrow = c(1, 3))

sim_slr = function(x, beta_0 = 10, beta_1 = 5, sigma = 1) {
  n = length(x)
  epsilon = rnorm(n, mean = 0, sd = sigma)
  y = beta_0 + beta_1 * x + epsilon
  data.frame(predictor = x, response = y)
}

# suppose alpha = 0.05, which is how often we are willing to make a mistake if the null hypothesis is true

set.seed(9)
x = seq(1, 20, length.out = 21)
# beta_1 = 0, sigma = 1
# beta_1 = 0.5, sigma = 7
sim_data = sim_slr(x = x, beta_0 = 2, beta_1 = 0, sigma = 1)

sim_fit = lm(response ~ predictor, data = sim_data)
summary(sim_fit)$coefficients["predictor", "Pr(>|t|)"]

plot(response ~ predictor, data = sim_data, pch = 20, col = "grey", cex = 1.5, ylim = c(-1, 5))
abline(sim_fit, lwd = 3, lty = 1, col = "darkorange")
abline(2, 0, lwd = 3, lty = 2, col = "dodgerblue")
legend("topleft", c("Estimate", "Truth"), lty = c(1, 2), lwd = 2,
       col = c("darkorange", "dodgerblue"))

set.seed(78)
x = seq(1, 20, length.out = 21)
# beta_1 = 0, sigma = 1
# beta_1 = 0.5, sigma = 7
sim_data = sim_slr(x = x, beta_0 = 2, beta_1 = 0, sigma = 1)

sim_fit = lm(response ~ predictor, data = sim_data)
summary(sim_fit)$coefficients["predictor", "Pr(>|t|)"]

plot(response ~ predictor, data = sim_data, pch = 20, col = "grey", cex = 1.5, ylim = c(-1, 5))
abline(sim_fit, lwd = 3, lty = 1, col = "darkorange")
abline(2, 0, lwd = 3, lty = 2, col = "dodgerblue")
legend("topleft", c("Estimate", "Truth"), lty = c(1, 2), lwd = 2,
       col = c("darkorange", "dodgerblue"))

set.seed(1)
x = seq(1, 20, length.out = 21)
# beta_1 = 0, sigma = 1
# beta_1 = 0.5, sigma = 7
sim_data = sim_slr(x = x, beta_0 = 2, beta_1 = 1, sigma = 2)

sim_fit = lm(response ~ predictor, data = sim_data)
summary(sim_fit)$coefficients["predictor", "Pr(>|t|)"]

plot(response ~ predictor, data = sim_data, pch = 20, col = "grey", cex = 1.5)
abline(sim_fit, lwd = 3, lty = 1, col = "darkorange")
abline(2, 1, lwd = 3, lty = 2, col = "dodgerblue")
abline(h = mean(sim_data$response), lwd = 3, lty = 3)
legend("topleft", c("Estimate", "Truth", "Null Estimate"), lty = c(1, 2, 3), lwd = 2,
       col = c("darkorange", "dodgerblue", "black"))
set.seed(1337)
n = 100 # sample size
p = 3

beta_0 = 5
beta_1 = -2
beta_2 = 6
sigma  = 4


x0 = rep(1, n)
x1 = sample(seq(1, 10, length = n))
x2 = sample(seq(1, 10, length = n))
X = cbind(x0, x1, x2)
C = solve(t(X) %*% X)


eps      = rnorm(n, mean = 0, sd = sigma)
y        = beta_0 + beta_1 * x1 + beta_2 * x2 + eps
sim_data = data.frame(x1, x2, y)



# make this use data.frame? or, simply hide this?
fit = lm(y ~ x1 + x2)

grid.lines = 25
x1.pred = seq(min(x1), max(x1), length.out = grid.lines)
x2.pred = seq(min(x2), max(x2), length.out = grid.lines)
x1x2 = expand.grid(x1 = x1.pred, x2 = x2.pred)

y.pred = matrix(predict(fit, newdata = x1x2), 
                nrow = grid.lines, ncol = grid.lines)
# fitted points for droplines to surface
fitpoints = predict(fit)

library("plot3D")




minlim = min(fitpoints, y.pred, y) - 0.1
maxlim = max(fitpoints, y.pred, y) + 0.1

bothlim = c(minlim, maxlim)
par(mfrow = c(1, 3))

scatter3D(x1, x2, y, pch = 20, cex = 2, col = gg.col(1000), colkey = FALSE, pch = 20,
          theta = 0, phi = 45, zlim = c(min(y.pred), max(y.pred)), clim = bothlim,
          xlab = "x1", ylab = "x2", zlab = "y",
          # surf = list(x = x1.pred, y = x2.pred, z = y.pred, facets = NA, fit = fitpoints), 
          main = "")

scatter3D(x1, x2, y, pch = 20, cex = 2, col = gg.col(1000), colkey = FALSE,
          theta = 45, phi = 15, zlim = c(min(y.pred), max(y.pred)), clim = bothlim,
          xlab = "x1", ylab = "x2", zlab = "y",
          # surf = list(x = x1.pred, y = x2.pred, z = y.pred, facets = NA, fit = fitpoints), 
          main = "")

scatter3D(x1, x2, y, pch = 20, cex = 2, col = gg.col(1000), colkey = FALSE,
          theta = 90, phi = 0, zlim = c(min(y.pred), max(y.pred)), clim = bothlim,
          xlab = "x1", ylab = "x2", zlab = "y",
          # surf = list(x = x1.pred, y = x2.pred, z = y.pred, facets = NA, fit = fitpoints), 
          main = "")
par(mfrow = c(1, 3))

scatter3D(x1, x2, y, pch = 20, cex = 2, col = gg.col(1000), colkey = FALSE, pch = 20,
          theta = 0, phi = 45, zlim = c(min(y.pred), max(y.pred)), clim = bothlim,
          xlab = "x1", ylab = "x2", zlab = "y",
          surf = list(x = x1.pred, y = x2.pred, z = y.pred, facets = NA, fit = fitpoints),
          main = "")

scatter3D(x1, x2, y, pch = 20, cex = 2, col = gg.col(1000), colkey = FALSE,
          theta = 45, phi = 15, zlim = c(min(y.pred), max(y.pred)), clim = bothlim,
          xlab = "x1", ylab = "x2", zlab = "y",
          surf = list(x = x1.pred, y = x2.pred, z = y.pred, facets = NA, fit = fitpoints),
          main = "")

scatter3D(x1, x2, y, pch = 20, cex = 2, col = gg.col(1000), colkey = FALSE,
          theta = 90, phi = 0, zlim = c(min(y.pred), max(y.pred)), clim = bothlim,
          xlab = "x1", ylab = "x2", zlab = "y",
          surf = list(x = x1.pred, y = x2.pred, z = y.pred, facets = NA, fit = fitpoints),
          main = "")
scatter3D(x1, x2, y, pch = 20, cex = 2, col = gg.col(1000), lighting = TRUE, colkey = FALSE,
          theta = 95, phi = -10, zlim = c(min(y.pred), max(y.pred)), clim = bothlim,
          xlab = "x1", ylab = "x2", zlab = "y",
          surf = list(x = x1.pred, y = x2.pred, z = y.pred, facets = NA, fit = fitpoints),
          main = "")


daviddalpiaz/appliedstats documentation built on Sept. 16, 2024, 5:09 a.m.