dajmcdon
). Include your buddy in the author field if you are working together.## Note: this is the same data as in Lecture 8. trueFunction <- function(x) sin(x) + 1/sqrt(x) + 3 set.seed(1234) n = 100 x = 1:n/n*2*pi wiggly = data.frame(x = x, y = trueFunction(x) + rnorm(n, 0, .75)) library(ggplot2) ggplot(wiggly, aes(x, y)) + geom_point() + xlim(0,2*pi) + ylim(0,max(wiggly$y)) + stat_function(fun=trueFunction, color='red')
resample <- function(x) { sample(x, replace = TRUE) } resample.data.frame <- function(data) { sample.rows <- resample(1:nrow(data)) return(data[sample.rows, ]) } rboot <- function(statistic, simulator, B) { tboots <- replicate(B, statistic(simulator())) if (is.null(dim(tboots))) { tboots <- array(tboots, dim = c(1, B)) } return(tboots) } bootstrap <- function(tboots, summarizer, ...) { summaries <- apply(tboots, 1, summarizer, ...) return(t(summaries)) } equitails <- function(x, alpha) { lower <- quantile(x, alpha/2) upper <- quantile(x, 1 - alpha/2) return(c(lower, upper)) } bootstrap.ci <- function(statistic = NULL, simulator = NULL, tboots = NULL, B = if (!is.null(tboots)) { ncol(tboots) }, t.hat, level) { if (is.null(tboots)) { stopifnot(!is.null(statistic)) stopifnot(!is.null(simulator)) stopifnot(!is.null(B)) tboots <- rboot(statistic, simulator, B) } alpha <- 1 - level intervals <- bootstrap(tboots, summarizer = equitails, alpha = alpha) upper <- t.hat + (t.hat - intervals[, 1]) lower <- t.hat + (t.hat - intervals[, 2]) CIs <- cbind(lower = lower, upper = upper) return(CIs) }
Write a function that estimates the linear model of $y$ on $x$ for any data set. Call your function wiggly.estimator.lm
. It should return the slope and intercept. Run lm
on the wiggly data (call the fitted model wiggly.mdl
). Compare it's estimates to those of wiggly.estimator.lm
.
wiggly.estimator.lm <- function(newdata) coef(lm(y~x, data=newdata)) wiggly.mdl = lm(y~x, data=wiggly) coef(wiggly.mdl) wiggly.estimator.lm(wiggly)
Write a function that resamples the residuals. Call it wiggly.resids.resamp
.
wiggly.resids.resamp <- function(){ resids = residuals(wiggly.mdl) newResids = sample(resids, replace=TRUE) # resample the residuals from the original model new.wiggles = data.frame( # create a new dataframe x = wiggly$x, # with the original x's but new y's y = fitted(wiggly.mdl)+newResids) return(new.wiggles) }
Write a function to resample the data frame.
new.wiggly.resamp <- function() resample.data.frame(wiggly)
Use both samplers to get model-based and nonparametric confidence intervals for your linear model.
mbb.wiggly = bootstrap.ci( statistic = wiggly.estimator.lm, simulator = wiggly.resids.resamp, B = 1000, t.hat = wiggly.estimator.lm(wiggly), level = 0.95) npb.wiggly = bootstrap.ci( statistic = wiggly.estimator.lm, simulator = new.wiggly.resamp, B = 1000, t.hat = wiggly.estimator.lm(wiggly), level = 0.95) mbb.wiggly npb.wiggly
Which bootstrap is more appropriate to use in this case? Why?
Answer: The model-based bootstrap is wrong. Data did not actually come from this linear model. So resampling rows is a more accurate measure of uncertainty.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.