# Simulate a small training set: 20 standard-normal x values and a response
# that is a quadratic in x plus standard-normal noise. The seed is fixed so
# the draws are reproducible.
set.seed(252)
x <- rnorm(n = 20)
noise <- rnorm(n = 20)

# Alternative generating functions kept for reference:
# y <- x^3 + 5*x^2 - 4*x + noise
# y <- 3*x^2-3*x+noise
y <- 3 * x^2 - x + noise

# Two-column data frame (columns x and y) holding the simulated sample.
train <- data.frame(x = x, y = y)

# Scatter plot of the training data (black points) with the noise-free
# generating curve 3*x^2 - x drawn on top in red.
# NOTE(review): no library(ggplot2) call is visible in this part of the file;
# ggplot2 is assumed to be attached elsewhere -- confirm.
ggplot(data = train, aes(x, y)) +
  geom_point(color = 'black') +
  stat_function(fun = function(x) 3*x^2 - x, color = 'red', size = 0.7) +
  theme_minimal()

# Model fitting ----

# Fit polynomial regressions of several degrees to the training data and
# append each model's fitted values to `train` as extra columns.

# Degrees of the polynomials to fit.
poly.degrees <- c(1, 2, 9, 11)

# One formula per degree, e.g. y ~ poly(x,9). The list elements are named by
# their formula strings.
formula.strings <- paste0("y ~ poly(x,", poly.degrees, ")")
poly.formulae <- stats::setNames(
  lapply(formula.strings, as.formula),
  formula.strings
)

# Evenly spaced grid of 200 points over the observed range of x.
df.plot <- data.frame(x = seq(min(x), max(x), length.out = 200))

# Pre-allocate one slot per formula. (list(length = n) would instead create a
# one-element list whose single element is named "length".)
fitted.models <- vector("list", length(poly.formulae))
for (model_index in seq_along(poly.formulae)) {
  # Pass the data explicitly rather than relying on x and y being found in
  # the calling environment.
  fm <- lm(formula = poly.formulae[[model_index]], data = train)
  fitted.models[[model_index]] <- fm
}

# Collect the fitted values as a matrix with one column per model and bind
# them on in a single step instead of growing `train` inside a loop.
fitted.columns <- vapply(
  fitted.models,
  function(model) unname(fitted(model)),
  numeric(nrow(train))
)
# NOTE: the deg_* suffix is the model's 0-based position in poly.degrees
# (deg_0 .. deg_3), not the polynomial degree itself; the names are kept so
# existing references to these columns keep working.
colnames(fitted.columns) <- paste0("deg_", seq_along(fitted.models) - 1)
train <- cbind(train, fitted.columns)

# Overlay the four polynomial fits (formulas 1-4 of poly.formulae, labelled by
# degree 1, 2, 9 and 11) on the training data, one colour per degree, then
# write the figure to disk.
ggplot(train, aes(x, y)) +
  geom_point(color = 'black') +
  stat_smooth(
    method = 'lm', formula = poly.formulae[[1]], se = FALSE,
    aes(color = '1'), size = 1
  ) +
  stat_smooth(
    method = 'lm', formula = poly.formulae[[2]], se = FALSE,
    aes(color = '2'), size = 1
  ) +
  stat_smooth(
    method = 'lm', formula = poly.formulae[[3]], se = FALSE,
    aes(color = '9'), size = 1
  ) +
  stat_smooth(
    method = 'lm', formula = poly.formulae[[4]], se = FALSE,
    aes(color = '11'), size = 1
  ) +
  # Exactly one break and one colour per plotted degree. The names in `values`
  # must be unique, so only the first of the two former '9' entries is kept,
  # and the '4' break that no layer uses is dropped.
  scale_colour_manual(
    name = 'Polynomial \ndegree',
    breaks = c('1', '2', '9', '11'),
    values = c(
      '1' = '#bce784',
      '2' = '#5dd39e',
      '9' = '#348aa7',
      '11' = '#513b56'
    )
  ) +
  theme_minimal()

# Separate statement: with no `plot` argument ggsave() saves the last plot.
ggsave('/home/work/overfit.png')



# andreabecsek/panda documentation built on Jan. 2, 2020, 1:56 p.m.