Nothing
overfit_demo <-
function(DF,y=NA,seed=NA,aic=TRUE) {
if(is.na(y)) { stop(paste("Need to specify y variable in quotes\n")) }
if(!is.na(seed)) { set.seed(seed) }
n <- nrow(DF)
selected <- sample(n,n/2,replace=TRUE)
training <- DF[selected,]
holdout <- DF[-selected,]
form1 <- formula( paste(y,"~1") )
form2 <- formula( paste(y,"~.^2") )
null.model <- lm(form1,data=training)
full.model <- lm(form2,data=training)
best.model <- step(null.model,scope=list(lower=null.model,upper=full.model),direction="forward",trace=0)
M <- step(null.model,scope=list(lower=null.model,upper=full.model),direction="forward",trace=0,steps=1)
y.pos <- which(names(holdout)==y)
y.holdout <- holdout[,y.pos]
pred.holdout <- predict(M,newdata=holdout)
RMSE.holdout <- sqrt(mean( (y.holdout-pred.holdout)^2 ))
aic.train <- AIC(M)
RMSE.train <- summary(M)$sigma
for (i in 2:30) {
M <- step(M,scope=list(lower=null.model,upper=full.model),direction="both",steps=1,trace=0,k=.001)
pred.holdout <- predict(M,newdata=holdout)
RMSE.holdout[i] <- sqrt(mean( (y.holdout-pred.holdout)^2 ))
aic.train[i] <- AIC(M)
RMSE.train[i] <- summary(M)$sigma
}
RMSE.holdout <- (RMSE.holdout-min(RMSE.holdout))/(max(RMSE.holdout)-min(RMSE.holdout))
aic.train <- (aic.train-min(aic.train))/(max(aic.train)-min(aic.train))
RMSE.train <- (RMSE.train-min(RMSE.train))/(max(RMSE.train)-min(RMSE.train))
if(aic==TRUE) {
plot( 1:length(aic.train),RMSE.holdout,xlab="# variables added",ylab="Relative Error",type="l",lwd=2,yaxt="n")
lines( 1:length(aic.train),aic.train,lwd=2,lty=2)
legend("top",c("RMSE(holdout)","AIC(training)"),lwd=2,lty=1:2)
}
if(aic==FALSE) {
plot( 1:length(RMSE.train),RMSE.holdout,xlab="# variables added",ylab="Relative Error",type="l",lwd=2,yaxt="n")
lines( 1:length(RMSE.train),RMSE.train,lwd=2,lty=2)
legend("top",c("RMSE(holdout)","RMSE(training)"),lwd=2,lty=1:2)
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.