# R code extracted from a heuristica package vignette (purled knitr chunks).
## -----------------------------------------------------------------------------
library(heuristica)
## -----------------------------------------------------------------------------
# City population data set bundled with heuristica.
data_set <- city_population_original
criterion_col <- 3 # Population
cols_to_fit <- 4:ncol(data_set) # The 9 cues
# Fit each model to the full data set.
reg <- regInterceptModel(data_set, criterion_col, cols_to_fit)
ttb <- ttbModel(data_set, criterion_col, cols_to_fit)
unit <- unitWeightModel(data_set, criterion_col, cols_to_fit)
# Named min_model to avoid shadowing base::min().
min_model <- minModel(data_set, criterion_col, cols_to_fit)
## -----------------------------------------------------------------------------
# Percent of correct paired comparisons for each fitted model on the data
# it was fit to.
results <- percentCorrectList(data_set, list(reg, ttb, unit, min_model))
# Round values to make comparison easier.
round(results)
## -----------------------------------------------------------------------------
# Cross-validate the data over the vector of models, using training_proportion
# of the data to train and the rest as a test set. Outputs the mean percent
# correct for each model in fitting and in prediction.
#
# Args:
#   vec_of_models: list (or vector) of model constructor functions; each is
#     called as mod(training_set, criterion_col, cols_to_fit).
#   criterion_col: column index of the criterion being predicted.
#   cols_to_fit: column indexes of the cue columns.
#   data: the full data set; rows are cases.
#   reps: number of random train/test splits to average over.
#   training_proportion: fraction of rows used for fitting; the rest is test.
#
# Returns: a 2-row matrix with rownames "Fitting" and "Prediction" holding
#   the mean percent correct per model.
crossV <- function(vec_of_models, criterion_col, cols_to_fit, data, reps,
                   training_proportion) {
  # Preallocate per-rep result containers rather than growing with rbind()
  # inside the loop (which copies everything each iteration).
  fitting <- vector("list", reps)
  prediction <- vector("list", reps)
  n_rows <- nrow(data)
  for (i in seq_len(reps)) {
    # Randomly sample training and test row indexes.
    train <- sample(seq_len(n_rows), n_rows * training_proportion)
    test <- setdiff(seq_len(n_rows), train)
    # Create training and test sets.
    training_set <- data[train, ]
    test_set <- data[test, ]
    # If a regression is overdetermined (e.g. has too many columns), it will
    # drop the right-most columns. To instead make it drop random columns,
    # we shuffle the column order.
    shuffled_cols_to_fit <- sample(cols_to_fit)
    # Fit every model constructor to the training set.
    models <- lapply(vec_of_models, function(mod) {
      mod(training_set, criterion_col, shuffled_cols_to_fit)
    })
    # Percentage of correct predictions in fitting vs. prediction.
    fitting[[i]] <- percentCorrectList(training_set, models)
    prediction[[i]] <- percentCorrectList(test_set, models)
  }
  results <- rbind(colMeans(do.call(rbind, fitting)),
                   colMeans(do.call(rbind, prediction)))
  rownames(results) <- c("Fitting", "Prediction")
  results
}
## -----------------------------------------------------------------------------
set.seed(0) # Use the same seed if you want to reproduce same results.
reps <- 200 # The book used 10,000 but that takes minutes.
training_proportion <- 0.5
# Hold the model constructor functions in a list(); c() on closures quietly
# builds a list anyway, so list() states the intent directly.
results <- crossV(list(regInterceptModel, ttbModel, unitWeightModel, minModel),
                  criterion_col, cols_to_fit, data_set, reps,
                  training_proportion)
# Round values to make comparison easier.
round(results, 1)
## ----fig.width=7, fig.height=5------------------------------------------------
library(ggplot2)
library(reshape)
# Melt the 2 x n_models results matrix into long format for ggplot:
# one row per (condition, model) pair.
plot_data <- melt(results)
colnames(plot_data) <- c("condition", "model", "value")
# One line per model, comparing fitting accuracy against prediction accuracy.
ggplot(plot_data,
       aes(x = condition, y = value, colour = model, group = model)) +
  geom_point() +
  geom_line() +
  ylab("Proportion correct")
# Site boilerplate from the page this code was extracted from (kept as
# comments so the script still parses):
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.