stacking <- function(X.train, X.test, y, folds, layer1.models,
predict.with.full.training.set = TRUE,
save.on.disk = TRUE,
parent.folder = NULL)
{
if (is.factor(y) == FALSE)
stop("stacking: y must be a factor.")
if (save.on.disk)
{
if (is.null(parent.folder)) {
parent.folder <- getwd()
} else {
dir.create(parent.folder, showWarnings = FALSE)
}
parent.folder <- file.path(parent.folder, "metafeatures-info")
dir.create(parent.folder, showWarnings = FALSE)
folds.path <- file.path(parent.folder, "folds")
metafeatures.path <- file.path(parent.folder, "metafeatures")
dir.create(folds.path, showWarnings = FALSE)
dir.create(metafeatures.path, showWarnings = FALSE)
}
idx <- Reduce(c, folds)
y.oof <- y[idx]
X.oof <- X.train[idx,]
if (save.on.disk)
{
save.as.json(folds, folds.path)
save.as.csv(X.oof, folds.path)
save.as.csv(y.oof, folds.path)
}
models.metafeatures.info <- lapply(
layer1.models,
function (model.func)
{
i <- 1
folds.metafeatures.info <- lapply(
folds,
function (fold)
{
model <- model.func()
X.train.fold <- X.train[-fold,]
X.test.fold <- X.train[fold,]
y.train.fold <- y[-fold]
cat(sprintf("Training %s for fold %d...\n", model$name, i))
model$train_(X.train.fold, y.train.fold)
cat(sprintf("Predicting meta-features using %s for fold %d...\n",
model$name, i))
if (predict.with.full.training.set)
{
metafeatures <- list(
metafeatures.fold = model$predict_(X.test.fold)
)
} else {
metafeatures <- list(
metafeatures.fold = model$predict_(X.test.fold),
metafeatures.test = model$predict_(X.test)
)
}
gc()
i <<- i + 1
metafeatures
})
model.metafeatures.train <- Reduce(
rbind, lapply(
folds.metafeatures.info,
function (info) info$metafeatures.fold))
if (predict.with.full.training.set)
{
model <- model.func()
model.description <- ifelse(is.null(model$description),
yes = model$name,
no = model$description)
cat(sprintf("Training %s on full training set...\n", model$name))
model$train_(X.train, y)
cat(sprintf("Predicting meta-features using %s on full training set...\n",
model$name))
model.metafeatures.test <- model$predict_(X.test)
} else {
n.folds <- length(folds)
feature.sum <- function(f1, f2) f1 + f2
model.metafeatures.test <- lapply(folds.metafeatures.info,
function(info) info$metafeatures.test)
model.metafeatures.test <- Reduce(feature.sum, model.metafeatures.test) / n.folds
}
if (save.on.disk)
{
model.path <- file.path(metafeatures.path, model.description)
dir.create(model.path, showWarnings = FALSE)
save.metafeatures.as.csv(model.metafeatures.train, model.path, model.description)
save.metafeatures.as.csv(model.metafeatures.test, model.path, model.description)
}
list(metafeatures.train = model.metafeatures.train,
metafeatures.test = model.metafeatures.test)
})
metafeatures.train <- Reduce(
cbind, lapply(models.metafeatures.info,
function (model.info) model.info$metafeatures.train))
metafeatures.test <- Reduce(
cbind, lapply(models.metafeatures.info,
function (model.info) model.info$metafeatures.test))
if (save.on.disk)
{
path <- file.path(metafeatures.path, "all-metafeatures")
dir.create(path, showWarnings = FALSE)
save.as.csv(metafeatures.train, path)
save.as.csv(metafeatures.test, path)
}
list(metafeatures.train = metafeatures.train,
metafeatures.test = metafeatures.test,
y.oof = y.oof,
X.oof = X.oof)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.