R/stacking.R

stacking <- function(X.train, X.test, y, folds, layer1.models,
                     predict.with.full.training.set = TRUE,
                     save.on.disk = TRUE,
                     parent.folder = NULL)
{
  if (is.factor(y) == FALSE)
    stop("stacking: y must be a factor.")

  if (save.on.disk)
  {
    if (is.null(parent.folder)) {
      parent.folder <- getwd()
    } else {
      dir.create(parent.folder, showWarnings = FALSE)
    }
    parent.folder <- file.path(parent.folder, "metafeatures-info")
    dir.create(parent.folder, showWarnings = FALSE)
    folds.path <- file.path(parent.folder, "folds")
    metafeatures.path <- file.path(parent.folder, "metafeatures")
    dir.create(folds.path, showWarnings = FALSE)
    dir.create(metafeatures.path, showWarnings = FALSE)
  }

  idx <- Reduce(c, folds)
  y.oof <- y[idx]
  X.oof <- X.train[idx,]

  if (save.on.disk)
  {
    save.as.json(folds, folds.path)
    save.as.csv(X.oof, folds.path)
    save.as.csv(y.oof, folds.path)
  }

  models.metafeatures.info <- lapply(
    layer1.models,
    function (model.func)
    {
      i <- 1
      folds.metafeatures.info <- lapply(
        folds,
        function (fold)
        {
          model <- model.func()

          X.train.fold <- X.train[-fold,]
          X.test.fold  <- X.train[fold,]
          y.train.fold <- y[-fold]

          cat(sprintf("Training %s for fold %d...\n", model$name, i))
          model$train_(X.train.fold, y.train.fold)

          cat(sprintf("Predicting meta-features using %s for fold %d...\n",
                      model$name, i))

          if (predict.with.full.training.set)
          {
            metafeatures <- list(
              metafeatures.fold = model$predict_(X.test.fold)
            )
          } else {
            metafeatures <- list(
              metafeatures.fold = model$predict_(X.test.fold),
              metafeatures.test = model$predict_(X.test)
            )
          }

          gc()
          i <<- i + 1
          metafeatures
        })

      model.metafeatures.train <- Reduce(
        rbind, lapply(
          folds.metafeatures.info,
          function (info) info$metafeatures.fold))

      if (predict.with.full.training.set)
      {
        model <- model.func()
        model.description <- ifelse(is.null(model$description),
                                    yes = model$name,
                                    no = model$description)

        cat(sprintf("Training %s on full training set...\n", model$name))
        model$train_(X.train, y)

        cat(sprintf("Predicting meta-features using %s on full training set...\n",
                    model$name))
        model.metafeatures.test <- model$predict_(X.test)

      } else {
        n.folds <- length(folds)
        feature.sum <- function(f1, f2) f1 + f2
        model.metafeatures.test <- lapply(folds.metafeatures.info,
                                          function(info) info$metafeatures.test)
        model.metafeatures.test <- Reduce(feature.sum, model.metafeatures.test) / n.folds
      }

      if (save.on.disk)
      {
        model.path <- file.path(metafeatures.path, model.description)
        dir.create(model.path, showWarnings = FALSE)
        save.metafeatures.as.csv(model.metafeatures.train, model.path, model.description)
        save.metafeatures.as.csv(model.metafeatures.test,  model.path, model.description)
      }

      list(metafeatures.train = model.metafeatures.train,
           metafeatures.test  = model.metafeatures.test)
    })

  metafeatures.train <- Reduce(
    cbind, lapply(models.metafeatures.info,
                  function (model.info) model.info$metafeatures.train))

  metafeatures.test <- Reduce(
    cbind, lapply(models.metafeatures.info,
                  function (model.info) model.info$metafeatures.test))

  if (save.on.disk)
  {
    path <- file.path(metafeatures.path, "all-metafeatures")
    dir.create(path, showWarnings = FALSE)
    save.as.csv(metafeatures.train, path)
    save.as.csv(metafeatures.test,  path)
  }

  list(metafeatures.train = metafeatures.train,
       metafeatures.test  = metafeatures.test,
       y.oof = y.oof,
       X.oof = X.oof)
}
rladeira/stacking documentation built on May 27, 2019, 9:28 a.m.