# R/Adabag.R
#
# Defines functions: Adabag
#
# Documented in: Adabag

# Bagging classifier
# ----------------------------------------------------------------------
#' Bagging classifier
#'
#' Classify the input with a bagging classifier. Every value in
#' \code{mfinal.seq} is evaluated with 10-fold cross-validation
#' (\code{adabag::bagging.cv}) on a parallel cluster. The value with the
#' lowest cross-validation error is then used to fit a final model on the
#' whole training set and to predict on the test data.
#'
#' @param data A list generated by the function PrepareData. This function
#'   reads \code{data$train$label} (the labels), \code{data$train$features}
#'   (a data frame of training features) and \code{data$test}, which is
#'   either a data frame of test features or a list of two components whose
#'   \code{features} element holds the test features.
#' @param mfinal.seq A non-empty vector of values of number of iterations
#'   that should be tested.
#' @return A list with two components: \code{error}, a data frame with one
#'   row per tested value (columns \code{iter} and \code{error}), and
#'   \code{predictions}, the classes predicted for the test data with the
#'   best value of \code{mfinal.seq}.
#' @import doParallel
#' @import foreach
#' @import adabag
#' @export
#' @examples
#' \dontrun{
#' path <- "/home/rishabh/mres/ml_comp/data/"
#' data <- PrepareData(path, mode = 2, sample = TRUE, size = 100)
#' mfinal.seq <- seq(20, 25, 5)
#' Adabag(data, mfinal.seq)
#' }
Adabag <- function(data, mfinal.seq) {
  # Fail early: without candidates there is nothing to cross-validate and the
  # later which.min() lookup would silently yield an empty "best" value.
  if (length(mfinal.seq) == 0) {
    stop("mfinal.seq must contain at least one value", call. = FALSE)
  }

  train <- cbind(label = data$train$label, data$train$features)

  # detectCores() is documented to return NA when the count is unknown.
  n.cores <- parallel::detectCores()
  if (is.na(n.cores)) {
    n.cores <- 1L
  }

  cl <- parallel::makeCluster(n.cores)
  # Release the workers even when a cross-validation run fails.
  on.exit(parallel::stopCluster(cl), add = TRUE)
  registerDoParallel(cl)

  # One cross-validated bagging run per candidate number of iterations.
  cv.fits <- foreach(i = mfinal.seq) %dopar% {
    adabag::bagging.cv(label ~ ., train, v = 10, mfinal = i,
                       control = rpart::rpart.control())
  }

  # Cross-validation error of each candidate, in the order of mfinal.seq.
  err.vec <- vapply(cv.fits, function(fit) fit[["error"]], numeric(1))

  best.mfinal <- mfinal.seq[which.min(err.vec)]

  # NOTE(review): `v` is not a named argument of adabag::bagging(), so it is
  # absorbed by `...` -- presumably ignored; confirm and drop it.
  bagging.fit <- adabag::bagging(label ~ ., train, v = 10,
                                 mfinal = best.mfinal,
                                 control = rpart::rpart.control())

  # data$test is either the feature data frame itself or a two-element list
  # carrying the features in `$features`. A data frame is always used as is,
  # so a test set with exactly two columns is not mistaken for that list.
  if (is.data.frame(data$test) || length(data$test) != 2) {
    test.features <- data$test
  } else {
    test.features <- data$test$features
  }
  predictions <- adabag::predict.bagging(bagging.fit, test.features)$class

  list(
    error = data.frame(iter = mfinal.seq, error = err.vec),
    predictions = predictions
  )
}
# rishi1226/classrish documentation built on May 25, 2017, 3:22 a.m.