R/layerForests.R

#' One Layer of Forests consisting of multiple expert forest-groups
#'
#' @param x is an object
#' @return y is the output
#' @export

layerForests <- function(y.train, y.test,
                         dat.train, dat.test, dat.testREAL,
                         n.train, n.test, n.testREAL,
                         n.subvar = 2000, num_expForests = 3,
                         ntree = 100, mtry = 30, nodesize = 10,
                         cores = 2, rnd.seed = 1,
                         layer = 1){

  # Parallilisation Settings
  registerDoParallel(cores=cores)
  CL <- makeCluster(cores)

  # Subset data into sets of features
  set.seed(rnd.seed)
  list_folds <- split(sample(ncol(dat.train), ncol(dat.train),replace=FALSE), as.factor(1:n.subvar))

  # Determine levels for factorisation
  if(layer!=1){
    levels = c("ACB","ASW","BEB","CDX", "CEU","CHB","CHS",
               "CLM","ESN","FIN","GBR","GIH","GWD","IBS",
               "ITU","JPT","KHV","LWK","MSL","MXL","PEL",
               "PJL","PUR","STU","TSI","YRI")
  } else {
    levels = c("0", "1", "2")
  }

  #list_expertForests <- list(NULL)
  list_expertForests <- foreach(k = 1:n.subvar, .inorder = FALSE,
                                .packages = c("tcltk","randomForest", "SMLpractical")) %dopar%{
                                  # Rnd Seed
                                  set.seed(741*(rnd.seed+k)) # choose a base seed not commonly chosen

                                  if(!exists("pb")) pb <- tkProgressBar("Parallel Expert Forests", min=0, max=n.subvar)

                                  # Create variable subsets
                                  df.train <- dat.train[,list_folds[[k]]]
                                  df.test <- dat.test[,list_folds[[k]]]
                                  df.testREAL <- dat.testREAL[,list_folds[[k]]]

                                  # Modify matrices to factor data frames


                                  res <- expertForests(y.train, y.test,
                                                       df.train, df.test, df.testREAL,
                                                       n.train, n.test, n.testREAL, uniq.crty,
                                                       num_expForests = num_expForests, levels = levels,
                                                       ntree = ntree, mtry = mtry, nodesize = nodesize)
                                  #list_expertForests[[k]] <-

                                  progress <- setTkProgressBar(pb, k)

                                  # Output
                                  res
                                }
  # Terminate parallel clusters
  stopCluster(CL)

  final.results <- mergefromlists(list_expertForests, num_expForests,
                   n.train, n.test, n.testREAL)
  return(final.results)
}
thomaswiemann/SMLpractical documentation built on May 28, 2019, 12:23 p.m.