tests/testthat/test-randomForest.R

# library(testthat)
# library(randomForest)
library(SuperLearner)

# Smoke tests for the SL.randomForest wrapper and create.Learner().
#
# The whole script runs only when every optional dependency is installed;
# otherwise it is skipped. vapply() guarantees a logical vector regardless of
# input, and quietly = TRUE keeps requireNamespace() from printing messages.
# The checks below have no explicit expectations: they pass as long as each
# call completes without raising an error.
if (all(vapply(c("testthat", "randomForest", "mlbench"), requireNamespace,
               logical(1), quietly = TRUE))) {

  testthat::context("Learner: randomForest")

  ############################
  # Setup test dataset from mlbench.

  data(BreastCancer, package = "mlbench")

  # Drop rows with missing values.
  data <- na.omit(BreastCancer)

  # Fix the RNG seed so the subsample below is reproducible.
  set.seed(1)

  # Reduce to a dataset of 100 observations to speed up testing.
  data <- data[sample(nrow(data), 100), ]

  # Expand out factors into indicators (no intercept column), excluding the
  # outcome (Class) and the Id column.
  X <- data.frame(model.matrix(~ . - 1, subset(data, select = -c(Class, Id))))

  # Limit to 20 variables to speed up testing.
  X <- X[, 1:20]

  # Binary outcome: 1 = malignant, 0 = otherwise.
  Y <- as.numeric(data$Class == "malignant")
  print(table(Y))

  ###########################
  # Test basic SL ensemble with RandomForest.

  sl_lib <- c("SL.randomForest", "SL.mean")

  sl <- SuperLearner(Y = Y, X = X, SL.library = sl_lib,
                     cvControl = list(V = 2),
                     family = binomial())
  print(sl)
  rm(sl_lib)

  #############################
  # Test create.Learner with randomForest.

  ######
  # Test default call.
  create_rf <- create.Learner("SL.randomForest")
  print(create_rf)
  sl <- SuperLearner(Y = Y, X = X, SL.library = create_rf$names,
                     cvControl = list(V = 2),
                     family = binomial())
  print(sl)

  # Clean up the learner functions created by the default call above.
  rm(list = create_rf$names)



  ###########

  # Create an environment to store the learners.
  sl_env <- new.env()

  # Specify an environment and test verbose.
  create_rf <- create.Learner("SL.randomForest", env = sl_env, verbose = TRUE)
  print(create_rf)
  print(ls(sl_env))
  print(length(sl_env))

  # Pass the environment holding the learner functions via `env` so that
  # SuperLearner can find them.
  sl <- SuperLearner(Y = Y, X = X, SL.library = create_rf$names,
                     cvControl = list(V = 2),
                     family = binomial(),
                     env = sl_env)
  print(sl)



  ############

  # Create a new environment to start this test from scratch.
  sl_env <- new.env()

  # Test a custom tune list but only specify mtry.
  tune_rf <- list(mtry = c(1, 2))
  create_rf <- create.Learner("SL.randomForest", tune = tune_rf,
                              detailed_names = TRUE, env = sl_env)
  print(create_rf)
  print(ls(sl_env))

  sl <- SuperLearner(Y = Y, X = X, SL.library = create_rf$names,
                     cvControl = list(V = 2),
                     family = binomial(), env = sl_env)
  print(sl)


  ############

  # Create a new environment to start this test from scratch.
  # sl_env = new.env()

  # Test with detailed_names = FALSE.
  # create_rf = create.Learner("SL.randomForest", tune = tune_rf, detailed_names = FALSE,
  #                           env = sl_env)
  #print(create_rf)
  #print(ls(sl_env))

  #sl = SuperLearner(Y = Y, X = X, SL.library = create_rf$names, family = binomial(),
  #                  cvControl = list(V = 2),
  #                  env = sl_env)
  #print(sl)

  ############

  # Create a new environment to start this test from scratch.
  #sl_env = new.env()

  # Test another version where we specify NULL as a string so that it's incorporated into names.
  #tune_rf = list(mtry = c(1, 2), nodesize = "NULL", maxnodes = "NULL")
  #create_rf = create.Learner("SL.randomForest", tune = tune_rf, detailed_names = TRUE,
  #                           env = sl_env)
  #print(create_rf)
  #print(ls(sl_env))

  #sl = SuperLearner(Y = Y, X = X, SL.library = create_rf$names, family = binomial(),
  #                  cvControl = list(V = 2),
  #                  env = sl_env)
  #print(sl)



  ############

  # Create a new environment to start this test from scratch.
  #sl_env = new.env()

  # Test maxnodes specification, including one version that uses the default.
  # We specify maxnodes using a list rather than vector so that 5 and 10 are not
  # coerced into strings.
  #tune_rf = list(mtry = c(1, 2), maxnodes = list(5, 10, "NULL"))
  #create_rf = create.Learner("SL.randomForest", tune = tune_rf, detailed_names = TRUE,
  #                           env = sl_env)
  #print(create_rf)

  #sl = SuperLearner(Y = Y, X = X, SL.library = create_rf$names,
  #                  family = binomial(),
  #                  cvControl = list(V = 2),
  #                  env = sl_env)
  #print(sl)

  # We need to use <<- in order for the sl result to be saved in our parent frame (GlobalEnv)
  #with(sl_env, {
  #  sl <<- SuperLearner(Y = Y, X = X, SL.library = create_rf$names,
  #                      cvControl = list(V = 2),
  #                      family = binomial())
  #})
  #print(sl)

  # Or we can do this.
  #sl = with(sl_env, SuperLearner(Y = Y, X = X, SL.library = create_rf$names,
  #                               cvControl = list(V = 2),
  #                               family = binomial()))
  #print(sl)


  ################
  # Test multicore.

  ### 2018-07-10 Removing test since generating WARN NOTE on CRAN-devel - EP

  # Only run in RStudio so that automated CRAN checks don't give errors.
  #if (.Platform$GUI == "RStudio") {

    # Note we don't create a new sl_env here, because we are using the env from the
    # previous test.

  #  doMC::registerDoMC()
  #  sl = with(sl_env, mcSuperLearner(Y = Y, X = X, SL.library = create_rf$names,
  #                                   cvControl = list(V = 2),
  #                                   family = binomial()))
  #  print(sl)
  #}

}

Try the SuperLearner package in your browser

Any scripts or data that you put into this service are public.

SuperLearner documentation built on July 26, 2023, 6:05 p.m.