# This script is an example of the use of an external cross-validation around
# the VSURF procedure, to estimate the prediction errors of random forests that
# involve only the variables selected by VSURF.
# WARNING: lots of calculations are done. It takes almost 20 minutes on a Linux
# computing server with 40 cores.
library(VSURF)
data(srbct, package = "mixOmics")

# Fix the seed so the fold assignment below is reproducible. "L'Ecuyer-CMRG"
# is the RNG kind R's parallel package uses to provide multiple streams.
set.seed(322, "L'Ecuyer-CMRG")

K <- 5 # number of cross-validation folds

# Number of workers handed to VSURF. Kept at the original value of 40; lower
# it to match your machine.
# NOTE(review): the exact results may depend on the worker count - confirm.
ncores <- 40

x <- srbct$gene
y <- srbct$class
n <- length(y)

# Fold labels: concatenate independent random permutations of 1..K and keep
# the first n labels, so fold sizes differ by at most one.
folds <- replicate(ceiling(n / K), sample(seq_len(K)))[seq_len(n)]

# Number of misclassified test observations per fold (rows) for the two VSURF
# steps (columns).
errtest.mat <- matrix(nrow = K, ncol = 2)
colnames(errtest.mat) <- c("interp", "pred")

# The fitted VSURF object of every fold is kept for later inspection.
res.cv <- vector("list", K)

for (k in seq_len(K)) {
  # A logical mask is used instead of `-which(folds == k)`: negating an empty
  # `which()` result would select *no* rows rather than all of them.
  in_test <- folds == k

  # `drop = FALSE` keeps the two-dimensional structure even for a single row.
  xtrain <- x[!in_test, , drop = FALSE]
  ytrain <- y[!in_test]
  xtest <- x[in_test, , drop = FALSE]
  ytest <- y[in_test]

  # Variable selection is run on the training part only, so that the test
  # part of the fold plays no role in choosing the variables.
  # NOTE: a "FORK" cluster relies on process forking (not available on
  # Windows).
  vsurf.fold <- VSURF(
    xtrain, ytrain,
    parallel = TRUE, clusterType = "FORK", ncores = ncores
  )

  # Count the test observations misclassified at each of the two steps.
  errtest.mat[k, "interp"] <-
    sum(ytest != predict(vsurf.fold, newdata = xtest, step = "interp"))
  errtest.mat[k, "pred"] <-
    sum(ytest != predict(vsurf.fold, newdata = xtest, step = "pred"))

  res.cv[[k]] <- vsurf.fold
}

# Cross-validated misclassification rate for each step: total number of
# errors over all folds divided by the number of observations.
errtest <- colSums(errtest.mat) / n
errtest
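# NOTE: the two lines below are leftover web-page boilerplate, not part of the
# script; they are kept as comments so that the file remains valid R.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.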