# Packages attached for the whole script.
library(microbenchmark)
library(ggplot2)

# Load every file found in outputs/ into the current environment.
resFiles <- list.files("outputs/")
for (fileName in resFiles) {
  load(paste0("outputs/", fileName))
}
# Draw one boxplot of `time` per benchmarked expression.
#
# res: a benchmark result that as.data.frame() turns into a table with
#      `expr` and `time` columns.
# Returns (invisibly) the ggplot object, filled by `expr`, on a black and
# white theme with all grid lines removed.
mbplot <- function(res) {
  timings <- as.data.frame(res)
  noGrid <- theme(panel.grid.minor = element_blank(),
                  panel.grid.major = element_blank())
  # NOTE(review): the axis label says "log scale" but the log10 transform
  # below is commented out, so the axis is currently linear.
  ggres <- ggplot(timings, aes(expr, time)) +
    theme_bw() +
    ylab("time (log scale)") +
    noGrid +
    geom_boxplot(aes(fill = expr))
  # + scale_y_continuous(trans = "log10")
  invisible(ggres)
}
# toys data: high-dimensional case ----

data(toys, package = "VSURF")
dim(toys$x)
str(toys$y)

# Predictors and response gathered in one data frame for the formula
# interfaces; mtry is a third of the number of predictors.
toysData <- data.frame(toys$x, y = toys$y)
mtryToys <- floor(ncol(toys$x) / 3)
# Time forest fits on the toys data, 25 runs per expression; ranger and
# Rborist are restricted to one thread.
mbToys <- microbenchmark::microbenchmark(
  randomForest::randomForest(y ~ ., toysData, mtry = mtryToys),
  randomForest::randomForest(toys$x, toys$y, mtry = mtryToys),
  ranger::ranger(y ~ ., toysData, mtry = mtryToys, num.threads = 1),
  Rborist::Rborist(toys$x, toys$y, predFixed = mtryToys, minInfo = 0,
                   nThread = 1),
  times = 25
)
# Short labels, in the same order as the expressions above.
levels(mbToys$expr) <- c("RF formula", "RF", "ranger", "Rborist")

mbToys
save(mbToys, file = "outputs/mbToys.Rdata")
# plotToys <- mbplot(mbToys) + scale_fill_manual(values = viridis::viridis(4))
# plotToys
# Same timings with variable importance computed (25 runs per expression).
# The result is stored in mbToysImp: assigning it to mbToys would overwrite
# the timings without importance, and mbToysImp is the object that is
# printed and saved just below.
mbToysImp <- microbenchmark::microbenchmark(
  randomForest::randomForest(y ~ ., toysData, mtry = mtryToys,
                             importance = TRUE),
  randomForest::randomForest(toys$x, toys$y, mtry = mtryToys,
                             importance = TRUE),
  ranger::ranger(y ~ ., toysData, mtry = mtryToys, num.threads = 1,
                 importance = "permutation"),
  times = 25
)
# Short labels, in the same order as the expressions above.
levels(mbToysImp$expr) <- c("RF formula", "RF", "ranger")

mbToysImp
save(mbToysImp, file = "outputs/mbToysImp.Rdata")
# plotToysImp <- mbplot(mbToysImp) + scale_fill_manual(values = viridis::viridis(4)[1:3])
# plotToysImp
# Tag each result set, stack them, and draw one panel per VI setting.
mbToys$VI <- "no VI"
mbToysImp$VI <- "VI"
mbToysBoth <- rbind(mbToys, mbToysImp)

toysMBplot <- mbplot(mbToysBoth)
toysMBplot <- toysMBplot +
  facet_wrap(~VI) +
  scale_fill_manual(values = viridis::viridis(4))
toysMBplot
# Append `nbVarNoise` columns of Gaussian noise (mean 0, sd 20) to `x` and
# rename all columns X1, X2, ...
#
# x: matrix of predictors.
# nbVarNoise: number of noise columns to add (0 is allowed).
# Returns the widened matrix.
addVarNoise <- function(x, nbVarNoise) {
  nbObs <- nrow(x)
  nbVar <- ncol(x)
  # ncol is given explicitly so that nbVarNoise = 0 yields an nbObs x 0 block.
  addNoise <- matrix(rnorm(nbObs * nbVarNoise, 0, 20),
                     nrow = nbObs, ncol = nbVarNoise)
  xc <- cbind(x, addNoise)
  # seq_len() instead of 1:n, which would count down when n is 0.
  colnames(xc) <- paste0("X", seq_len(nbVar + nbVarNoise))
  xc
}

# Insert two groups of `d` variables correlated with columns `j` and `k` of
# `x`, right after its first 6 columns.
#
# Each new variable is rho * x[, j] (resp. x[, k]) plus 0.5 times a standard
# normal draw, then centred and scaled to unit variance. The same draw is
# shared by the i-th variable of both groups.
#
# x: matrix with at least 6 columns.
# j, k: indices of the columns the two groups are built from.
# d: number of variables per group; with d == 0, `x` is returned unchanged.
# rho: multiplier applied to the source column.
# Returns a matrix with ncol(x) + 2 * d columns laid out as
# x[, 1:6], group j, group k, remaining columns of x.
ajoutVarCor2grp <- function(x, j, k, d, rho = 1.0324) {
  if (d == 0) {
    return(x)
  }

  nbFirst <- 6
  n <- nrow(x)
  p <- ncol(x)
  stopifnot(p >= nbFirst)
  sigma <- 0.5

  z <- matrix(0, n, d)
  w <- matrix(0, n, d)
  for (i in seq_len(d)) {
    u <- rnorm(n, 0, 1)
    zi <- x[, j] * rho + sigma * u
    wi <- x[, k] * rho + sigma * u
    z[, i] <- (zi - mean(zi)) / sqrt(var(zi))
    w[, i] <- (wi - mean(wi)) / sqrt(var(wi))
  }

  xc <- matrix(0, n, p + 2 * d)
  for (i in seq_len(nbFirst)) {
    xc[, i] <- x[, i]
  }
  xc[, nbFirst + seq_len(d)] <- z
  xc[, nbFirst + d + seq_len(d)] <- w
  # seq_len() keeps this loop empty when x has exactly 6 columns; 7:p would
  # count down (7, 6) and index past the last column.
  for (i in seq_len(p - nbFirst)) {
    xc[, nbFirst + 2 * d + i] <- x[, nbFirst + i]
  }
  xc
}
# Widen toys to 2000 columns: 30 variables correlated with column 3 and 30
# with column 6, then noise columns up to 2000.
bigToys <- ajoutVarCor2grp(toys$x, j = 3, k = 6, d = 30)
bigToys <- addVarNoise(bigToys, 2000 - ncol(bigToys))
dim(bigToys)

bigToysData <- data.frame(bigToys, y = toys$y)
mtryBigToys <- floor(ncol(bigToys) / 10)
# Time forest fits on the 2000-column toys data, 25 runs per expression;
# ranger and Rborist are restricted to one thread.
mbbigToys <- microbenchmark::microbenchmark(
  randomForest::randomForest(y ~ ., bigToysData, mtry = mtryBigToys),
  randomForest::randomForest(bigToys, toys$y, mtry = mtryBigToys),
  ranger::ranger(y ~ ., bigToysData, mtry = mtryBigToys, num.threads = 1),
  Rborist::Rborist(bigToys, toys$y, predFixed = mtryBigToys, minInfo = 0,
                   nThread = 1),
  times = 25
)
# Label this benchmark's own expressions (the labels were previously written
# to mbToys$expr, leaving mbbigToys unlabelled).
levels(mbbigToys$expr) <- c("RF formula", "RF", "ranger", "Rborist")

mbbigToys
save(mbbigToys, file = "outputs/mbbigToys.Rdata")
# plotbigToys <- mbplot(mbbigToys) + scale_fill_manual(values = viridis::viridis(4)[2:4])
# plotbigToys
# Same data with variable importance computed, 25 runs per expression.
mbbigToysImp <- microbenchmark::microbenchmark(
  randomForest::randomForest(y ~ ., bigToysData, mtry = mtryBigToys,
                             importance = TRUE),
  randomForest::randomForest(bigToys, toys$y, mtry = mtryBigToys,
                             importance = TRUE),
  ranger::ranger(y ~ ., bigToysData, mtry = mtryBigToys, num.threads = 1,
                 importance = "permutation"),
  times = 25
)
# Short labels, in the same order as the expressions above.
levels(mbbigToysImp$expr) <- c("RF formula", "RF", "ranger")

mbbigToysImp
save(mbbigToysImp, file = "outputs/mbbigToysImp.Rdata")
# plotbigToysImp <- mbplot(mbbigToysImp) + scale_fill_manual(values = viridis::viridis(4)[1:3])
# plotbigToysImp
# Tag each result set, stack them, and draw one panel per VI setting.
mbbigToys$VI <- "no VI"
mbbigToysImp$VI <- "VI"
mbbigToysBoth <- rbind(mbbigToys, mbbigToysImp)

bigtoysMBplot <- mbplot(mbbigToysBoth)
# Four colours are needed: the "no VI" timings contain four expressions
# (including Rborist), and a manual scale with only three values fails.
bigtoysMBplot <- bigtoysMBplot +
  facet_wrap(~VI) +
  scale_fill_manual(values = viridis::viridis(4))
bigtoysMBplot
# Widen toys to 5000 columns: 300 variables correlated with column 3 and
# 300 with column 6, then noise columns up to 5000.
BIGToys <- ajoutVarCor2grp(toys$x, j = 3, k = 6, d = 300)
BIGToys <- addVarNoise(BIGToys, 5000 - ncol(BIGToys))
dim(BIGToys)

BIGToysData <- data.frame(BIGToys, y = toys$y)
mtryBIGToys <- floor(ncol(BIGToys) / 10)
# Time randomForest (x/y interface) and single-threaded ranger on the
# 5000-column data, 25 runs per expression.
mbBIGToys <- microbenchmark::microbenchmark(
  randomForest::randomForest(BIGToys, toys$y, mtry = mtryBIGToys),
  ranger::ranger(y ~ ., BIGToysData, mtry = mtryBIGToys, num.threads = 1),
  times = 25
)
# Short labels, in the same order as the expressions above.
levels(mbBIGToys$expr) <- c("RF", "ranger")

mbBIGToys
save(mbBIGToys, file = "outputs/mbBIGToys.Rdata")
# plotBIGToys <- mbplot(mbBIGToys) + scale_fill_manual(values = viridis::viridis(4)[2:3])
# plotBIGToys
# Same data with variable importance computed, 25 runs per expression.
# The result is stored in mbBIGToysImp: assigning it to mbBIGToys would
# overwrite the timings without importance, and mbBIGToysImp is the object
# that is printed and saved just below.
mbBIGToysImp <- microbenchmark::microbenchmark(
  randomForest::randomForest(BIGToys, toys$y, mtry = mtryBIGToys,
                             importance = TRUE),
  ranger::ranger(y ~ ., BIGToysData, mtry = mtryBIGToys, num.threads = 1,
                 importance = "permutation"),
  times = 25
)
# Short labels, in the same order as the expressions above.
levels(mbBIGToysImp$expr) <- c("RF", "ranger")

mbBIGToysImp
save(mbBIGToysImp, file = "outputs/mbBIGToysImp.Rdata")
# plotBIGToysImp <- mbplot(mbBIGToysImp) + scale_fill_manual(values = viridis::viridis(4)[2:3])
# plotBIGToysImp
# Tag each result set, stack them, and draw one panel per VI setting.
mbBIGToys$VI <- "no VI"
mbBIGToysImp$VI <- "VI"
mbBIGToysBoth <- rbind(mbBIGToys, mbBIGToysImp)

BIGtoysMBplot <- mbplot(mbBIGToysBoth)
BIGtoysMBplot <- BIGtoysMBplot +
  facet_wrap(~VI) +
  scale_fill_manual(values = viridis::viridis(4)[2:3])
BIGtoysMBplot
# Widen toys to 10000 columns: 300 variables correlated with column 3 and
# 300 with column 6, then noise columns up to 10000.
BIGToys2 <- ajoutVarCor2grp(toys$x, j = 3, k = 6, d = 300)
BIGToys2 <- addVarNoise(BIGToys2, 10000 - ncol(BIGToys2))
dim(BIGToys2)

BIGToys2Data <- data.frame(BIGToys2, y = toys$y)
mtryBIGToys2 <- floor(ncol(BIGToys2) / 10)
# Time randomForest (x/y interface) and single-threaded ranger on the
# 10000-column data, 25 runs per expression. The calls use the BIGToys2
# objects; they previously referenced BIGToys / BIGToysData / mtryBIGToys,
# i.e. the 5000-column data set.
mbBIGToys2 <- microbenchmark::microbenchmark(
  randomForest::randomForest(BIGToys2, toys$y, mtry = mtryBIGToys2),
  ranger::ranger(y ~ ., BIGToys2Data, mtry = mtryBIGToys2, num.threads = 1),
  times = 25
)
# Short labels, in the same order as the expressions above.
levels(mbBIGToys2$expr) <- c("RF", "ranger")

mbBIGToys2
save(mbBIGToys2, file = "outputs/mbBIGToys2.Rdata")
# plotBIGToys2 <- mbplot(mbBIGToys2) + scale_fill_manual(values = viridis::viridis(4)[2:3])
# plotBIGToys2
# Same 10000-column data with variable importance computed, 25 runs per
# expression. The calls use the BIGToys2 objects; they previously referenced
# BIGToys / BIGToysData / mtryBIGToys, i.e. the 5000-column data set.
mbBIGToys2Imp <- microbenchmark::microbenchmark(
  randomForest::randomForest(BIGToys2, toys$y, mtry = mtryBIGToys2,
                             importance = TRUE),
  ranger::ranger(y ~ ., BIGToys2Data, mtry = mtryBIGToys2, num.threads = 1,
                 importance = "permutation"),
  times = 25
)
# Short labels, in the same order as the expressions above.
levels(mbBIGToys2Imp$expr) <- c("RF", "ranger")

mbBIGToys2Imp
save(mbBIGToys2Imp, file = "outputs/mbBIGToys2Imp.Rdata")
# plotBIGToys2Imp <- mbplot(mbBIGToys2Imp) + scale_fill_manual(values = viridis::viridis(4)[2:3])
# plotBIGToys2Imp
# Tag each result set, stack them, and draw one panel per VI setting.
mbBIGToys2$VI <- "no VI"
mbBIGToys2Imp$VI <- "VI"
mbBIGToys2Both <- rbind(mbBIGToys2, mbBIGToys2Imp)

BIGToys2MBplot <- mbplot(mbBIGToys2Both)
BIGToys2MBplot <- BIGToys2MBplot +
  facet_wrap(~VI) +
  scale_fill_manual(values = viridis::viridis(4)[2:3])
BIGToys2MBplot
# vac18 data (n = 42, p = 1000) ----

data("vac18", package = "mixOmics")
geneExpr <- vac18$genes
dim(geneExpr)
stimu <- vac18$stimulation
str(stimu)

# Predictors and response gathered in one data frame for the formula
# interfaces; mtry is a third of the number of predictors.
VAC18 <- data.frame(geneExpr, stimu)
mtryVac18 <- floor(ncol(geneExpr) / 3)
# Time forest fits on vac18, 25 runs per expression; ranger and Rborist are
# restricted to one thread.
mbVac18 <- microbenchmark::microbenchmark(
  randomForest::randomForest(stimu ~ ., VAC18, mtry = mtryVac18),
  randomForest::randomForest(geneExpr, stimu, mtry = mtryVac18),
  ranger::ranger(stimu ~ ., VAC18, mtry = mtryVac18, num.threads = 1),
  Rborist::Rborist(geneExpr, stimu, predFixed = mtryVac18, minInfo = 0,
                   nThread = 1),
  times = 25
)
# Short labels, in the same order as the expressions above.
levels(mbVac18$expr) <- c("RF formula", "RF", "ranger", "Rborist")

mbVac18
save(mbVac18, file = "outputs/mbVac18.Rdata")
# plotVac18 <- mbplot(mbVac18) + scale_fill_manual(values = viridis::viridis(4))
# plotVac18
# Same data with variable importance computed, 25 runs per expression.
mbVac18Imp <- microbenchmark::microbenchmark(
  randomForest::randomForest(stimu ~ ., VAC18, mtry = mtryVac18,
                             importance = TRUE),
  randomForest::randomForest(geneExpr, stimu, mtry = mtryVac18,
                             importance = TRUE),
  ranger::ranger(stimu ~ ., VAC18, mtry = mtryVac18, num.threads = 1,
                 importance = "permutation"),
  times = 25
)
# Short labels, in the same order as the expressions above.
levels(mbVac18Imp$expr) <- c("RF formula", "RF", "ranger")

mbVac18Imp
save(mbVac18Imp, file = "outputs/mbVac18Imp.Rdata")
# plotVac18Imp <- mbplot(mbVac18Imp) + scale_fill_manual(values = viridis::viridis(4)[1:3])
# plotVac18Imp
# Tag each result set, stack them, and draw one panel per VI setting.
mbVac18$VI <- "no VI"
mbVac18Imp$VI <- "VI"
mbVac18Both <- rbind(mbVac18, mbVac18Imp)

Vac18MBplot <- mbplot(mbVac18Both)
Vac18MBplot <- Vac18MBplot +
  facet_wrap(~VI) +
  scale_fill_manual(values = viridis::viridis(4))
Vac18MBplot
# spam data ----

data("spam", package = "kernlab")

# The last column is dropped to form the predictors; `type` is the response.
nvarSpam <- ncol(spam) - 1
Xspam <- spam[, -ncol(spam)]
Yspam <- spam$type
dim(Xspam)
str(Yspam)

# mtry is the square root of the number of predictors, rounded down.
mtrySpam <- floor(sqrt(nvarSpam))
# Time forest fits on spam, 25 runs per expression; ranger and Rborist are
# restricted to one thread.
mbSpam <- microbenchmark(
  randomForest::randomForest(type ~ ., spam, mtry = mtrySpam),
  ranger::ranger(type ~ ., spam, mtry = mtrySpam, num.threads = 1),
  Rborist::Rborist(Xspam, Yspam, predFixed = mtrySpam, minInfo = 0,
                   nThread = 1),
  times = 25
)
# Short labels, in the same order as the expressions above.
levels(mbSpam$expr) <- c("RF", "ranger", "Rborist")

mbSpam
save(mbSpam, file = "outputs/mbSpam.Rdata")
# plotSpam <- mbplot(mbSpam) + scale_fill_manual(values = viridis::viridis(4)[2:4])
# plotSpam
# Same data with variable importance computed, 25 runs per expression.
mbSpamImp <- microbenchmark(
  randomForest::randomForest(type ~ ., spam, mtry = mtrySpam,
                             importance = TRUE),
  ranger::ranger(type ~ ., spam, mtry = mtrySpam, num.threads = 1,
                 importance = "permutation"),
  times = 25
)
# Short labels, in the same order as the expressions above.
levels(mbSpamImp$expr) <- c("RF", "ranger")

mbSpamImp
save(mbSpamImp, file = "outputs/mbSpamImp.Rdata")
# plotSpamImp <- mbplot(mbSpamImp) + scale_fill_manual(values = viridis::viridis(4)[2:3])
# plotSpamImp
# Tag each result set, stack them, and draw one panel per VI setting.
mbSpam$VI <- "no VI"
mbSpamImp$VI <- "VI"
mbSpamBoth <- rbind(mbSpam, mbSpamImp)

SpamMBplot <- mbplot(mbSpamBoth)
SpamMBplot <- SpamMBplot +
  facet_wrap(~VI) +
  scale_fill_manual(values = viridis::viridis(4)[2:4])
SpamMBplot
# Reduced spam data: a single predictor (charExclamation) plus the response.
spamSmall <- spam[, c("charExclamation", "type")]
str(spamSmall)

# Single-bracket indexing keeps the predictor as a one-column data frame.
XspamSmall <- spamSmall["charExclamation"]
YspamSmall <- spamSmall$type
# Time forest fits on the one-predictor spam data (mtry = 1), 25 runs per
# expression; ranger and Rborist are restricted to one thread.
mbSpamSmall <- microbenchmark(
  randomForest::randomForest(type ~ ., spamSmall, mtry = 1),
  ranger::ranger(type ~ ., spamSmall, mtry = 1, num.threads = 1),
  Rborist::Rborist(XspamSmall, YspamSmall, predFixed = 1, minInfo = 0,
                   nThread = 1),
  times = 25
)
# Short labels, in the same order as the expressions above.
levels(mbSpamSmall$expr) <- c("RF", "ranger", "Rborist")

mbSpamSmall
save(mbSpamSmall, file = "outputs/mbSpamSmall.Rdata")
# plotSpamSmall <- mbplot(mbSpamSmall) + scale_fill_manual(values = viridis::viridis(4)[2:4])
# plotSpamSmall
# Same data with variable importance computed, 25 runs per expression.
mbSpamSmallImp <- microbenchmark(
  randomForest::randomForest(type ~ ., spamSmall, mtry = 1,
                             importance = TRUE),
  ranger::ranger(type ~ ., spamSmall, mtry = 1, num.threads = 1,
                 importance = "permutation"),
  times = 25
)
# Short labels, in the same order as the expressions above.
levels(mbSpamSmallImp$expr) <- c("RF", "ranger")

mbSpamSmallImp
save(mbSpamSmallImp, file = "outputs/mbSpamSmallImp.Rdata")
# plotSpamSmallImp <- mbplot(mbSpamSmallImp) + scale_fill_manual(values = viridis::viridis(4)[2:3])
# plotSpamSmallImp
# Tag each result set, stack them, and draw one panel per VI setting.
mbSpamSmall$VI <- "no VI"
mbSpamSmallImp$VI <- "VI"
mbSpamSmallBoth <- rbind(mbSpamSmall, mbSpamSmallImp)

SpamSmallMBplot <- mbplot(mbSpamSmallBoth)
SpamSmallMBplot <- SpamSmallMBplot +
  facet_wrap(~VI) +
  scale_fill_manual(values = viridis::viridis(4)[2:4])
SpamSmallMBplot
# toys data: big data case ----

# Simulate a two-class data set with 6 structured predictors and p - 6 noise
# predictors.
#
# n: number of observations.
# p: number of predictors (at least 6).
# q: proportion of observations (the first floor(q * n) rows) in which
#    columns 1-3 carry the shifted means; columns 4-6 carry them in the
#    remaining rows.
#
# y is drawn uniformly in {-1, 1}. For the first floor(q * n) rows, column i
# (i = 1..3) is y * N(i, 1) and columns 4-6 are y * N(0, 1); for the other
# rows, columns 1-3 are y * N(0, 1) and column i (i = 4..6) is
# y * N(i - 3, 1). Columns 7..p are N(0, 20^2) noise. All columns are then
# passed through scale().
#
# Returns (invisibly) a data frame holding the predictors and the factor y.
simu_toys <- function(n, p, q = 0.7) {
  stopifnot(p >= 6)

  y <- 2 * rbinom(n, 1, 0.5) - 1
  x <- matrix(NA, n, p)
  r <- floor(q * n)
  # A logical mask stays correct when r is 0 or n, unlike 1:r / -(1:r).
  inFirst <- seq_len(n) <= r

  for (i in 1:3) {
    x[inFirst, i] <- y[inFirst] * rnorm(r, i, 1)
    x[!inFirst, i] <- y[!inFirst] * rnorm(n - r, 0, 1)
  }
  for (i in 4:6) {
    x[inFirst, i] <- y[inFirst] * rnorm(r, 0, 1)
    x[!inFirst, i] <- y[!inFirst] * rnorm(n - r, i - 3, 1)
  }
  # 7:p would count down (7, 6) when p is 6, so only fill noise columns when
  # there are some.
  if (p > 6) {
    x[, 7:p] <- rnorm(n * (p - 6), 0, 20)
  }

  x <- scale(x)
  y <- as.factor(y)
  output <- data.frame("x" = x, "y" = y)
  invisible(output)
}
# Simulate 1000 observations with 10 predictors, split evenly over `ncores`
# forked workers; each worker is seeded from a reproducible list of seeds.
ncores <- 4
set.seed(768895)
allseeds <- round(runif(ncores, 1e5, 1e6))
alldata <- parallel::mclapply(
  1:ncores,
  function(ind) {
    set.seed(allseeds[ind])
    simu_toys(n = 1000 / ncores, p = 10)
  },
  mc.cores = ncores
)

# Stack the per-worker data frames and keep a copy on disk.
TallToys <- do.call("rbind", alldata)
dim(TallToys)
save(TallToys, file = "data/TallToys.Rdata")

# mtry: square root of the number of predictors (all columns but y).
mtryTallToys <- floor(sqrt(ncol(TallToys) - 1))
# Time forest fits on TallToys, 25 runs per expression; ranger and Rborist
# are restricted to one thread.
mbTallToys <- microbenchmark(
  randomForest::randomForest(y ~ ., TallToys, mtry = mtryTallToys),
  ranger::ranger(y ~ ., TallToys, mtry = mtryTallToys, num.threads = 1),
  Rborist::Rborist(subset(TallToys, select = -y), TallToys$y,
                   predFixed = mtryTallToys, minInfo = 0, nThread = 1),
  times = 25
)
# Short labels, in the same order as the expressions above.
levels(mbTallToys$expr) <- c("RF", "ranger", "Rborist")

mbTallToys
save(mbTallToys, file = "outputs/mbTallToys.Rdata")
# plotTallToys <- mbplot(mbTallToys) + scale_fill_manual(values = viridis::viridis(4)[2:4])
# plotTallToys
# Same data with variable importance computed, 25 runs per expression.
mbTallToysImp <- microbenchmark(
  randomForest::randomForest(y ~ ., TallToys, mtry = mtryTallToys,
                             importance = TRUE),
  ranger::ranger(y ~ ., TallToys, mtry = mtryTallToys, num.threads = 1,
                 importance = "permutation"),
  times = 25
)
# Short labels, in the same order as the expressions above.
levels(mbTallToysImp$expr) <- c("RF", "ranger")

mbTallToysImp
save(mbTallToysImp, file = "outputs/mbTallToysImp.Rdata")
# plotTallToysImp <- mbplot(mbTallToysImp) + scale_fill_manual(values = viridis::viridis(4)[2:3])
# plotTallToysImp
# Tag each result set, stack them, and draw one panel per VI setting.
mbTallToys$VI <- "no VI"
mbTallToysImp$VI <- "VI"
mbTallToysBoth <- rbind(mbTallToys, mbTallToysImp)

TallToysMBplot <- mbplot(mbTallToysBoth)
TallToysMBplot <- TallToysMBplot +
  facet_wrap(~VI) +
  scale_fill_manual(values = viridis::viridis(4)[2:4])
TallToysMBplot
# Simulate 10000 observations with 10 predictors, split evenly over `ncores`
# forked workers; each worker is seeded from a reproducible list of seeds.
set.seed(768895)
allseeds <- round(runif(ncores, 1e5, 1e6))
alldata <- parallel::mclapply(
  1:ncores,
  function(ind) {
    set.seed(allseeds[ind])
    simu_toys(n = 10000 / ncores, p = 10)
  },
  mc.cores = ncores
)

# Stack the per-worker data frames.
TALLToys <- do.call("rbind", alldata)
dim(TALLToys)
# save(TallToys, file = "data/TallToys.Rdata")

# mtry: square root of the number of predictors (all columns but y).
mtryTALLToys <- floor(sqrt(ncol(TALLToys) - 1))
# Time forest fits on TALLToys, 25 runs per expression; ranger and Rborist
# are restricted to one thread.
mbTALLToys <- microbenchmark(
  randomForest::randomForest(y ~ ., TALLToys, mtry = mtryTALLToys),
  ranger::ranger(y ~ ., TALLToys, mtry = mtryTALLToys, num.threads = 1),
  Rborist::Rborist(subset(TALLToys, select = -y), TALLToys$y,
                   predFixed = mtryTALLToys, minInfo = 0, nThread = 1),
  times = 25
)
# Short labels, in the same order as the expressions above.
levels(mbTALLToys$expr) <- c("RF", "ranger", "Rborist")

mbTALLToys
save(mbTALLToys, file = "outputs/mbTALLToys.Rdata")
# plotTALLToys <- mbplot(mbTALLToys) + scale_fill_manual(values = viridis::viridis(4)[2:4])
# plotTALLToys
# Same data with variable importance computed, 25 runs per expression.
mbTALLToysImp <- microbenchmark(
  randomForest::randomForest(y ~ ., TALLToys, mtry = mtryTALLToys,
                             importance = TRUE),
  ranger::ranger(y ~ ., TALLToys, mtry = mtryTALLToys, num.threads = 1,
                 importance = "permutation"),
  times = 25
)
# Short labels, in the same order as the expressions above.
levels(mbTALLToysImp$expr) <- c("RF", "ranger")

mbTALLToysImp
save(mbTALLToysImp, file = "outputs/mbTALLToysImp.Rdata")
# plotTALLToysImp <- mbplot(mbTALLToysImp) + scale_fill_manual(values = viridis::viridis(4)[2:3])
# plotTALLToysImp
# Tag each result set, stack them, and draw one panel per VI setting.
mbTALLToys$VI <- "no VI"
mbTALLToysImp$VI <- "VI"
mbTALLToysBoth <- rbind(mbTALLToys, mbTALLToysImp)

TALLToysMBplot <- mbplot(mbTALLToysBoth)
TALLToysMBplot <- TALLToysMBplot +
  facet_wrap(~VI) +
  scale_fill_manual(values = viridis::viridis(4)[2:4])
TALLToysMBplot
# One-predictor version of TALLToys: the first predictor plus the response.
# simu_toys() builds its output with data.frame("x" = <matrix>, ...), which
# expands into columns x.1, ..., x.p, so there is no column named `x`:
# `TALLToys$x` finds no single match and the former
# `TALLthinToys$x <- TALLToys$x[, 1]` left every predictor in place.
# The column is therefore selected by its exact name.
TALLthinToys <- data.frame(x = TALLToys[["x.1"]], y = TALLToys[["y"]])
# Time randomForest and single-threaded ranger on the one-predictor data
# (mtry = 1), 25 runs per expression. The Rborist call is disabled.
mbTALLthinToys <- microbenchmark(
  randomForest::randomForest(y ~ ., TALLthinToys, mtry = 1),
  ranger::ranger(y ~ ., TALLthinToys, mtry = 1, num.threads = 1),
  # Rborist::Rborist(subset(TALLthinToys, select = -y), TALLthinToys$y, predFixed = 1, minInfo = 0, nThread = 1),
  times = 25
)
# Only two expressions are timed, so only two labels are assigned; a third
# "Rborist" label would add a level that has no timings.
levels(mbTALLthinToys$expr) <- c("RF", "ranger")

mbTALLthinToys
save(mbTALLthinToys, file = "outputs/mbTALLthinToys.Rdata")
# plotTALLthinToys <- mbplot(mbTALLthinToys) + scale_fill_manual(values = viridis::viridis(4)[2:3])
# plotTALLthinToys
# Same data with variable importance computed, 25 runs per expression.
mbTALLthinToysImp <- microbenchmark(
  randomForest::randomForest(y ~ ., TALLthinToys, mtry = 1,
                             importance = TRUE),
  ranger::ranger(y ~ ., TALLthinToys, mtry = 1, num.threads = 1,
                 importance = "permutation"),
  times = 25
)
# Short labels, in the same order as the expressions above.
levels(mbTALLthinToysImp$expr) <- c("RF", "ranger")

mbTALLthinToysImp
save(mbTALLthinToysImp, file = "outputs/mbTALLthinToysImp.Rdata")
# plotTALLthinToysImp <- mbplot(mbTALLthinToysImp) + scale_fill_manual(values = viridis::viridis(4)[2:3])
# plotTALLthinToysImp
# Tag each result set, stack them, and draw one panel per VI setting.
mbTALLthinToys$VI <- "no VI"
mbTALLthinToysImp$VI <- "VI"
mbTALLthinToysBoth <- rbind(mbTALLthinToys, mbTALLthinToysImp)

TALLthinToysMBplot <- mbplot(mbTALLthinToysBoth)
TALLthinToysMBplot <- TALLthinToysMBplot +
  facet_wrap(~VI) +
  scale_fill_manual(values = viridis::viridis(4)[2:4])
TALLthinToysMBplot
# Simulate 100000 observations with 10 predictors, split evenly over 20
# forked workers; each worker is seeded from a reproducible list of seeds.
ncores <- 20
set.seed(768895)
allseeds <- round(runif(ncores, 1e5, 1e6))
alldata <- parallel::mclapply(
  1:ncores,
  function(ind) {
    set.seed(allseeds[ind])
    simu_toys(n = 100000 / ncores, p = 10)
  },
  mc.cores = ncores
)

# Stack the per-worker data frames.
TALL5Toys <- do.call("rbind", alldata)
dim(TALL5Toys)
# save(TallToys, file = "data/TallToys.Rdata")

# Predictors only (all columns but y); mtry is the square root of their
# number, rounded down.
XTALL5Toys <- subset(TALL5Toys, select = -y)
mtryTALL5Toys <- floor(sqrt(ncol(XTALL5Toys)))
# Time randomForest and single-threaded Rborist on TALL5Toys, 10 runs per
# expression. The single-threaded ranger call is disabled.
mbTALL5Toys <- microbenchmark(
  randomForest::randomForest(y ~ ., TALL5Toys, mtry = mtryTALL5Toys),
  # ranger::ranger(y ~ ., TALL5Toys, mtry = mtryTALL5Toys, num.threads = 1),
  # predFixed uses this data set's own mtry (it previously referenced
  # mtryTALLToys, computed for a different data set).
  Rborist::Rborist(XTALL5Toys, TALL5Toys$y, predFixed = mtryTALL5Toys,
                   minInfo = 0, nThread = 1),
  times = 10
)
# Short labels, in the same order as the expressions above.
levels(mbTALL5Toys$expr) <- c("RF", "Rborist")

mbTALL5Toys
save(mbTALL5Toys, file = "outputs/mbTALL5Toys.Rdata")
plotTALL5Toys <- mbplot(mbTALL5Toys) +
  scale_fill_manual(values = viridis::viridis(4)[c(2, 4)])
plotTALL5Toys
# ranger was run on 10 cores in this example (it takes too long otherwise).
# Timings with variable importance on TALL5Toys, 10 runs per expression;
# ranger is given 10 threads here and its progress output is silenced.
mbTALL5ToysImp <- microbenchmark(
  randomForest::randomForest(y ~ ., TALL5Toys, mtry = mtryTALL5Toys,
                             importance = TRUE),
  ranger::ranger(y ~ ., TALL5Toys, mtry = mtryTALL5Toys, num.threads = 10,
                 importance = "permutation", verbose = FALSE),
  times = 10
)
# Short labels, in the same order as the expressions above.
levels(mbTALL5ToysImp$expr) <- c("RF", "ranger")

mbTALL5ToysImp
save(mbTALL5ToysImp, file = "outputs/mbTALL5ToysImp.Rdata")
plotTALL5ToysImp <- mbplot(mbTALL5ToysImp) +
  scale_fill_manual(values = viridis::viridis(4)[2:3])
plotTALL5ToysImp
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.