# Global knitr chunk options: echo the source, suppress warnings and
# messages, and cache chunk results.
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = FALSE,
  cache = TRUE
)
Here is some parallel processing code that uses the foreach
package:
# Baseline: square 1..3 in parallel with plain foreach + doParallel.
library(doParallel)

# Start one worker per detected core and register the cluster as the
# backend used by %dopar%.
cl <- makeCluster(detectCores())
registerDoParallel(cl)

# .combine = c concatenates the per-iteration results into one vector.
foreach(i = 1:3, .combine = c) %dopar% {
  i^2
}

# Shut the workers down once the loop has finished.
stopCluster(cl)
It can be simplified using pforeach()
instead of foreach()
:
# Same computation with pforeach(): this snippet contains no explicit
# cluster setup or teardown.
library(pforeach)

pforeach(i = 1:3)({
  i^2
})
# Install "devtools" only if it is not installed yet.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}

# Install pforeach from its GitHub repository.
devtools::install_github("hoxo-m/pforeach")
Using foreach()
:
library(doParallel)
library(dplyr) # Another package, used inside the loop body

cl <- makeCluster(detectCores())
registerDoParallel(cl)

# You must specify the .packages parameter.
foreach(i = 1:3, .combine = c, .packages = "dplyr") %dopar% {
  iris[i, ] %>% select(-Species) %>% sum()
}

stopCluster(cl)
Using pforeach()
:
library(pforeach)
library(dplyr) # Another package, used inside the loop body

# No .packages argument is passed in this version.
pforeach(i = 1:3)({
  iris[i, ] %>% select(-Species) %>% sum()
})
Using foreach()
:
# You must specify the .export parameter.
library(doParallel)

# Helper defined in the global environment and called inside the loop.
square <- function(x) x^2

# Runs square() over 1..3 in parallel and returns the combined vector.
execute <- function() {
  cl <- makeCluster(detectCores())
  # Release the workers even if the loop below fails.
  on.exit(stopCluster(cl), add = TRUE)
  registerDoParallel(cl)

  foreach(i = 1:3, .combine = c, .export = "square") %dopar% {
    square(i)
  }
}

execute()
Using pforeach()
:
# No .export argument is passed in this version.
library(pforeach)

# Helper defined in the global environment and called inside the loop.
square <- function(x) x^2

# Runs square() over 1..3 with pforeach() and returns the result.
execute <- function() {
  pforeach(i = 1:3)({
    square(i)
  })
}

execute()
Iteration over a data frame can be simplified by using rows()
and cols()
instead of iterators::iter()
.
Using iter()
:
library(doParallel)

cl <- makeCluster(detectCores())
registerDoParallel(cl)

data <- iris[1:5, ]

# iter(by = "row") hands the loop one row of `data` at a time;
# column 5 (Species) is dropped before summing.
foreach(row = iter(data, by = "row"), .combine = c) %dopar% {
  sum(row[-5])
}

stopCluster(cl)
Using rows()
:
library(pforeach)

data <- iris[1:5, ]

# rows(data) replaces iter(data, by = "row"); column 5 (Species) is
# dropped before summing.
pforeach(row = rows(data))({
  sum(row[-5])
})
Using cols()
:
library(pforeach)

# Keep only the four numeric measurement columns of iris.
data <- iris[, 1:4]

# Compute the mean of each column.
pforeach(col = cols(data))({
  mean(col)
})
You can specify the number of cores for parallel processing with the .cores
parameter.
# Request two cores for this loop.
pforeach(i = 1:3, .cores = 2)({
  i^2
})
If you set a negative value for .cores
, for example .cores = -1
, it means .cores = detectCores() - 1
.
# A negative .cores value is counted back from detectCores().
pforeach(i = 1:3, .cores = -1)({
  i^2
})
If you want to fix the random seed, set the .seed
parameter:
library(pforeach)

# Pass a fixed .seed to the loop that draws random numbers.
pforeach(i = 1:3, .seed = 12345)({
  rnorm(1)
})

# Same call with the result passed through as.vector(). The base call is
# used directly so the snippet does not need a pipe operator in scope.
as.vector(
  pforeach(i = 1:3, .seed = 12345)({
    rnorm(1)
  })
)
If you want to change parallel code to non-parallel code with pforeach
, you only need to add one character, "n".
library(pforeach)

# npforeach() takes the same call shape as pforeach().
npforeach(i = 1:3)({
  i^2
})
Parallelized random forest code with foreach
is below:
library(doParallel)
library(randomForest)
library(kernlab)

# Load the spam data set.
data(spam)

cores <- detectCores()
cl <- makePSOCKcluster(cores)
registerDoParallel(cl)

# One iteration per core, each fitting a 250-tree forest; the fitted
# forests are merged with combine().
fit.rf <- foreach(
  ntree = rep(250, cores),
  .combine = combine,
  .export = "spam",
  .packages = "randomForest"
) %dopar% {
  randomForest(type ~ ., data = spam, ntree = ntree)
}

stopCluster(cl)
Using pforeach
:
library(pforeach)
library(randomForest)
library(kernlab)

# Load the spam data set.
data(spam)

# One 250-tree forest per element of rep(250, .cores); the fitted
# forests are merged with combine().
fit.rf <- pforeach(ntree = rep(250, .cores), .c = combine)({
  randomForest(type ~ ., data = spam, ntree = ntree)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.