R/foreach.R

Defines functions when foreach

Documented in foreach when

#
# Copyright (c) Microsoft. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

#' @title foreach
#' @description
#' `%do%` and `%dopar%` are binary operators that operate
#' on a `foreach` object and an `R` expression.
#' The expression, `ex`, is evaluated multiple times in an environment
#' that is created by the `foreach` object, and that environment is
#' modified for each evaluation as specified by the `foreach` object.
#' `%do%` evaluates the expression sequentially, while `%dopar%`
#' evaluates it in parallel.
#' The results of evaluating `ex` are returned as a list by default,
#' but this can be modified by means of the `.combine` argument.
#'
#' @param ... one or more arguments that control how `ex` is
#'    evaluated.  Named arguments specify the name and values of variables
#'    to be defined in the evaluation environment.
#'    An unnamed argument can be used to specify the number of times that
#'    `ex` should be evaluated.
#'    At least one argument must be specified in order to define the
#'    number of times `ex` should be executed.
#'
#'	If multiple arguments are supplied, the number of times `ex` is
#'	evaluated is equal to the smallest number of iterations among the supplied
#'	arguments. See the examples.
#' @param .combine function that is used to process the tasks results as
#'    they generated.  This can be specified as either a function or
#'    a non-empty character string naming the function.
#'    Specifying 'c' is useful for concatenating the results into
#'    a vector, for example.  The values 'cbind' and 'rbind' can combine
#'    vectors into a matrix.  The values '+' and '*' can be used to
#'    process numeric data.
#'    By default, the results are returned in a list.
#' @param .init initial value to pass as the first argument of the
#'    `.combine` function.
#'    This should not be specified unless `.combine` is also specified.
#' @param .final function of one argument that is called to return final result.
#' @param .inorder logical flag indicating whether the `.combine`
#'    function requires the task results to be combined in the same order
#'    that they were submitted.  If the order is not important, then it
#'    setting `.inorder` to `FALSE` can give improved performance.
#'    The default value is `TRUE.
#' @param .multicombine logical flag indicating whether the `.combine`
#'    function can accept more than two arguments.
#'    If an arbitrary `.combine` function is specified, by default,
#'    that function will always be called with two arguments.
#'    If it can take more than two arguments, then setting `.multicombine`
#'    to `TRUE` could improve the performance.
#'    The default value is `FALSE` unless the `.combine`
#'    function is `cbind`, `rbind`, or `c`, which are known
#'    to take more than two arguments.
#' @param .maxcombine maximum number of arguments to pass to the combine function.
#'    This is only relevant if `.multicombine` is `TRUE`.
#' @param .errorhandling specifies how a task evaluation error should be handled.
#'    If the value is "stop", then execution will be stopped via
#'    the `stop` function if an error occurs.
#'    If the value is "remove", the result for that task will not be
#'    returned, or passed to the `.combine` function.
#'    If it is "pass", then the error object generated by task evaluation
#'    will be included with the rest of the results.  It is assumed that
#'    the combine function (if specified) will be able to deal with the
#'    error object.
#'    The default value is "stop".
#' @param .packages character vector of packages that the tasks depend on.
#'    If `ex` requires a `R` package to be loaded, this option
#'    can be used to load that package on each of the workers.
#'    Ignored when used with `%do%`.
#' @param .export character vector of variables to export.
#'    This can be useful when accessing a variable that isn't defined in the
#'    current environment.
#'    The default value in `NULL`.
#' @param .noexport character vector of variables to exclude from exporting.
#'    This can be useful to prevent variables from being exported that aren't
#'    actually needed, perhaps because the symbol is used in a model formula.
#'    The default value in `NULL`.
#' @param .verbose logical flag enabling verbose messages.  This can be
#'    very useful for trouble shooting.
#' @param obj `foreach` object used to control the evaluation
#'    of `ex`.
#' @param e1 `foreach` object to merge.
#' @param e2 `foreach` object to merge.
#' @param ex the `R` expression to evaluate.
#' @param cond condition to evaluate.
#' @param n number of times to evaluate the `R` expression.
#'
#' @details
#' The `foreach` and `%do%`/`%dopar%` operators provide
#' a looping construct that can be viewed as a hybrid of the standard
#' `for` loop and `lapply` function.
#' It looks similar to the `for` loop, and it evaluates an expression,
#' rather than a function (as in `lapply`), but its purpose is to
#' return a value (a list, by default), rather than to cause side-effects.
#' This facilitates parallelization, but looks more natural to people that
#' prefer `for` loops to `lapply`.
#'
#' The `%:%` operator is the _nesting_ operator, used for creating
#' nested foreach loops. Type `vignette("nested")` at the R prompt for
#' more details.
#'
#' Parallel computation depends upon a _parallel backend_ that must be
#' registered before performing the computation. The parallel backends available
#' will be system-specific, but include `doParallel`, which uses R's built-in
#' \pkg{parallel} package. Each parallel backend has a specific registration function,
#' such as `registerDoParallel`.
#'
#' The `times` function is a simple convenience function that calls
#' `foreach`.  It is useful for evaluating an `R` expression multiple
#' times when there are no varying arguments.  This can be convenient for
#' resampling, for example.
#'
#' @seealso
#' [`iterators::iter`]
#' @examples
#' # equivalent to rnorm(3)
#' times(3) %do% rnorm(1)
#'
#' # equivalent to lapply(1:3, sqrt)
#' foreach(i=1:3) %do%
#'   sqrt(i)
#'
#' # multiple ... arguments
#' foreach(i=1:4, j=1:10) %do%
#' 	sqrt(i+j)
#'
#' # equivalent to colMeans(m)
#' m <- matrix(rnorm(9), 3, 3)
#' foreach(i=1:ncol(m), .combine=c) %do%
#'   mean(m[,i])
#'
#' # normalize the rows of a matrix in parallel, with parenthesis used to
#' # force proper operator precedence
#' # Need to register a parallel backend before this example will run
#' # in parallel
#' foreach(i=1:nrow(m), .combine=rbind) %dopar%
#'   (m[i,] / mean(m[i,]))
#'
#' # simple (and inefficient) parallel matrix multiply
#' library(iterators)
#' a <- matrix(1:16, 4, 4)
#' b <- t(a)
#' foreach(b=iter(b, by='col'), .combine=cbind) %dopar%
#'   (a %*% b)
#'
#' # split a data frame by row, and put them back together again without
#' # changing anything
#' d <- data.frame(x=1:10, y=rnorm(10))
#' s <- foreach(d=iter(d, by='row'), .combine=rbind) %dopar% d
#' identical(s, d)
#'
#' # a quick sort function
#' qsort <- function(x) {
#'   n <- length(x)
#'   if (n == 0) {
#'     x
#'   } else {
#'     p <- sample(n, 1)
#'     smaller <- foreach(y=x[-p], .combine=c) %:% when(y <= x[p]) %do% y
#'     larger  <- foreach(y=x[-p], .combine=c) %:% when(y >  x[p]) %do% y
#'     c(qsort(smaller), x[p], qsort(larger))
#'   }
#' }
#' qsort(runif(12))
#'
#' @keywords utilities
#' @export
#' @rdname foreach
foreach <- function(..., .combine, .init, .final=NULL, .inorder=TRUE,
                    .multicombine=FALSE,
                    .maxcombine=if (.multicombine) 100 else 2,
                    .errorhandling=c('stop', 'remove', 'pass'),
                    .packages=NULL, .export=NULL, .noexport=NULL,
                    .verbose=FALSE) {
  if (missing(.combine)) {
    if (!missing(.init))
      stop('if .init is specified, then .combine must also be specified')
    .combine <- defcombine
    hasInit <- TRUE
    init <- quote(list())
  } else {
    .combine <- match.fun(.combine)
    if (missing(.init)) {
      hasInit <- FALSE
      init <- NULL
    } else {
      hasInit <- TRUE
      init <- substitute(.init)
    }
  }

  # .multicombine defaults to TRUE if the .combine function is known to
  # take multiple arguments
  if (missing(.multicombine) &&
      (identical(.combine, cbind) ||
       identical(.combine, rbind) ||
       identical(.combine, c) ||
       identical(.combine, defcombine)))
    .multicombine <- TRUE

  # sanity check the arguments
  if (!is.null(.final) && !is.function(.final))
    stop('.final must be a function')
  if (!is.logical(.inorder) || length(.inorder) > 1)
    stop('.inorder must be a logical value')
  if (!is.logical(.multicombine) || length(.multicombine) > 1)
    stop('.multicombine must be a logical value')
  if (!is.numeric(.maxcombine) || length(.maxcombine) > 1 || .maxcombine < 2)
    stop('.maxcombine must be a numeric value >= 2')
  if (!is.character(.errorhandling))
    stop('.errorhandling must be a character string')
  if (!is.null(.packages) && !is.character(.packages))
    stop('.packages must be a character vector')
  if (!is.null(.export) && !is.character(.export))
    stop('.export must be a character vector')
  if (!is.null(.noexport) && !is.character(.noexport))
    stop('.noexport must be a character vector')
  if (!is.logical(.verbose) || length(.verbose) > 1)
    stop('.verbose must be a logical value')

  specified <- c('errorHandling', 'verbose')
  specified <- specified[c(!missing(.errorhandling), !missing(.verbose))]

  args <- substitute(list(...))[-1]

  if (length(args) == 0)
    stop('no iteration arguments specified')
  argnames <- names(args)
  if (is.null(argnames))
    argnames <- rep('', length(args))

  # check for backend-specific options
  options <- list()
  opts <- grep('^\\.options\\.[A-Za-z][A-Za-z]*$', argnames)
  if (length(opts) > 0) {
    # put the specified options objects into the options list
    for (i in opts) {
      bname <- substr(argnames[i], 10, 100)
      options[[bname]] <- list(...)[[i]]
    }

    # remove the specified options objects from args and argnames
    args <- args[-opts]
    argnames <- argnames[-opts]
  }

  # check for arguments that start with a '.', and issue an error,
  # assuming that these are misspelled options
  unrecog <- grep('^\\.', argnames)
  if (length(unrecog) > 0)
    stop(sprintf('unrecognized argument(s): %s',
                 paste(argnames[unrecog], collapse=', ')))

  # check for use of old-style arguments, and issue an error
  oldargs <- c('COMBINE', 'INIT', 'INORDER', 'MULTICOMBINE', 'MAXCOMBINE',
               'ERRORHANDLING', 'PACKAGES', 'VERBOSE', 'EXPORT', 'NOEXPORT',
               'LOADFACTOR', 'CHUNKSIZE')
  oldused <- argnames %in% oldargs
  if (any(oldused))
    stop(sprintf('old style argument(s) specified: %s',
                 paste(argnames[oldused], collapse=', ')))

  .errorhandling <- match.arg(.errorhandling)

  combineInfo <- list(fun=.combine, in.order=.inorder, has.init=hasInit,
                      init=init, final=.final, multi.combine=.multicombine,
                      max.combine=.maxcombine)
  iterable <- list(args=args, argnames=argnames, evalenv=parent.frame(),
                   specified=specified, combineInfo=combineInfo,
                   errorHandling=.errorhandling, packages=.packages,
                   export=.export, noexport=.noexport, options=options,
                   verbose=.verbose)
  class(iterable) <- 'foreach'
  iterable
}

#' @export
#' @rdname foreach
'%:%' <- function(e1, e2) {
  if (!inherits(e1, 'foreach'))
    stop('"%:%" was passed an illegal right operand')

  if (inherits(e2, 'foreach'))
    makeMerged(e1, e2)
  else if (inherits(e2, 'foreachCondition'))
    makeFiltered(e1, e2)
  else
    stop('"%:%" was passed an illegal right operand')
}

#' @export
#' @rdname foreach
when <- function(cond) {
  obj <- list(qcond=substitute(cond), evalenv=parent.frame())
  class(obj) <- 'foreachCondition'
  obj
}

Try the foreach package in your browser

Any scripts or data that you put into this service are public.

foreach documentation built on Feb. 2, 2022, 5:09 p.m.