R/seedCreator.R

Defines functions update.portableSeeds seedCreator

Documented in seedCreator update.portableSeeds

## Paul E. Johnson <pauljohn@ku.edu>
## Portable Parallel Streams Project.


##' Creates a portable sets of seeds to be used for replication of
##' simulations. The seeds are suitable for replicatable parallel
##' computations. This creates the seed object and, if the file
##' argument is specified, it writes the object on disk.
##'
##'  Using the L'Ecuyer-CMRG random generator provided by the R
##' package parallel, this follows the strategy outlined in L'Ecuyer,
##' et. al (2002). The very very long random stream is subdivided into
##' smaller streams, thus allowing the creation of several separate
##' streams of random numbers for each of the runs of a simulation
##' exercise.
##'
##' This makes it possible to execute a batch of simulations, and then
##' later re-start any particular run of interest. This works whether
##' the calculations are done in parallel on a cluster or serial on a
##' single computer. This function takes into account the possibility
##' that, within each run of the model, it may be necessary to draw
##' values from several separate streams of random numbers (for
##' example, to keep separate streams for the generation of data about
##' teachers, students, school buildings, and so forth). This function
##' creates the seeds, allowing for accurate replication on a diverse
##' network of computing devices. It generates the seeds of many
##' separate streams and returns them in an object (and also writes
##' them in a file).  The seeds to re-start a simulation are kept in a
##' list object.
##'
##' The user specifies the number of simulation runs, "nReps," so the
##' output list will have "nReps" elements. In each of those objects
##' inside the list, there will be seeds to initialize "streamsPerRep"
##' streams.
##'
##' @export seedCreator
##' @param nReps Number of replications for which starting seed values
##' are to be created.
##' @param streamsPerRep Number of streams to be created for each separate simulation.
##' @param seed An integer seed value that can be used to initialize the creation of the many-separate-substreams.
##' @param file The file name in which the list of stream seeds is to be collected, by default, that file is called "projSeeds.rds".
##' @return A list that includes "nReps" elements. Each element is a vector of "streamsPerRep" stream starting values.
##' @author Paul E. Johnson <pauljohn@@ku.edu>
##' @import parallel
##' @importFrom stats runif
##' @seealso snowft, streams, parallel
##' @references L'Ecuyer, P. (1999). Good Parameters and
##' Implementations for Combined Multiple Recursive Random Number
##' Generators. Operations Research, 47(1), 159-164.
##' L'Ecuyer, P., Simard, R., Chen, E. J., & Kelton, W. D. (2002). An
##' Object-Oriented Random-Number Package with Many Long Streams and
##' Substreams. Operations Research, 50(6), 1073-1075.
##' @examples
##' projSeeds <- seedCreator(2000, 3, seed = 123456, file = "fruits.rds")
##' A1 <- projSeeds[[787]]
##'
##' rm(projSeeds)
##' # read from file, take run 787's seed
##' myFruitySeeds <- readRDS("fruits.rds")
##' B1 <- myFruitySeeds[[787]]
##'
##' identical(A1, B1) # check
##' unlink("fruits.rds") #delete file
seedCreator <- function(nReps = 2000, streamsPerRep = 3, seed, file = NULL){
    RNGkind("L'Ecuyer-CMRG")
    
    ## projSeeds=list of lists of stream seeds
    projSeeds <- vector(mode="list", length = nReps)
    for (i in 1:nReps) projSeeds[[i]] <- vector(mode="list", streamsPerRep)
    
    if (!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE))
        runif(1) ##establishes .Random.seed
    if (!missing(seed)) set.seed(seed)
    ##Grab first seed
    s <- .Random.seed
    origSeed <- s
    
    for (i in 1:nReps) {
        for (j in 1:streamsPerRep){
            projSeeds[[i]][[j]] <- s
            s <- parallel::nextRNGStream(s)
        }
    }
    class(projSeeds) <- "portableSeeds"
    if (!is.null(file)) saveRDS(projSeeds, file)
    invisible(projSeeds)
}



##' Add initializing streams for more runs to a portableSeeds object
##'
##' After seedCreator has created a collection of seeds, sometimes it
##' is necessary to conduct additional runs that require more
##' initializing values. This function fulfills that need.
##'
##' @param object A portableSeeds object created by seedCreator
##' @param more An integer indicating the number of additional runs
##' for which seeds are to be created.
##' @param file A quoted character string representing file name into
##' which the portableSeeds object is to be placed.
##' @param ... Additional arguments, none of which are put to any use
##' @return A \code{portableSeeds} object that includes the original set, with
##' \code{more} seeds added to the end. The entirety of this returned set
##' should equal the set that would have been generated by seedCreator if
##' the same number of seeds had been requested.
##' @author Paul E. Johnson <pauljohn@@ku.edu>
##' @export
##' @rdname update.portableSeeds
##' @method update portableSeeds
##' @examples
##' require(parallel)
##' projSeeds <- seedCreator(2000, 3, seed = 123456, file = "someSeeds.rds")
##' projSeeds2 <- update(projSeeds, more = 19, file = "somePlusSeeds.rds")
##' identical(projSeeds[[1]], projSeeds2[[1]])
##' identical(projSeeds[[2000]], projSeeds2[[2000]])
##' projSeeds2[[2001]]
##' projSeedsLast <- projSeeds[[2000]][[3]]
##' projSeedsNext <- parallel::nextRNGStream(projSeedsLast)
##' identical(projSeedsNext, projSeeds2[[2001]][[1]])
update.portableSeeds <- function(object, more, file = NULL, ...){
    RNGkind("L'Ecuyer-CMRG")
    
    if ( (missing(object)) | (!class(object)== "portableSeeds") ) {
        stop("update requires an input object of type projSeeds")
    }
    
    if (missing(more)){
        msg <- paste("update needs to know how many more",
                     "runs-worth of seeds you want to add to the collection")
        stop(msg)
    }
    nReps <- length(object)
    streamsPerRep <- length(object[[1]])
    
    s <- object[[nReps]][[streamsPerRep]]
    s <- parallel::nextRNGStream(s)
    
    newSeeds <-  vector(mode="list", length = more)
    for (i in 1:more) newSeeds[[i]] <- vector(mode="list", streamsPerRep)
    object <- c(object, newSeeds)
    
    for (i in (1 + nReps):(nReps + more)) {
        for (j in 1:streamsPerRep){
            object[[i]][[j]] <- s
            s <- parallel::nextRNGStream(s)
        }
    }
    class(object) <- "portableSeeds"
    if (!is.null(file)) saveRDS(object, file)
    invisible(object)
}
webb767/ppsmac documentation built on July 28, 2020, 12:59 a.m.