Nothing
##' Parses a collection of elements into (almost) equal-sized groups
##'
##' Parses a collection of elements into (almost) equal-sized groups.
##' Useful for splitting up an R job that operates over a large dataframe or list
##' into multiple jobs.
##'
##' @export
##' @param n The number elements to be parsed
##'
##' @param njobs The number of groups
##'
##' @param collate \code{= TRUE} alternative ordering of the grouping. See example below.
##'
##' @param random.seed An integer setting the random seed, which will result in randomizing the
##' elements among the jobs. If \code{NULL}, no randomization is performed. Randomization
##' cannot be performed if \code{collate = TRUE} or if \code{text.to.eval = TRUE}.
##' Randomization is useful when the computing time for each element varies significantly
##' because it helps to even out the run times of parallel jobs.
##'
##' @param text.to.eval If \code{= TRUE}, a text expression is returned, that when
##' evaluated, will produce the sequence of elements for that group.
##' This is especially useful when \code{n} is very large. (See \code{Value} section below).
##'
##' @return When \code{text.to.eval = FALSE},
##' a list with \code{njobs} elements is returned, each element containing a numeric
##' vector of the element numbers which correspond to that group.
##' When \code{text.to.eval = TRUE}, a list with \code{njobs} elements is returned, each
##' element containing text (let's call it \code{val}), that when evaluated
##' using \code{eval(parse(text = val))}, will produce the sequence of
##' numbers corresponding to the group.
##'
##' @author Landon Sego
##'
##' @examples
##' x <- parseJob(29, 6)
##' print(x)
##'
##' # To see the range of each group
##' lapply(x, range)
##'
##' # To see the length of each group
##' lengths(x)
##'
##' # Randomize the outcome
##' parseJob(32, 5, random.seed = 231)
##'
##' # Example of 'text.to.eval = TRUE'
##' out <- parseJob(11, 3, text.to.eval = TRUE)
##' out
##' lapply(out, function(x) eval(parse(text = x)))
##'
##' # Example of 'collate = TRUE' and 'text.to.eval = TRUE'
##' parseJob(11, 3, collate = TRUE)
##' parseJob(11, 3, collate = TRUE, text.to.eval = TRUE)
parseJob <- function(n, njobs, collate = FALSE, random.seed = NULL, text.to.eval = FALSE) {
stopifnotMsg(# n
if (is.numeric(n)) (n > 0) & (n %% 1 == 0) else FALSE,
"'n' must be a positive, whole number",
# njobs
if (is.numeric(njobs)) (njobs > 0) & (njobs %% 1 == 0) else FALSE,
"'njobs' must be a positive, whole number",
# collate
is.logical(collate) & (length(collate) == 1),
"'collate' must be TRUE or FALSE",
# random.seed
if (!is.null(random.seed)) is.numeric(random.seed) & (length(random.seed) == 1) else TRUE,
"'random.seed' must be numeric or NULL",
# text.to.eval
is.logical(text.to.eval) & (length(text.to.eval) == 1),
"'text.to.eval' must be TRUE or FALSE")
# If njobs is too large
if (n < njobs) {
warning("'n' must be greater than or equal to 'njobs'\n",
"Setting 'njobs' to ", n)
njobs <- n
}
if (text.to.eval & !is.null(random.seed)) {
warning("No randomization is performed when 'text.to.eval = TRUE'")
random.seed <- NULL
}
# If we want to process the elements in sequence
if (collate) {
if (!is.null(random.seed)) {
warning("No randomization is performed when 'collate = TRUE'")
random.seed <- NULL
}
out <- vector(mode = "list", length = njobs)
for (i in 1:njobs)
out[[i]] <- paste("seq(", i, ", ", n, ", by = ", njobs, ")", sep = "")
} # if collating
else {
# Use integer division and the mod to count how many need to be in each job
n.per.job <- rep(n %/% njobs, njobs)
n.left.over <- n %% njobs
# Check 1
if (n != (n.per.job[1] * njobs + n.left.over)) {
stop("Algorithm failure 1")
}
# Add 1 to the first elements of n.per.job
if (n.left.over) {
n.per.job[1:n.left.over] <- n.per.job[1:n.left.over] + 1
}
# Check 2
if (sum(n.per.job) != n) {
stop("Algorithm failure 2")
}
if (is.null(random.seed)) {
hi <- cumsum(n.per.job)
lo <- hi - n.per.job + 1
out <- as.list(paste(lo, hi, sep = ":"))
}
else {
# Define the cases
cases <- 1:n
# Set the seed and randomize the cases
set.seed(random.seed)
cases <- sample(cases)
# The list building function
subEnv <- new.env()
assign("counter", 0, envir = subEnv)
# Function that will build the vectors that show the parsing
lb <- function(x) {
counter.local <- get("counter", envir = subEnv)
assign("counter", max(x) + counter.local, envir = subEnv)
return(cases[counter.local + 1:x])
}
# Build the output list
out <- lapply(as.list(n.per.job), lb)
# Check 3
check.out <- unlist(out)
if (!is.null(random.seed)) {
check.out <- sort(check.out)
}
if (!all(check.out == 1:n)) {
stop("Algorithm failure 3")
}
} # else randomization
} # else no collating
# Convert from text if text.to.eval == FALSE
if (!text.to.eval & is.null(random.seed)) {
out <- lapply(out, function(x) eval(parse(text = x)))
}
# Verify that the length of the list is equal to njobs
if (length(out) != njobs) {
stop("Algorithm failure 4")
}
return(out)
} # parseJob
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.