## vectorize.R
## Move looped, but unvectorized code outside of loops.
## This should *definitely* prompt a compiler warning to let the
## developer know their code is inefficient.
##
## This is like loop-invariant code motion, except the arguments are
## changed to make a vector of the _largest possible size_. Future
## versions should look for loop breaks, and if they are present, make
## a good first guess and allocate in chunks.
ex.wrong =
function(l, mu = 2, sd = 3) {
x <- numeric(l)
x[1] <- rnorm(1, mu, sd)
for (i in 2:l) {
x[i] <- x[i-1] + rnorm(1, mu, sd)
}
return(x)
}
ex.right =
function(l, mu = 2, sd = 3) {
x <- numeric(l)
r <- rnorm(l, mu, sd)
x[1] <- r[1]
for (i in 2:l) {
x[i] <- x[i-1] + r[i]
}
return(x)
}
set.seed(0)
system.time(x.wrong <- ex.wrong(10000))
set.seed(0)
system.time(x.right <- ex.right(10000))
stopifnot(x.right == x.wrong)
# Vectorizable functions, with their value being the position of the
# length argument
vectorFunctions <- c(rnorm=1, rbinom=1, sample=2)
findCalls =
# Traverse a block of code, looking for call. Each time it is found,
# the name of the call will be placed in an environment, and its
# arguments in a list.
function(code, call, results=new.env(parent=emptyenv())) {
for (i in seq_along(code)) {
if (is.call(code[[i]])) {
cat("checking:", as.character(code[[i]]), "\n")
if (as.character(code[[i]][[1]]) %in% call) {
# put entire call in results
cat("found:", as.character(code[[i]]), "\n")
call.name <- as.character(code[[i]][[1]])
if (exists(call.name, envir=results)) {
# The call exists; we now want to save all arguments.
tmp <- get(call.name, envir=results)
tmp[[length(tmp) + 1]] <- code[[i]]
} else {
# call doesn't exist; start a list of arguments
tmp <- list(code[[i]])
}
assign(call.name, tmp, envir=results)
}
# Check the arguments
findCalls(code[[i]][-1], call, results)
}
}
if (length(ls(envir=results)) > 0) {
out <- list()
for (item in ls(envir=results)) {
out[[item]] <- get(item, envir=results)
}
return(out)
}
return(NULL)
}
## TODO remove isNumericLiteral and isStringLiteral at some point;
## these are duplicated from compile.R, because I don't want to
## source('compile.R') if it's not functioning.
isNumericLiteral =
function(expr) {
# TODO no complex cases yet
if (class(expr) %in% c('numeric', 'integer'))
return(TRUE)
return(FALSE)
}
isStringLiteral =
# Test whether something is a string literal.
function(expr) {
return(is.character(expr))
}
isLiteral =
# Check against any type of literal
function(expr)
return(isStringLiteral(expr) || isNumericLiteral(expr))
examineArgDepends =
# It is absolutely necessary the arguments to any vectorized function
# be static. CodeDepends could be used in later (no proof-of-concept)
# versions. Currently, the arguments are only checked against the
# parameters, or that they are literals
function(call, params) {
# lazy-eval of params - could break this?
args <- as.list(call[-1])
for (arg in args) {
if (!(isLiteral(arg) || as.character(arg) %in% params)) {
# TODO: this naively assumes that the parameters are static
# throughout the body. CodeDepends could fix this.
return(FALSE)
}
}
return(TRUE)
}
checkVectorizedCodeMotion =
# Check a for loop for code that can be vectorized and moved out of
# it.
function(code, params, len, var) {
out.expr <- list()
# Look for candidate calls
cand.calls <- findCalls(code, names(vectorFunctions))
# With each candidate call, check that arguments are 100% in
# parameters list, or literals.
for (call.name in names(cand.calls)) {
call <- cand.calls[[call.name]]
# call is now _all_ calls made in this block. We need to deal with
# each unique one separately, but we'll add this one the simple
# case works.
call <- call[[1]] # TODO fix this
is.safe <- examineArgDepends(call, params) && call.name %in% names(vectorFunctions)
browser()
# Are we not vectorizing?
# (offset by one because of call name (not just args))
is.necessary = call[[vectorFunctions[call.name] + 1]] == 1
if (is.safe && is.necessary) {
# Now, find the right argument to expand, grab the maximum
# length, and prepare the new call to move. We also need to
# replace the call with a subset.
cat("Recommended: code motion\n") # better warnings framework?
cat(" old call:", as.character(call), "\n")
# needs to be unique name
new.call <- call
new.call[[vectorFunctions[call.name] + 1]] <- len
cat(" new (to be moved) call:", as.character(call), "\n")
# Now, figure out replacement. This is a rough pass; not robust
replace.var <- as.symbol(paste("_", call.name, "_vector_", sep=""))
replaced <- list(call, substitute(rv[i], list(rv=replace.var, i=var)))
cat(" call no to be replaced with:", as.character(replaced), "\n")
replace <- TRUE
# We break now, because better handling for multiple call
# replaces is needed
break()
}
}
if (replace)
return(list(new.call, replaced))
return(FALSE)
}
checkVectorizedCodeMotion(body(ex.wrong)[[4]][[4]], names(formals(ex.wrong)),
quote(len), quote(i))
getLimits =
# See comments in createLoop.R - this is duplicated. TODO remove duplication
function(call)
{
op = as.character(call[[1]])
ans = if(op == ":") {
list(from = call[[2]], to = call[[3]])
} else if(op == "seq_along") {
tmp = substitute(length(x), list(x = call[[2]]))
list(from = 1L, to = tmp)
} else if(op == "seq") {
k = match.call(seq, call)
argNames = names(k)[-1]
# formals(seq) returns ...
formals = c("from", "to", "by", "length.out", "along.with")
i = argNames == ""
argNames[i] = formals[which(i)]
structure(as.list(call[-1]), names = argNames)
}
# how should we get integers when we have, e.g., 1:10 which are
# numeric
ans = lapply(ans, function(val) if(is.numeric(val) && val == as.integer(val)) as.integer(val) else val)
ans
}
replaceCalls =
# Replace all calls with varied code. Right now this is recursive which
# breaks the code replacement. TODO - non-recursive version?
function(code, call, replace) {
for (i in seq_along(code)) {
if (is.call(code[[i]])) {
if (code[[i]] == call) {
cat("replacing:", as.character(code[[i]]), "with:", as.character(call), "\n")
code[[i]] <- replace
}
replaceCalls(code[[i]][-1], call, replace)
}
}
return(code)
}
replaceVectorizedCodeMotion =
# After checking for possible code motion, replace if necessary
function(for.code, params) {
var <- for.code[[2]]
len <- getLimits(for.code[[3]])$to # consider from?
motion <- checkVectorizedCodeMotion(for.code, params, len, var)
if (motion != FALSE) {
# Code motion needed, unpack results
move.out <- motion[[1]]
call.search <- motion[[2]][[1]]
call.replace <- motion[[2]][[2]]
}
}
if (FALSE) {
body(ex.wrong)[[4]][[4]][[2]]
findCalls(body(ex.wrong), "rnorm")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.