setClass("size_t", contains = "numeric")
setClass("CUdevice", contains = "integer")
tmp = function(from)
new("CUdevice", as.integer(from - 1L))
setAs("numeric", "CUdevice", tmp)
setAs("integer", "CUdevice", tmp)
setClass("CUmodule", contains = "RC++Reference")
setClass("CUfunction", contains = "RC++Reference")
setClass("CUcontext", contains = "RC++Reference")
setClass("CUdeviceptr", contains = "RC++Reference")
setClass("CUstream", contains = "RC++Reference")
setClass("cudastream_t", contains = "RC++Reference")
setClass("CUevent", contains = "RC++Reference")
setClass("cudaEvent_t", contains = "RC++Reference")
setClass("cudaPtr", contains = "RC++Reference")
setClass("cudaPtrWithLength", representation(nels = "integer", elSize = "integer", elTypeName = "character"), contains = "RC++Reference")
setClass("cudaFloatPtr", contains = "cudaPtr")
setClass("cudaIntPtr", contains = "cudaPtr")
setClass("cudaDoubleArray", contains= "cudaPtrWithLength")
setClass("cudaFloatArray", contains= "cudaPtrWithLength")
setClass("cudaIntArray", contains= "cudaPtrWithLength")
setClass("CUresult", contains = "cudaError")
CUresultValues = cudaErrorValues
# Should be the other way around, but not a big deal.
#setClass("cudaError", contains = "cudaError_t")
# This is 0 based. When converting from integer to CUDeviceNum, we do the subtraction for the user.
setClass("CUDeviceNum", contains = "integer")
tmp = function(from) new("CUDeviceNum", as.integer(from - 1L))
setAs("numeric", "CUDeviceNum", tmp)
setAs("integer", "CUDeviceNum", tmp)
setMethod("[[", c("CUmodule", "character", "missing"),
function(x, i, j, globalVar = FALSE, ...) {
if(globalVar)
cuModuleGetGlobal(x, i)
else
getFunction(x, i)
})
setMethod("$", c("CUmodule"),
function(x, name) {
getFunction(x, name)
})
getFunction =
function(module, name)
{
ans = .Call("R_Module_getFunction", as(module, "CUmodule"), as.character(name))
if(is.integer(ans)) # !is(ans, "RC++Reference"))
raiseError(ans, msg = c("failed to get function ", name))
ans
}
loadModule =
#
#
#
function(filename, ctx = cuGetContext(TRUE),
isCode = is(filename, "raw") || is(filename, "AsIs") || grepl(" __cudaparm", filename),
...)
{
if(isCode) {
force(ctx)
return(cuModuleLoadDataEx(filename, ...))
}
filename = path.expand(filename)
if(!file.exists(filename))
stop("no such file ", filename)
force(ctx)
ans = .Call("R_loadModule", as.character(filename))
if(is.integer(ans)) # !is(ans, "RC++Reference"))
raiseError(ans, msg = c("failed to load module ", filename))
else
ans
}
raiseError =
function(status, msg = character(), ...)
{
i = match(status, CUresultValues)
type = names(CUresultValues)[i]
e = simpleError(paste(c(msg, names(status), "(", type, ")"), collapse = " "), ...)
class(e) = c(type, class(e))
stop(e)
}
createContext =
function(flags = 0L, device = 1L)
{
ans = .Call("R_createContext", as(flags, "CUctx_flags"), as(as(device, "CUDeviceNum"), "integer"))
if(is(ans, "CUresult") && ans != 0)
raiseError(ans, msg = c("failed to create context"))
ans
}
.gpu = .cuda =
# should we have a .gpu argument to specify which GPU to use
# Problem is we have to get the context also.
# User can do this directly and avoid overhead by doing it once.
function(fun, ..., .args = list(...), gridDim, blockDim,
sharedMemBytes = 0L, stream = NULL, inplace = FALSE,
outputs = logical(), .gc = TRUE, gridBy = NULL,
.async = !is.null(stream), .numericAsDouble = getOption("CUDA.useDouble", FALSE))
{
if(.gc)
gc()
# if(!missing(.gpu)) {
# ctxt = createContext( device = .gpu)
# on.exit(cuCtxPopCurrent())
# }
# .args = list(...)
.numericAsDouble = as.logical(.numericAsDouble)
if(length(.numericAsDouble) != length(.args))
.numericAsDouble = rep(.numericAsDouble, length = length(.args))
if(!missing(gridBy) && missing(gridDim)) {
#
# We could use the names of the arguments by
# examining the call and determining their names
# This is probably overkill and expensive.
# So instead, have the caller specify the actual objects
# call = substitute(gridBy)
# if(is.call(call))
# call = call[-1]
# vars = sapply(call, function(x) if(is.name(x)) .args[[as.character(x)]]
# else if(is.numeric(x)) .args[[x]])
if(missing(blockDim))
blockDim = getDeviceProperties(1L)@maxThreadsPerBlock
# allow for a list of objects or a single vector
lens = if(is.list(gridBy))
sapply(gridBy, length)
else if(length(gridBy) == 1)
gridBy
else
length(gridBy)
tmp = getGridSize(lens, blockDim)
gridDim = tmp$grid
}
fun = as(fun, "CUfunction")
if(length(gridDim) < 3)
gridDim = c(gridDim, c(1L, 1L, 1L))[1:3]
if(length(blockDim) < 3)
blockDim = c(blockDim, c(1L, 1L, 1L))[1:3]
mustCopy = sapply(.args, function(x) is.atomic(x) && length(x) > 1)
if(any(mustCopy))
.args[mustCopy] = mapply(function(obj, strict)
copyToDevice(obj, strict = strict),
.args[mustCopy], .numericAsDouble[mustCopy]) #, SIMPLIFY = FALSE, USE.NAMES = FALSE)
ans = .Call("R_cuLaunchKernel", fun, as.integer(gridDim), as.integer(blockDim), .args,
as.integer(sharedMemBytes), stream, .numericAsDouble)
if(is(ans, "cudaError_t")) # !is(ans, "RC++Reference"))
raiseError(ans, msg = c("failed to launch kernel"))
if(.async)
return(.args[mustCopy])
ans = cuCtxSynchronize() # cuStreamSynchronize(stream)
if(is(ans, "cudaError_t")) # !is(ans, "RC++Reference"))
raiseError(ans, msg = c("failed to launch kernel"))
if(!missing(outputs)) {
if(length(outputs) == 0 || is.logical(outputs) && !any(outputs))
return(NULL)
ans = lapply(.args[outputs], function(x) if(is(x, "cudaPtrWithLength")) x[] else x)
return(if(length(ans) == 1) ans[[1]] else ans)
}
if(any(mustCopy)) {
ans = lapply(.args[mustCopy], `[`)
if(sum(mustCopy) == 1)
ans[[1]]
else
ans
} else
ans
}
getElementSize =
function(obj, type = typeof(obj), strict = inherits(obj, "AsIs"))
{
if(!strict && type == "double")
type = "float"
i = match(type, names(CUDAStructSizes))
if(!is.na(i))
return(CUDAStructSizes[i])
switch(type,
logical=, integer= 4L,
float=, double=, numeric = 8L,
stop("don't know size of elements"))
}
cudaAlloc = cudaMalloc =
function(numEls, sizeof = 4L, elType = NA, strict = !missing(elType) || inherits(elType, "AsIs"))
{
if(missing(sizeof) && !missing(elType))
sizeof = getElementSize(type = elType, strict = strict)
ans = .Call("R_cudaMalloc", as.numeric(as.numeric(numEls) * sizeof))
if(is.integer(ans)) # !is(ans, "RC++Reference"))
raiseError(ans, msg = c("failed to create context"))
k = "cudaPtrWithLength"
if(!is.na(elType)) {
classType = if(elType == "double" && strict)
"Double"
else if(elType %in% c("integer", "logical"))
"Int"
else if(elType %in% c("float", "double", "numeric"))
"Float"
else NA
if(!is.na(classType))
k = sprintf("cuda%sArray", classType)
}
new(k, ref = ans@ref, nels = as.integer(numEls), elSize = as.integer(sizeof), elTypeName = as.character(elType))
}
copyToDevice =
function(obj, to = cudaMalloc(length(obj), elType = elType, strict = strict),
elType = typeof(obj), strict = !missing(elType))
{
ans = .Call("R_manual_cudaMemcpy", obj, to, to@elSize)
if(is(ans, "CUresult"))
raiseError(ans, "copying data to GPU")
to
}
setGeneric("copyFromDevice",
function(obj, nels, type)
standardGeneric("copyFromDevice"))
setMethod("copyFromDevice", "cudaFloatArray",
function(obj, nels, type)
copyFromDevice(obj@ref, obj@nels, "float"))
setMethod("copyFromDevice", "cudaDoubleArray",
function(obj, nels, type)
copyFromDevice(obj@ref, obj@nels, "double"))
setMethod("copyFromDevice", "cudaFloatArray",
function(obj, nels, type)
copyFromDevice(obj@ref, obj@nels, "float"))
setMethod("copyFromDevice", "cudaIntArray",
function(obj, nels, type)
copyFromDevice(obj@ref, obj@nels, "integer"))
setMethod("copyFromDevice", c("ANY"),
function(obj, nels, type)
{
nels = as.integer(nels)
ans = if(type == "integer")
.Call("R_getCudaIntVector", obj, nels)
else if(type == "logical")
.Call("R_getCudaIntVector", obj, nels)
else if(type == "float" || type == "numeric")
.Call("R_getCudaFloatVector", obj, nels, NULL)
else if(type == "double")
.Call("R_getCudaDoubleVector", obj, nels, NULL)
if(is(ans, "CUresult"))
raiseError(ans, "copying data on device")
ans
})
# Allow
# p = cudaMalloc()
# p[] = x
# to be shorthand for this.
setMethod("[", c("cudaFloatArray", "missing", "missing"),
function(x, i, j, ...) {
copyFromDevice(x, x@nels, type = "float")
})
setMethod("[", c("cudaDoubleArray", "missing", "missing"),
function(x, i, j, ...) {
copyFromDevice(x, x@nels, type = "double")
})
setMethod("[", c("cudaIntArray", "missing", "missing"),
function(x, i, j, ...) {
copyFromDevice(x, x@nels, type = "integer")
})
# Called for integer or numeric
setMethod("[", c("cudaFloatArray", "numeric", "missing"),
function(x, i, j, ...) {
if(all(i < 0))
return(x[][i])
i = as.integer(i - 1L)
# do we need to add 1 to max(i)
.Call("R_getCudaFloatVector", x, max(i) + 1L, i)
})
setMethod("[", c("cudaIntArray", "numeric", "missing"),
function(x, i, j, ...) {
if(all(i < 0))
return(x[][i])
i = as.integer(i)
ans = .Call("R_getCudaIntVector", x, max(i))
ans[i]
})
setMethod("[", c("cudaPtrWithLength", "logical", "missing"),
function(x, i, j, ...) {
x[which(i)]
})
################
setMethod("[<-", c("cudaPtrWithLength", "missing", "missing"),
function(x, i, j, ..., value) {
if(length(value) > x@nels)
warning("only copying ", x@nels, " elements")
# coerce to the correct type
copyToDevice(rep(value, length = x@nels), x)
})
# Should be able to use the single generic version of this above
# and have the C code avoid copying the data.
setMethod("[<-", c("cudaFloatArray", "missing", "missing"),
function(x, i, j, ..., value) {
if(length(value) > x@nels)
warning("only copying ", x@nels, " elements")
copyToDevice(rep(as.numeric(value), length = x@nels), x)
})
setMethod("[<-", c("cudaIntArray", "missing", "missing"),
function(x, i, j, ..., value) {
if(length(value) > x@nels)
warning("only copying ", x@nels, " elements")
copyToDevice(rep(as.integer(value), length = x@nels), x)
})
cuInit =
function(flags = 0L)
{
ans = .Call("R_cuInit", as.integer(flags), FALSE)
if(ans != 0)
raiseError(ans, "failed to initialize CUDA")
ans
}
cuGetContext =
function(create = TRUE, ..., asContext = TRUE)
{
ans = .Call("R_cuCtxGetCurrent", as.logical(asContext))
if(is.integer(ans))
raiseError(ans, "failed to get current CUDA context")
if(isNativeNull(ans) && create)
ans = createContext(...)
ans
}
cudaVersion = cuVersion =
function()
{
structure(.Call("R_cuGetVersion"), names = c("driver", "runtime"))
}
cudaErrorString =
function()
.Call("R_cudaGetLastError")
cuMemGetInfo = cuMemInfo =
function()
{
ans = .Call("R_cuMemGetInfo")
if(is.integer(ans))
raiseError(ans, "failed to get current CUDA context")
ans[3] = ans[1]/ans[2]
names(ans) = c("free", "total", "% free")
ans
}
cuFuncGetAttributes =
function(func)
{
# ignore the MAX entry
vals = CUfunction_attributeValues
sapply(unclass(vals)[-length(vals)], cuFuncGetAttribute, as(func, "CUfunction"))
}
cuModuleLoadDataEx <-
function( image , options = integer(), opts = FALSE)
{
cuGetContext()
options = if(length(options))
sapply(options, function(x) as.integer(as(x, "CUjit_option")))
else
integer()
ans = .Call('R_cuModuleLoadDataEx', image, options, as.logical(opts))
if(is(ans, 'CUresult') && ans != 0)
raiseError(ans, 'R_cuModuleLoadDataEx')
if(opts)
ans
else
ans
}
cudaSetDevice =
function(device = 1L)
{
device = as.integer(device) - 1L
if(device < 0)
stop("negative index for device. Must be a positive number")
.Call("R_cudaSetDevice", device)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.