# tests/streamUse.R

library(RCUDA)
# Look for the PTX file among the installed RCUDA sample kernels; when it is
# not found, build it from the matching .cu source with nvcc().
kfile <- system.file("sampleKernels", "distance_gputools.ptx", package = "RCUDA")
if (!file.exists(kfile)) {
  kfile <- nvcc(
    system.file("sampleKernels", "distance_gputools.cu", package = "RCUDA"),
    "distance_gputools.ptx"
  )
}
mod <- loadModule(kfile)


# Random N x p input matrix used by the stream example below.
N <- 1e4
p <- 200L
AB <- matrix(rnorm(N * p), N, p)

# Launch the `euclidean_kernel_same` kernel found in `mod` on the matrix `AB`.
# Presumably this yields the pairwise distances between the rows of `AB`
# (inferred from the kernel name only -- confirm against the kernel source).
#
# Arguments:
#   AB        - matrix; its transpose is what is handed to the kernel.
#   mod       - object providing a `euclidean_kernel_same` element.
#   blockSize - forwarded to .gpu() as `blockDim`.
#   .async    - forwarded to .gpu(); also selects the shape of the result.
#   ...       - any further arguments are passed on to .gpu() unchanged.
#
# Value:
#   When `.async` is FALSE, the .gpu() result reshaped into an
#   nrow(AB) x nrow(AB) matrix; otherwise the .gpu() result as returned.
gdist.same <- function(AB, mod, blockSize = 32L, .async = FALSE, ...) {
  n_obs <- nrow(AB)
  n_vars <- ncol(AB)

  # `ans` is the 8th kernel argument, which lines up with `outputs = 8L`
  # (presumably how .gpu() identifies the result buffer -- TODO confirm).
  # The trailing 2.0 is passed through as-is; its meaning is defined by the
  # kernel and is not visible here.
  out <- .gpu(
    mod$euclidean_kernel_same,
    t(AB), n_vars, n_obs,
    NULL, 0L, 0L,
    n_vars, ans = numeric(n_obs^2), n_obs, 2.0,
    outputs = 8L,
    gridDim = c(n_obs, n_obs),
    blockDim = blockSize,
    .async = .async,
    ...
  )

  if (!.async) {
    return(matrix(out, n_obs, n_obs))
  }
  out
}

# Disabled example: run gdist.same() asynchronously on a dedicated stream.
if (FALSE) {
  stream <- cuStreamCreateWithPriority(0, -1)
  o <- gdist.same(AB, mod, .async = TRUE, stream = stream)

  # Both queries are expected to report NOT_READY while the kernel runs.
  print(cuStreamQuery(stream))
  print(cuStreamQuery(stream))

  # Block until the work queued on the stream has finished.
  # NOTE(review): this is a cuda* call among cu* calls -- confirm that the
  # mix is intentional.
  cudaStreamSynchronize(stream)

  # Fetch the result and show its first ten values.
  o$ans[][1:10]
  cuStreamDestroy(stream)
}
# duncantl/RCUDA documentation built on May 15, 2019, 5:26 p.m.