library(RCUDA)
# Look for a PTX file shipped in the installed RCUDA package. system.file()
# gives "" when the file is absent, in which case file.exists() is FALSE and
# the .cu source is run through nvcc() instead.
kfile <- system.file("sampleKernels", "distance_gputools.ptx",
                     package = "RCUDA")
if (!file.exists(kfile)) {
  # Second argument is presumably the output file name -- confirm in RCUDA docs.
  kfile <- nvcc(
    system.file("sampleKernels", "distance_gputools.cu", package = "RCUDA"),
    "distance_gputools.ptx"
  )
}
mod <- loadModule(kfile)

# Sample input: an N x p matrix of standard-normal draws.
N <- 1e4
p <- 200L
AB <- matrix(rnorm(N * p), N, p)
# Run the `euclidean_kernel_same` kernel from `mod` on the matrix AB via .gpu().
# Presumably this yields the pairwise distances between the rows of AB
# (the trailing 2.0 looks like the distance exponent) -- confirm against the
# kernel source.
#
# Arguments:
#   AB         matrix-like input; t(AB), ncol(AB) and nrow(AB) are passed on.
#   mod        object whose `euclidean_kernel_same` element is the kernel.
#   blockSize  handed to .gpu() as `blockDim`.
#   .async     forwarded to .gpu(); also decides the return form.
#   ...        extra arguments forwarded to .gpu() (e.g. `stream`).
#
# Value:
#   When .async is FALSE, the .gpu() result reshaped into an
#   nrow(AB) x nrow(AB) matrix; otherwise the .gpu() result unchanged.
gdist.same <-
  function(AB, mod, blockSize = 32L, .async = FALSE, ...) {
    n_obs <- nrow(AB)
    n_dim <- ncol(AB)

    res <- .gpu(
      mod$euclidean_kernel_same,
      t(AB), n_dim, n_obs,
      NULL, 0L, 0L,
      n_dim,
      ans = numeric(n_obs^2),
      n_obs,
      2.0,
      # `ans` is the eighth kernel argument.
      outputs = 8L,
      gridDim = c(n_obs, n_obs),
      blockDim = blockSize,
      .async = .async,
      ...
    )

    if (!.async) {
      return(matrix(res, n_obs, n_obs))
    }
    res
  }
# Asynchronous usage sketch. The if (FALSE) guard keeps it from running when
# the file is sourced; run the lines by hand to try it.
if (FALSE) {
  stream <- cuStreamCreateWithPriority(0, -1)
  o <- gdist.same(AB, mod, .async = TRUE, stream = stream)

  # Query the stream twice right away; both are expected to report NOT_READY.
  print(cuStreamQuery(stream))
  print(cuStreamQuery(stream))

  # Block until the work queued on the stream has finished.
  cudaStreamSynchronize(stream)

  # Fetch the result and look at its first ten values.
  o$ans[][1:10]

  cuStreamDestroy(stream)
}
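# NOTE(review): the two lines below appear to be leftover web-page text, not
# part of the script; kept as comments so the file parses.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.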