library(RCUDA)
# This overwrites the input values so is intended to process
# the data once and no second passes.
m = loadModule("inst/sampleKernels/dnorm.ptx")
k = m$dnorm_kernel
N = 1e6L
x = rnorm(N)
mu = 0
sigma = 1
cx = copyToDevice(x)
.cuda(k, cx, N, mu, sigma, gridDim = c(64L, 32L), blockDim = 512L)
i = cx[]
cu.tm = system.time(replicate(10, {
cx = copyToDevice(x)
.cuda(k, cx, N, mu, sigma, gridDim = c(64L, 32L), blockDim = 512L)
i = cx[]
}))
r.tm = system.time(replicate(10, dnorm(x, mu, sigma)))
cu.tm/r.tm
head(i)
# The values will not be the same as if we had computed them in R.
# This is due to the difference in numerical precision with floats
# and with the GPUs themselves.
summary(dnorm(x[1:100], mu, sigma) - i[1:100])
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.