Nothing
## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
library("gpuMagic")
## -----------------------------------------------------------------------------
getDeviceList()
## -----------------------------------------------------------------------------
getDeviceInfo(1)
setDevice(1)
## -----------------------------------------------------------------------------
gpuMagic.getMemUsage()
## -----------------------------------------------------------------------------
#C=A%*%B
#Each time the function will compute a column of C
matMul<-function(ind,A,B){
C=A%*%B[,ind]
return(C)
}
## -----------------------------------------------------------------------------
n=10
m=50
k=100
A=matrix(runif(n*m),n,m)
B=matrix(runif(m*k),m,k)
C_sapply=sapply(1:k,matMul,A,B)
#The internal matrix operation function in R
C_internel=A%*%B
#Compute error
max(abs(C_sapply-C_internel))
## -----------------------------------------------------------------------------
C_gpu=gpuSapply(1:k,matMul,A,B)
#Compute error
max(abs(C_gpu-C_internel))
## -----------------------------------------------------------------------------
n=1000
m=1000
k=1000
A=matrix(runif(n*m),n,m)
B=matrix(runif(n*m),m,k)
system.time(
gpuSapply(1:k,matMul,A,B)
)
system.time(
A%*%B
)
## ----eval = FALSE-------------------------------------------------------------
# n=sample(1:10,1)
# A=matrix(0,n,n)
## -----------------------------------------------------------------------------
dynamic_example1<-function(ind,A,n){
B=matrix(0,n)
C=A+B
return(C)
}
n=10
A=matrix(runif(n),n)
#The code below wouldn't work,
#the value of n is unknown in the compilation stage
#res_device=gpuSapply(1,dynamic_example1,A,n)
#works, the value of n is knwown as a macro in the compilation stage
res_device=gpuSapply(1,dynamic_example1,A,n,.macroParms = c("n"))
## -----------------------------------------------------------------------------
#Perform the column sum of the first n elements in each column of A
dynamic_example2<-function(ind,A,n){
#Dynamically subsetting the first n elements in the ind column of the matrix A
#Almost equivalent to A_dyn=A[1:n,ind]
#The size of A_dyn is unknown in the compilation stage since the value of n is unknown
A_dyn=subRef(A,1:n,ind)
C=sum(A_dyn)
return(C)
}
n=10
M=100
A=matrix(runif(M),M,M)
res_device=gpuSapply(1:M,dynamic_example2,A,n)
res_cpu=sapply(1:M,dynamic_example2,A,n)
#check error
range(res_device-res_cpu)
## -----------------------------------------------------------------------------
dynamic_return<-function(ind,A,n){
#Dynamically subsetting the first n elements in the ind column of the matrix A
#equivalent to A_dyn=A[1:n,ind]
A_dyn=subRef(A,1:n,ind)
#return A_dyn, the size is unknown
return(A_dyn)
#The largest length of the variable A_dyn
tmp=matrix(0,nrow(A))
#tell the compiler the return size should be at least the same as the variable tmp
return(tmp)
}
n=10
M=20
A=matrix(runif(M*M),M,M)
#Expect a warning message: Undetermined return size has been found
suppressWarnings({
res_device=gpuSapply(1:M,dynamic_return,A,n)
})
#retrieve the result (The first n rows)
res_device=res_device[1:n,]
#check error
range(res_device-A[1:n,])
## -----------------------------------------------------------------------------
largestK<-function(ind,A,k){
#get a column of A
A_col=subRef(A,,ind)
#the largest k values and indices of A_col
largest_value=matrix(0,k)
largest_ind=matrix(0,k)
#loop over A_col to find the largest k values
for(i in 1:length(A_col)){
for(j in 1:k){
if(A_col[i]>largest_value[j]){
if(j!=k){
#Backward assignment to avoid self assignment problem
#(see section: Important note)
ind_src=(k-1):j
largest_value[ind_src+1]=largest_value[ind_src]
largest_ind[ind_src+1]=largest_ind[ind_src]
}
largest_value[j]=A_col[i]
largest_ind[j]=i
break
}
}
}
return_nocpy(largest_ind)
}
N=1000
M=1000
k=10
A=matrix(runif(N*M),N,M)
#Warmup
warm_res=gpuSapply(1:M,largestK,A=A,k=k,.macroParms = "k")
warm_res=sapply(1:M,largestK,A=A,k=k)
#count the time
system.time({res_device=gpuSapply(1:M,largestK,A=A,k=k,.macroParms = "k")})
system.time({res_cpu=sapply(1:M,largestK,A=A,k=k)})
#Check the error
range(res_device-res_cpu)
## -----------------------------------------------------------------------------
computeLoss<-function(ind,x,y,parms){
#Find the parameters for the thread
parm=parms[ind,]
#Compute y hat, use no-copy transpose
y_hat=x%*%t_nocpy(parm)
#absolute loss value(L1 loss)
loss=sum(abs(y-y_hat))
return(loss)
}
#Sample size
n=1000
#Number of parameters
p=2
beta=c(2,3)
#Generate data
x=matrix(runif(n*p),n,p)
e=runif(n)
y=x%*%beta+e
#Brute-force search
#The seaching range and precision
search_range=seq(0,10,0.1)
parms=expand.grid(search_range,search_range)
#Warmup
warm_res=gpuSapply(seq_len(nrow(parms)),computeLoss,x=x,y=y,parms=parms)
warm_res=sapply(seq_len(nrow(parms)),computeLoss,x=x,y=y,parms=parms)
#count the time
system.time({res_device=gpuSapply(seq_len(nrow(parms)),computeLoss,x=x,y=y,parms=parms)})
system.time({res_cpu=sapply(seq_len(nrow(parms)),computeLoss,x=x,y=y,parms=parms)})
#print out the parameters which minimize the loss function
parms[which.min(res_device),]
parms[which.min(res_cpu),]
## -----------------------------------------------------------------------------
version
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.