inst/doc/Quick_start_guide.R

## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
library("gpuMagic")

## -----------------------------------------------------------------------------
getDeviceList()

## -----------------------------------------------------------------------------
getDeviceInfo(1)
setDevice(1)

## -----------------------------------------------------------------------------
gpuMagic.getMemUsage()

## -----------------------------------------------------------------------------
#C=A%*%B
#Each time the function will compute a column of C
matMul<-function(ind,A,B){
  C=A%*%B[,ind]
  return(C)
}

## -----------------------------------------------------------------------------
n=10
m=50
k=100
A=matrix(runif(n*m),n,m)
B=matrix(runif(m*k),m,k)
C_sapply=sapply(1:k,matMul,A,B)
#The internal matrix operation function in R
C_internel=A%*%B
#Compute error
max(abs(C_sapply-C_internel))

## -----------------------------------------------------------------------------
C_gpu=gpuSapply(1:k,matMul,A,B)
#Compute error
max(abs(C_gpu-C_internel))

## -----------------------------------------------------------------------------
n=1000
m=1000
k=1000
A=matrix(runif(n*m),n,m)
B=matrix(runif(n*m),m,k)
system.time(
  gpuSapply(1:k,matMul,A,B)
)
system.time(
  A%*%B
)

## ----eval = FALSE-------------------------------------------------------------
#  n=sample(1:10,1)
#  A=matrix(0,n,n)

## -----------------------------------------------------------------------------
dynamic_example1<-function(ind,A,n){
  B=matrix(0,n)
  C=A+B
  return(C)
}
n=10
A=matrix(runif(n),n)
#The code below wouldn't work, 
#the value of n is unknown in the compilation stage

#res_device=gpuSapply(1,dynamic_example1,A,n)

#works, the value of n is knwown as a macro in the compilation stage
res_device=gpuSapply(1,dynamic_example1,A,n,.macroParms = c("n"))

## -----------------------------------------------------------------------------
#Perform the column sum of the first n elements in each column of A
dynamic_example2<-function(ind,A,n){
  #Dynamically subsetting the first n elements in the ind column of the matrix A
  #Almost equivalent to A_dyn=A[1:n,ind]
  #The size of A_dyn is unknown in the compilation stage since the value of n is unknown
  A_dyn=subRef(A,1:n,ind)
  C=sum(A_dyn)
  return(C)
}
n=10
M=100
A=matrix(runif(M),M,M)
res_device=gpuSapply(1:M,dynamic_example2,A,n)
res_cpu=sapply(1:M,dynamic_example2,A,n)
#check error
range(res_device-res_cpu)

## -----------------------------------------------------------------------------
dynamic_return<-function(ind,A,n){
  #Dynamically subsetting the first n elements in the ind column of the matrix A
  #equivalent to A_dyn=A[1:n,ind]
  A_dyn=subRef(A,1:n,ind)
  #return A_dyn, the size is unknown
  return(A_dyn)
  
  #The largest length of the variable A_dyn
  tmp=matrix(0,nrow(A))
  #tell the compiler the return size should be at least the same as the variable tmp
  return(tmp)

}
n=10
M=20
A=matrix(runif(M*M),M,M)
#Expect a warning message: Undetermined return size has been found
suppressWarnings({
  res_device=gpuSapply(1:M,dynamic_return,A,n)
})
#retrieve the result (The first n rows)
res_device=res_device[1:n,]

#check error
range(res_device-A[1:n,])

## -----------------------------------------------------------------------------
largestK<-function(ind,A,k){
  #get a column of A
  A_col=subRef(A,,ind)
  #the largest k values and indices of A_col
  largest_value=matrix(0,k)
  largest_ind=matrix(0,k)
  #loop over A_col to find the largest k values
  for(i in 1:length(A_col)){
    for(j in 1:k){
      if(A_col[i]>largest_value[j]){
        if(j!=k){
          #Backward assignment to avoid self assignment problem
          #(see section: Important note)
          ind_src=(k-1):j
          largest_value[ind_src+1]=largest_value[ind_src]
          largest_ind[ind_src+1]=largest_ind[ind_src]
        }
        largest_value[j]=A_col[i]
        largest_ind[j]=i
        break
      }
    }
  }
  return_nocpy(largest_ind)
}

N=1000
M=1000
k=10
A=matrix(runif(N*M),N,M)
#Warmup
warm_res=gpuSapply(1:M,largestK,A=A,k=k,.macroParms = "k")
warm_res=sapply(1:M,largestK,A=A,k=k)
#count the time
system.time({res_device=gpuSapply(1:M,largestK,A=A,k=k,.macroParms = "k")})
system.time({res_cpu=sapply(1:M,largestK,A=A,k=k)})
#Check the error
range(res_device-res_cpu)

## -----------------------------------------------------------------------------
computeLoss<-function(ind,x,y,parms){
  #Find the parameters for the thread
  parm=parms[ind,]
  #Compute y hat, use no-copy transpose
  y_hat=x%*%t_nocpy(parm)
  #absolute loss value(L1 loss)
  loss=sum(abs(y-y_hat))
  return(loss)
}
#Sample size
n=1000
#Number of parameters
p=2

beta=c(2,3)
#Generate data
x=matrix(runif(n*p),n,p)
e=runif(n)
y=x%*%beta+e

#Brute-force search
#The seaching range and precision
search_range=seq(0,10,0.1)
parms=expand.grid(search_range,search_range)
#Warmup
warm_res=gpuSapply(seq_len(nrow(parms)),computeLoss,x=x,y=y,parms=parms)
warm_res=sapply(seq_len(nrow(parms)),computeLoss,x=x,y=y,parms=parms)

#count the time
system.time({res_device=gpuSapply(seq_len(nrow(parms)),computeLoss,x=x,y=y,parms=parms)})
system.time({res_cpu=sapply(seq_len(nrow(parms)),computeLoss,x=x,y=y,parms=parms)})

#print out the parameters which minimize the loss function
parms[which.min(res_device),]
parms[which.min(res_cpu),]

## -----------------------------------------------------------------------------
version
sessionInfo()

Try the gpuMagic package in your browser

Any scripts or data that you put into this service are public.

gpuMagic documentation built on Nov. 8, 2020, 5:15 p.m.