Nothing
# BENCHMARK FILE.
# - Demo file for comparing hash benchmarks.
#
cat( "The hash-benchmark compares named access and update speed of R's native " ,
"vectors, lists, envrionments and hashes from the hash package . . . \n\n"
)
library(hash)
library(rbenchmark)
# STEP 0. CREATE A SAMPLE SET OF KEYS AND VALUES.
# size: the sample size
# keys: hash's keys
# values: hash's values
size <- 2^18 # The size of the refernece objects.
keys <- as.character( sample(1:size) ) # A vector of
values <- as.character( rnorm( size ) )
# Which is faster setting by mapply or doing a for-loop
# Intialize parameters and prepare things.
# ---------------------------------------------------------------------
# BENCHMARK 1:
# Speed for assigning values to an environment
# The following benchmark compares the speeds of setting key,values
# on an environment by using mapply, a for-loop and lapply.
#
# CONCLUSION:
# Use for-loop for setting it is at least 5% faster than the other
# methods.
#
# R-2.9.2:
# Using the for-loop is about 15-20% faster than apply and 2-3x faster
# than mapply
#
# R-2.11.0: size 5e4
# results from benchmark()
# test replications elapsed relative user.self sys.self user.child sys.child
# 2 for_loop 5 7.026 1.000000 7.025 0 0 0
# 3 lapply 5 7.383 1.050811 7.384 0 0 0
# 1 mapply 5 7.750 1.103046 7.753 0 0 0
#
#
# ---------------------------------------------------------------------
cat( "BENCHMARK 1:\n Testing the best method to assign many keys to a new environment\n" )
env.mapply <- new.env( hash = T , parent = emptyenv() )
env.lapply <- new.env( hash = T , parent = emptyenv() )
env.for <- new.env( hash = T , parent = emptyenv() )
h <- hash()
benchmark(
for_loop = for( i in 1:length(keys) ) assign( keys[[i]], values[[i]], envir = env.for ) ,
mapply = mapply( assign, keys, values, MoreArgs = list( envir = env.mapply ) ) ,
lapply = lapply(
( 1:length(keys) ) ,
FUN = function(i) assign( keys[[i]], values[[i]], envir = env.lapply )
) ,
replications = 5 ,
order = "relative"
)
cat( "\n\n" )
# ---------------------------------------------------------------------
# BENCHMARK 2: ACCESSING SINGLE VALUES
# Compare times for accessing single elements of a list vs vector vs hash
#
# CONCLUSIONS:
# - For number of items, looking up in a list is faster than looking
# up in an environment.
#
# ---------------------------------------------------------------------
# Create a list using mapply, n.b much faster than for-loop
cat( "BENCHMARK 2: Accessing a sinle value in a large hash structure\n" )
number.of.lookups <- 1e3
bm2 <- data.frame()
# LOOP OVER SIX ORDERS OF MAGNITUDES.
for( size in 2^(0:13) ) {
cat( "\nComparing access time for object of size", size, "\n" )
# CREATE NAMED-LIST:
li<-mapply(
function(k,v) {
li<-list()
li[[k]]<-v
li
} ,
keys[1:size] ,
values[1:size] ,
USE.NAMES=F
)
# CREATE NAMED-HASH:
ha <- hash( keys[1:size], values[1:size] )
# CREATE A VECTOR
ve <- values[1:size]
names(ve) <- keys[1:size]
# CREATE KEYS TO LOOK UP:
ke <- keys[ round(runif(max=size,min=1,n=number.of.lookups )) ]
print(
res <- benchmark(
# `get/env` = for( k in ke ) get( k, ha@.xData ) ,
# `get/hash` = for( k in ke ) get(k, ha) ,
#`hash` = for( k in ke ) ha[[k]] ,
`list` = for( k in ke ) li[[k]] ,
`vector`= for( k in ke ) ve[[k]] ,
replications = 10 ,
order = "relative"
)
)
res$size <- size
bm2 <- rbind( bm2, res )
}
xyplot(
elapsed ~ size, groups=test,
data=bm2,
type="b", pch=16:20, col=rainbow(5),
lwd=2, main="Reading from data structures", cex=1.2, cex.title=4,
auto.key=list(space = "right", points = FALSE, lines = FALSE, lwd=4, cex=1, col=rainbow(5)) ,
scales=list( cex=2 ),
ylab = "Elapsed Time ( per 1K Reads)" ,
xlab = "Object Size ( n elements )"
)
p <- ggplot(bm2 , aes(x=size, y=elapsed, group=test ))
p + geom_line()
cat("\n\n")
# ---------------------------------------------------------------------
# BENCHMARK 3: Slices [
# Take slices of an object. This is equivalent to [[.
# We compare
#
# Notes:
# - There is no native slice operation for env
# -
#
# ---------------------------------------------------------------------
cat( "BENCHMARK 3: Slices\n" )
slice.pct <- 0.01
n.lookups <- 100
bm3 <- data.frame()
for( size in 2^(17:18) ) {
slice.size <- floor( size * slice.pct ) + 1
cat( "\nComparing slice time for object of size", size, "with slice pct", slice.pct, "\n" )
# CREATE NAMED-LIST:
li<-mapply(
function(k,v) {
li<-list()
li[[k]]<-v
li
} ,
keys[1:size] ,
values[1:size] ,
USE.NAMES=F
)
# CREATE NAMED-HASH:
ha <- hash( keys[1:size], values[1:size] )
# CREATE A VECTOR
ve <- values[1:size]
names(ve) <- keys[1:size]
# CREATE KEYS TO LOOK UP:
kes <- lapply( 1:n.lookups, function(x) keys[ round(runif(max=size,min=1,n=slice.size )) ] )
# ke <- keys[ round(runif(max=size,min=1,n=slice.size )) ]
print(
res <-
benchmark(
`hash` = for( ke in kes ) ha[ ke ] ,
`list` = for( ke in kes ) li[ ke ] ,
`vector` = for( ke in kes ) ve[ ke ] ,
`mget` = for( ke in kes ) mget( ke, ha@.xData ) ,
replications = 5 ,
order = "relative"
)
)
res$size <- size
bm3 <- if( nrow(bm3)==0) res else rbind( bm3, res )
}
xyplot(
elapsed ~ size, groups=test,
data=bm3,
type="b", pch=16:20, col=rainbow(5),
lwd=2, main="Reading from data structures", cex=1.2, cex.title=4,
auto.key=list(space = "right", points = FALSE, lines = FALSE, lwd=4, cex=1, col=rainbow(5)) ,
scales=list( cex=2 ),
ylab = "Elapsed Time ( per 1K Reads)" ,
xlab = "Object Size ( n elements )"
)
cat( "BENCHMARK 3: [[ Single Element ]] <- Writes \n" )
n.writes <- 100
bm4 <- data.frame()
for( size in 2^(0:12) ) {
# CREATE NAMED-LIST:
li<-mapply(
function(k,v) {
li<-list()
li[[k]]<-v
li
} ,
keys[1:size] ,
values[1:size] ,
USE.NAMES=F
)
# CREATE NAMED-HASH:
ha <- hash( keys[1:size], values[1:size] )
# CREATE ENV
en <- new.env( hash=TRUE )
for( i in 1:size ) assign( keys[[i]], values[[i]], en )
# CREATE A VECTOR
ve <- values[1:size]
names(ve) <- keys[1:size]
# CREATE KEYS TO LOOK UP:
kes <- keys[ round(runif(n=n.writes,min=1,max=length(keys) )) ]
# ke <- keys[ round(runif(max=size,min=1,n=slice.size )) ]
print(
res <-
benchmark(
# `hash` = for( ke in kes ) ha[[ ke ]] <- "a" ,
# `list` = for( ke in kes ) li[[ ke ]] <- "a" ,
`vector` = for( ke in kes ) ve[[ ke ]] <- "a" ,
# `env/assign` = for( ke in kes ) assign( ke, "a" , en ) ,
replications = 5 ,
order = "relative"
)
)
res$size <- size
bm4 <- if( nrow(bm4)==0) res else rbind( bm4, res )
}
xyplot(
elapsed ~ size, groups=test,
data=bm4,
type="b", pch=16:20, col=rainbow(5),
lwd=2, main="Writing 100 Values to data structure", cex=1.2, cex.title=4,
auto.key=list(space = "right", points = FALSE, lines = FALSE, lwd=4, cex=1, col=rainbow(5)) ,
scales=list( cex=2 ),
ylab = "Elapsed Time ( per 100 Writes" ,
xlab = "Object Size ( n elements )"
)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.