R/highlevel64.R
In bit64: A S3 Class for Vectors of 64bit Integers

Documented in as.factor.integer64 as.integer64.factor as.ordered.integer64 benchmark64 duplicated.integer64 keypos keypos.integer64 match.integer64 mean.integer64 optimizer64 prank prank.integer64 qtile qtile.integer64 quantile.integer64 rank.integer64 table.integer64 tiepos tiepos.integer64 unipos unipos.integer64 unique.integer64

# /*
# R-Code for matching and other functions based on hashing
# S3 atomic 64bit integers for R
# (c) 2012 Jens Oehlschägel
# Licence: GPL2
# Provided 'as is', use at your own risk
# Created: 2011-12-11
# Last changed:  2011-12-11
# */

#! \name{benchmark64}
#! \alias{benchmark64}
#! \alias{optimizer64}
#! \title{
#!  Function for measuring algorithmic performance \cr 
#!  of high-level and low-level integer64 functions
#! }
#! \description{
#!  \code{benchmark64} compares high-level integer64 functions against the integer functions from Base R \cr
#!  \code{optimizer64} compares for each high-level integer64 function the Base R integer function with several low-level integer64 functions with and without caching \cr
#! }
#! \usage{
#! benchmark64(nsmall = 2^16, nbig = 2^25, timefun = repeat.time
#! )
#! optimizer64(nsmall = 2^16, nbig = 2^25, timefun = repeat.time
#! , what = c("match", "\%in\%", "duplicated", "unique", "unipos", "table", "rank", "quantile")
#! , uniorder = c("original", "values", "any")
#! , taborder = c("values", "counts")
#! , plot = TRUE
#! )
#! }
#! \arguments{
#!   \item{nsmall}{ size of smaller vector }
#!   \item{nbig}{ size of larger bigger vector }
#!   \item{timefun}{ a function for timing such as \code{\link[bit]{repeat.time}} or \code{\link{system.time}} }
#!   \item{what}{
#!  a vector of names of high-level functions
#! }
#!   \item{uniorder}{
#!  one of the order parameters that are allowed in \code{\link{unique.integer64}} and \code{\link{unipos.integer64}}
#! }
#!   \item{taborder}{
#!  one of the order parameters that are allowed in \code{\link{table.integer64}} 
#! }
#!   \item{plot}{
#!  set to FALSE to suppress plotting 
#! }
#! }
#! \details{
#!  \code{benchmark64} compares the following scenarios for the following use cases: 
#!  \tabular{rl}{
#!   \bold{scenario name} \tab \bold{explanation} \cr
#!   32-bit  \tab applying Base R function to 32-bit integer data \cr
#!   64-bit \tab applying bit64 function to 64-bit integer data (with no cache) \cr
#!   hashcache \tab dito when cache contains \code{\link{hashmap}}, see \code{\link{hashcache}} \cr
#!   sortordercache \tab dito when cache contains sorting and ordering, see \code{\link{sortordercache}} \cr
#!   ordercache \tab dito when cache contains ordering only, see \code{\link{ordercache}} \cr
#!   allcache \tab dito when cache contains sorting, ordering and hashing \cr
#!  }
#!  \tabular{rl}{
#!   \bold{use case name} \tab \bold{explanation} \cr
#!   cache         \tab filling the cache according to scenario \cr
#!   match(s,b)    \tab match small in big vector \cr
#!   s \%in\% b      \tab small \%in\% big vector \cr
#!   match(b,s)    \tab match big in small vector \cr
#!   b \%in\% s      \tab big \%in\% small vector \cr
#!   match(b,b)    \tab match big in (different) big vector \cr
#!   b \%in\% b      \tab big \%in\% (different) big vector \cr
#!   duplicated(b) \tab duplicated of big vector \cr
#!   unique(b)     \tab unique of big vector \cr
#!   table(b)      \tab table of big vector \cr
#!   sort(b)       \tab sorting of big vector \cr
#!   order(b)      \tab ordering of big vector \cr
#!   rank(b)       \tab ranking of big vector \cr
#!   quantile(b)   \tab quantiles of big vector \cr
#!   summary(b)    \tab summary of of big vector \cr
#!   SESSION       \tab exemplary session involving multiple calls (including cache filling costs) \cr
#!  }
#!  Note that the timings for the cached variants do \emph{not} contain the time costs of building the cache, except for the timing of the exemplary user session, where the cache costs are included in order to evaluate amortization. 
#! }
#! \value{
#!  \code{benchmark64} returns a matrix with elapsed seconds, different high-level tasks in rows and different scenarios to solve the task in columns. The last row named 'SESSION' contains the elapsed seconds of the exemplary sesssion.
#!  \cr
#!  \code{optimizer64} returns a dimensioned list with one row for each high-level function timed and two columns named after the values of the \code{nsmall} and \code{nbig} sample sizes. Each list cell contains a matrix with timings, low-level-methods in rows and three measurements \code{c("prep","both","use")} in columns. If it can be measured separately, \code{prep} contains the timing of preparatory work such as sorting and hashing, and \code{use} contains the timing of using the prepared work. If the function timed does both, preparation and use, the timing is in \code{both}.  
#! }
#! \author{
#!  Jens Oehlschlägel <Jens.Oehlschlaegel@truecluster.com>
#! }
#! \seealso{
#!  \code{\link{integer64}}
#! }
#! \examples{
#! message("this small example using system.time does not give serious timings\n
#! this we do this only to run regression tests")
#! benchmark64(nsmall=2^7, nbig=2^13, timefun=function(expr)system.time(expr, gcFirst=FALSE))
#! optimizer64(nsmall=2^7, nbig=2^13, timefun=function(expr)system.time(expr, gcFirst=FALSE)
#! , plot=FALSE
#! )
#!\dontrun{
#! message("for real measurement of sufficiently large datasets run this on your machine")
#! benchmark64()
#! optimizer64()
#!}
#! message("let's look at the performance results on Core i7 Lenovo T410 with 8 GB RAM")
#! data(benchmark64.data)
#! print(benchmark64.data)
#! 
#! matplot(log2(benchmark64.data[-1,1]/benchmark64.data[-1,])
#! , pch=c("3", "6", "h", "s", "o", "a") 
#! , xlab="tasks [last=session]"
#! , ylab="log2(relative speed) [bigger is better]"
#! )
#! matplot(t(log2(benchmark64.data[-1,1]/benchmark64.data[-1,]))
#! , type="b", axes=FALSE 
#! , lwd=c(rep(1, 14), 3)
#! , xlab="context"
#! , ylab="log2(relative speed) [bigger is better]"
#! )
#! axis(1
#! , labels=c("32-bit", "64-bit", "hash", "sortorder", "order", "hash+sortorder")
#! , at=1:6
#! )
#! axis(2)
#! data(optimizer64.data)
#! print(optimizer64.data)
#! oldpar <- par(no.readonly = TRUE)
#! par(mfrow=c(2,1))
#! par(cex=0.7)
#! for (i in 1:nrow(optimizer64.data)){
#!  for (j in 1:2){
#!    tim <- optimizer64.data[[i,j]]
#!   barplot(t(tim))
#!   if (rownames(optimizer64.data)[i]=="match")
#!    title(paste("match", colnames(optimizer64.data)[j], "in", colnames(optimizer64.data)[3-j]))
#!   else if (rownames(optimizer64.data)[i]=="\%in\%")
#!    title(paste(colnames(optimizer64.data)[j], "\%in\%", colnames(optimizer64.data)[3-j]))
#!   else
#!    title(paste(rownames(optimizer64.data)[i], colnames(optimizer64.data)[j]))
#!  }
#! }
#! par(mfrow=c(1,1))
#!}
#! \keyword{ misc }

#! \name{benchmark64.data}
#! \alias{benchmark64.data}
#! \docType{data}
#! \title{
#!  Results of performance measurement on a Core i7 Lenovo T410 8 GB RAM under Windows 7 64bit
#! }
#! \description{
#!   These are the results of calling \code{\link{benchmark64}}
#! }
#! \usage{data(benchmark64.data)}
#! \format{
#!   The format is:
#!  num [1:16, 1:6] 2.55e-05 2.37 2.39 1.28 1.39 ...
#!  - attr(*, "dimnames")=List of 2
#!   ..$ : chr [1:16] "cache" "match(s,b)" "s \%in\% b" "match(b,s)" ...
#!   ..$ : chr [1:6] "32-bit" "64-bit" "hashcache" "sortordercache" ...
#! }
#! \examples{
#! data(benchmark64.data)
#! print(benchmark64.data)
#! matplot(log2(benchmark64.data[-1,1]/benchmark64.data[-1,])
#! , pch=c("3", "6", "h", "s", "o", "a")
#! , xlab="tasks [last=session]"
#! , ylab="log2(relative speed) [bigger is better]"
#! )
#! matplot(t(log2(benchmark64.data[-1,1]/benchmark64.data[-1,]))
#! , axes=FALSE
#! , type="b"
#! , lwd=c(rep(1, 14), 3)
#! , xlab="context"
#! , ylab="log2(relative speed) [bigger is better]"
#! )
#! axis(1
#! , labels=c("32-bit", "64-bit", "hash", "sortorder", "order", "hash+sortorder")
#! , at=1:6
#! )
#! axis(2)
#! }
#! \keyword{datasets}


#! \name{optimizer64.data}
#! \alias{optimizer64.data}
#! \docType{data}
#! \title{
#!  Results of performance measurement on a Core i7 Lenovo T410 8 GB RAM under Windows 7 64bit
#! }
#! \description{
#!   These are the results of calling \code{\link{optimizer64}}
#! }
#! \usage{data(optimizer64.data)}
#! \format{
#!   The format is:
#! List of 16
#!  $ : num [1:9, 1:3] 0 0 1.63 0.00114 2.44 ...
#!   ..- attr(*, "dimnames")=List of 2
#!   .. ..$ : chr [1:9] "match" "match.64" "hashpos" "hashrev" ...
#!   .. ..$ : chr [1:3] "prep" "both" "use"
#!  $ : num [1:10, 1:3] 0 0 0 1.62 0.00114 ...
#!   ..- attr(*, "dimnames")=List of 2
#!   .. ..$ : chr [1:10] "\%in\%" "match.64" "\%in\%.64" "hashfin" ...
#!   .. ..$ : chr [1:3] "prep" "both" "use"
#!  $ : num [1:10, 1:3] 0 0 0.00105 0.00313 0.00313 ...
#!   ..- attr(*, "dimnames")=List of 2
#!   .. ..$ : chr [1:10] "duplicated" "duplicated.64" "hashdup" "sortorderdup1" ...
#!   .. ..$ : chr [1:3] "prep" "both" "use"
#!  $ : num [1:15, 1:3] 0 0 0 0.00104 0.00104 ...
#!   ..- attr(*, "dimnames")=List of 2
#!   .. ..$ : chr [1:15] "unique" "unique.64" "hashmapuni" "hashuni" ...
#!   .. ..$ : chr [1:3] "prep" "both" "use"
#!  $ : num [1:14, 1:3] 0 0 0 0.000992 0.000992 ...
#!   ..- attr(*, "dimnames")=List of 2
#!   .. ..$ : chr [1:14] "unique" "unipos.64" "hashmapupo" "hashupo" ...
#!   .. ..$ : chr [1:3] "prep" "both" "use"
#!  $ : num [1:13, 1:3] 0 0 0 0 0.000419 ...
#!   ..- attr(*, "dimnames")=List of 2
#!   .. ..$ : chr [1:13] "tabulate" "table" "table.64" "hashmaptab" ...
#!   .. ..$ : chr [1:3] "prep" "both" "use"
#!  $ : num [1:7, 1:3] 0 0 0 0.00236 0.00714 ...
#!   ..- attr(*, "dimnames")=List of 2
#!   .. ..$ : chr [1:7] "rank" "rank.keep" "rank.64" "sortorderrnk" ...
#!   .. ..$ : chr [1:3] "prep" "both" "use"
#!  $ : num [1:6, 1:3] 0 0 0.00189 0.00714 0 ...
#!   ..- attr(*, "dimnames")=List of 2
#!   .. ..$ : chr [1:6] "quantile" "quantile.64" "sortqtl" "orderqtl" ...
#!   .. ..$ : chr [1:3] "prep" "both" "use"
#!  $ : num [1:9, 1:3] 0 0 0.00105 1.17 0 ...
#!   ..- attr(*, "dimnames")=List of 2
#!   .. ..$ : chr [1:9] "match" "match.64" "hashpos" "hashrev" ...
#!   .. ..$ : chr [1:3] "prep" "both" "use"
#!  $ : num [1:10, 1:3] 0 0 0 0.00104 1.18 ...
#!   ..- attr(*, "dimnames")=List of 2
#!   .. ..$ : chr [1:10] "\%in\%" "match.64" "\%in\%.64" "hashfin" ...
#!   .. ..$ : chr [1:3] "prep" "both" "use"
#!  $ : num [1:10, 1:3] 0 0 1.64 2.48 2.48 ...
#!   ..- attr(*, "dimnames")=List of 2
#!   .. ..$ : chr [1:10] "duplicated" "duplicated.64" "hashdup" "sortorderdup1" ...
#!   .. ..$ : chr [1:3] "prep" "both" "use"
#!  $ : num [1:15, 1:3] 0 0 0 1.64 1.64 ...
#!   ..- attr(*, "dimnames")=List of 2
#!   .. ..$ : chr [1:15] "unique" "unique.64" "hashmapuni" "hashuni" ...
#!   .. ..$ : chr [1:3] "prep" "both" "use"
#!  $ : num [1:14, 1:3] 0 0 0 1.62 1.62 ...
#!   ..- attr(*, "dimnames")=List of 2
#!   .. ..$ : chr [1:14] "unique" "unipos.64" "hashmapupo" "hashupo" ...
#!   .. ..$ : chr [1:3] "prep" "both" "use"
#!  $ : num [1:13, 1:3] 0 0 0 0 0.32 ...
#!   ..- attr(*, "dimnames")=List of 2
#!   .. ..$ : chr [1:13] "tabulate" "table" "table.64" "hashmaptab" ...
#!   .. ..$ : chr [1:3] "prep" "both" "use"
#!  $ : num [1:7, 1:3] 0 0 0 2.96 10.69 ...
#!   ..- attr(*, "dimnames")=List of 2
#!   .. ..$ : chr [1:7] "rank" "rank.keep" "rank.64" "sortorderrnk" ...
#!   .. ..$ : chr [1:3] "prep" "both" "use"
#!  $ : num [1:6, 1:3] 0 0 1.62 10.61 0 ...
#!   ..- attr(*, "dimnames")=List of 2
#!   .. ..$ : chr [1:6] "quantile" "quantile.64" "sortqtl" "orderqtl" ...
#!   .. ..$ : chr [1:3] "prep" "both" "use"
#!  - attr(*, "dim")= int [1:2] 8 2
#!  - attr(*, "dimnames")=List of 2
#!   ..$ : chr [1:8] "match" "\%in\%" "duplicated" "unique" ...
#!   ..$ : chr [1:2] "65536" "33554432"
#! }
#! \examples{
#! data(optimizer64.data)
#! print(optimizer64.data)
#! oldpar <- par(no.readonly = TRUE)
#! par(mfrow=c(2,1))
#! par(cex=0.7)
#! for (i in 1:nrow(optimizer64.data)){
#!  for (j in 1:2){
#!    tim <- optimizer64.data[[i,j]]
#!   barplot(t(tim))
#!   if (rownames(optimizer64.data)[i]=="match")
#!    title(paste("match", colnames(optimizer64.data)[j], "in", colnames(optimizer64.data)[3-j]))
#!   else if (rownames(optimizer64.data)[i]=="\%in\%")
#!    title(paste(colnames(optimizer64.data)[j], "\%in\%", colnames(optimizer64.data)[3-j]))
#!   else
#!    title(paste(rownames(optimizer64.data)[i], colnames(optimizer64.data)[j]))
#!  }
#! }
#! par(mfrow=c(1,1))
#! }
#! \keyword{datasets}


benchmark64 <- function(nsmall=2^16, nbig=2^25, timefun=repeat.time)
{
 
 message('\ncompare performance for a complete sessions of calls')
 s <- sample(nbig, nsmall, TRUE)
 b <- sample(nbig, nbig, TRUE)
 b2 <- sample(nbig, nbig, TRUE)
 
 tim1 <- double(6)
 names(tim1) <- c("32-bit","64-bit","hashcache","sortordercache","ordercache","allcache")

 s <- as.integer(s)
 b <- as.integer(b)
 b2 <- as.integer(b2)
 
 i <- 1
 for (i in 1:6){
  message("\n=== ", names(tim1)[i], " ===")
  
  if (i==2){
   s <- as.integer64(s)
   b <- as.integer64(b)
   b2 <- as.integer64(b2)
  }

  tim1[i] <- 0
 
  tim1[i] <- tim1[i] + timefun({
   switch(as.character(i)
   , "3" = {hashcache(s); hashcache(b); hashcache(b2)}
   , "4" = {sortordercache(s); sortordercache(b); sortordercache(b2)}
   , "5" = {ordercache(s); ordercache(b); ordercache(b2)}
   , "6" = {hashcache(s); hashcache(b); hashcache(b2);sortordercache(s); sortordercache(b); sortordercache(b2)}
   )
  })[3]
 
  message('check data range, mean etc.')
  tim1[i] <- tim1[i] + timefun({
   summary(b)
  })[3]
  message('get all percentiles for plotting distribution shape')
  tim1[i] <- tim1[i] + timefun({
   quantile(b, probs=seq(0, 1, 0.01))
  })[3]
  message('list the upper and lower permille of values')
  tim1[i] <- tim1[i] + timefun({
   quantile(b, probs=c(0.001, 0.999))
   sort(b, na.last=NA)
  })[3]
  message('OK, for some of these values I want to see the complete ROW, so I need their positions in the data.frame')
  tim1[i] <- tim1[i] + timefun({
   if(i==1)order(b) else order.integer64(b)
  })[3]
  message('check if any values are duplicated')
  tim1[i] <- tim1[i] + timefun({
   any(duplicated(b))
  })[3]
  message('since not unique, then check distribution of frequencies')
  tim1[i] <- tim1[i] + timefun({
   if(i==1)tabulate(table(b, exclude=NULL)) else tabulate(table.integer64(b, return='list')$counts)
  })[3]
  message("OK, let's plot the percentiles of unique values versus the percentiles allowing for duplicates")
  tim1[i] <- tim1[i] + timefun({
   quantile(b, probs=seq(0, 1, 0.01))
   quantile(unique(b), probs=seq(0, 1, 0.01))
  })[3]
  message('check whether we find a match for each fact in the dimension table')
  tim1[i] <- tim1[i] + timefun({
   all(if(i==1) b %in% s else "%in%.integer64"(b, s))
  })[3]
  message('check whether there are any dimension table entries not in the fact table')
  tim1[i] <- tim1[i] + timefun({
   all(if(i==1) s %in% b else "%in%.integer64"(s, b))
  })[3]
  message('check whether we find a match for each fact in a parallel fact table')
  tim1[i] <- tim1[i] + timefun({
   all(if(i==1) b %in% b2 else "%in%.integer64"(b, b2))
  })[3]
  message('find positions of facts in dimension table for joining')
  tim1[i] <- tim1[i] + timefun({
   if(i==1) match(b, s) else match.integer64(b, s)
  })[3]
  message('find positions of facts in parallel fact table for joining')
  tim1[i] <- tim1[i] + timefun({
   if(i==1) match(b, b2) else match.integer64(b, b2)
  })[3]
  message('out of curiosity: how well rank-correlated are fact and parallel fact table?')
  tim1[i] <- tim1[i] + timefun({
   if (i==1){
    cor(rank(b, na.last="keep"), rank(b2, na.last="keep"), use="na.or.complete")
   }else{
    cor(rank.integer64(b), rank.integer64(b2), use="na.or.complete")
   }
  })[3]
  
  remcache(s)
  remcache(b)
  remcache(b2)
  
  print(round(rbind(seconds=tim1, factor=tim1[1]/tim1), 3))
 
 }

        # 32-bit         64-bit      hashcache sortordercache     ordercache       allcache 
       # 196.510          8.963          8.242          5.183         12.325          6.043 
        # 32-bit         64-bit      hashcache sortordercache     ordercache       allcache 
         # 1.000         21.924         23.842         37.913         15.944         32.519 

   
 message("\nnow let's look more systematically at the components involved")
 s <- sample(nbig, nsmall, TRUE)
 b <- sample(nbig, nbig, TRUE)
 b2 <- sample(nbig, nbig, TRUE)
 
 tim2 <- matrix(0, 15, 6)
 dimnames(tim2) <- list(c("cache", "match(s,b)", "s %in% b", "match(b,s)", "b %in% s", "match(b,b)", "b %in% b", "duplicated(b)", "unique(b)", "table(b)", "sort(b)", "order(b)", "rank(b)", "quantile(b)", "summary(b)")
 , c("32-bit","64-bit","hashcache","sortordercache","ordercache","allcache"))

 s <- as.integer(s)
 b <- as.integer(b)
 b2 <- as.integer(b2)
 
 i <- 1
 for (i in 1:6){
  if (i==2){
   s <- as.integer64(s)
   b <- as.integer64(b)
   b2 <- as.integer64(b2)
  }
 
  if (i>2)message(colnames(tim2)[i], " cache")
  tim2["cache",i] <- timefun({
   switch(as.character(i)
   , "3" = {hashcache(s); hashcache(b); hashcache(b2)}
   , "4" = {sortordercache(s); sortordercache(b); sortordercache(b2)}
   , "5" = {ordercache(s); ordercache(b); ordercache(b2)}
   , "6" = {hashcache(s); hashcache(b); hashcache(b2);sortordercache(s); sortordercache(b); sortordercache(b2)}
   )
  })[3]
 
  message(colnames(tim2)[i], " match(s,b)")
  tim2["match(s,b)",i] <- timefun({
   if (i==1) match(s, b) else match.integer64(s, b)
  })[3]
 
  message(colnames(tim2)[i], " s %in% b")
  tim2["s %in% b",i] <- timefun({
   if (i==1) s %in% b else "%in%.integer64"(s,b)
  })[3]
 
  message(colnames(tim2)[i], " match(b,s)")
  tim2["match(b,s)",i] <- timefun({
   if (i==1) match(b, s) else match.integer64(b, s)
  })[3]
 
  message(colnames(tim2)[i], " b %in% s")
  tim2["b %in% s",i] <- timefun({
   if (i==1) b %in% s else "%in%.integer64"(b,s)
  })[3]
 
  message(colnames(tim2)[i], " match(b,b)")
  tim2["match(b,b)",i] <- timefun({
   if (i==1) match(b, b2) else match.integer64(b, b2)
  })[3]
 
  message(colnames(tim2)[i], " b %in% b")
  tim2["b %in% b",i] <- timefun({
   if (i==1) b %in% b2 else "%in%.integer64"(b,b2)
  })[3]
 
  message(colnames(tim2)[i], " duplicated(b)")
  tim2["duplicated(b)",i] <- timefun({
   duplicated(b)
  })[3]
 
  message(colnames(tim2)[i], " unique(b)")
  tim2["unique(b)",i] <- timefun({
   unique(b)
  })[3]
 
  message(colnames(tim2)[i], " table(b)")
  tim2["table(b)",i] <- timefun({
   if(i==1) table(b) else table.integer64(b, return='list')
  })[3]
 
  message(colnames(tim2)[i], " sort(b)")
  tim2["sort(b)",i] <- timefun({
   sort(b)
  })[3]
 
  message(colnames(tim2)[i], " order(b)")
  tim2["order(b)",i] <- timefun({
   if(i==1) order(b) else order.integer64(b)
  })[3]
 
  message(colnames(tim2)[i], " rank(b)")
  tim2["rank(b)",i] <- timefun({
   if(i==1) rank(b) else rank.integer64(b)
  })[3]
 
  message(colnames(tim2)[i], " quantile(b)")
  tim2["quantile(b)",i] <- timefun({
   quantile(b)
  })[3]
 
  message(colnames(tim2)[i], " summary(b)")
  tim2["summary(b)",i] <- timefun({
   summary(b)
  })[3]
  
  remcache(s)
  remcache(b)
  remcache(b2)
  
  tim3 <- rbind(tim2, SESSION=tim1)
  #tim2 <- tim2[,1]/tim2
  
  cat("seconds")
  print(round(tim3, 3))
  cat("factor")
  print(round(tim3[,1]/tim3, 3))
 
 }


 
               # 32-bit 64-bit hashcache sortordercache ordercache allcache
# cache           0.000  0.000     0.775          1.330      6.500    2.660
# match(s,b)      0.820  0.218     0.004          0.025      0.093    0.004
# s %in% b        0.810  0.234     0.003          0.022      0.093    0.003
# match(b,s)      0.450  0.228     0.232          0.224      0.224    0.226
# b %in% s        0.510  0.226     0.224          0.222      0.218    0.222
# match(b,b)      2.370  0.870     0.505          0.890      0.880    0.505
# b %in% b        2.350  0.850     0.480          0.865      0.870    0.483
# duplicated(b)   0.875  0.510     0.141          0.116      0.383    0.117
# unique(b)       0.930  0.555     0.447          0.156      0.427    0.450
# table(b)      110.340  0.725     0.680          0.234      0.575    0.202
# sort(b)         2.440  0.400     0.433          0.072      0.460    0.069
# order(b)       12.780  0.680     0.615          0.036      0.036    0.035
# rank(b)        13.480  0.860     0.915          0.240      0.545    0.246
# quantile(b)     0.373  0.400     0.410          0.000      0.000    0.000
# summary(b)      0.645  0.423     0.427          0.016      0.016    0.016
# TOTAL         149.173  7.179     6.291          4.448     11.320    5.239
              # 32-bit  64-bit hashcache sortordercache ordercache allcache
# cache              1   1.062     0.000          0.000      0.000    0.000
# match(s,b)         1   3.761   230.420         32.475      8.843  217.300
# s %in% b           1   3.462   234.090         36.450      8.735  237.386
# match(b,s)         1   1.974     1.940          2.009      2.009    1.991
# b %in% s           1   2.257     2.277          2.297      2.339    2.297
# match(b,b)         1   2.724     4.693          2.663      2.693    4.693
# b %in% b           1   2.765     4.896          2.717      2.701    4.862
# duplicated(b)      1   1.716     6.195          7.572      2.283    7.500
# unique(b)          1   1.676     2.082          5.972      2.180    2.067
# table(b)           1 152.193   162.265        471.538    191.896  546.238
# sort(b)            1   6.100     5.631         33.822      5.304   35.534
# order(b)           1  18.794    20.780        357.840    354.297  366.950
# rank(b)            1  15.674    14.732         56.167     24.734   54.797
# quantile(b)        1   0.933     0.911        804.907    806.027  810.133
# summary(b)         1   1.524     1.512         39.345     39.345   39.345
# TOTAL              1  20.778    23.712         33.534     13.177   28.476 

  tim3
}


optimizer64 <- function(nsmall=2^16, nbig=2^25, timefun=repeat.time
, what=c("match","%in%","duplicated","unique","unipos","table","rank","quantile")
, uniorder = c("original", "values", "any")
, taborder = c("values", "counts")
, plot = TRUE
)
{
 uniorder <- match.arg(uniorder)
 taborder <- match.arg(taborder)
 ret <- vector("list", 2*length(what))
 dim(ret) <- c(length(what), 2L)
 dimnames(ret) <- list(what, c(nsmall, nbig))
 
 if (plot){
  oldpar <- par(no.readonly = TRUE)
  on.exit(par(oldpar))
  par(mfrow=c(2,1))
 }
 
 if ("match" %in% what){
  message("match: timings of different methods")
  N1 <- c(nsmall, nbig)
  N2 <- c(nbig, nsmall)
  for (i in seq_along(N1)){
   n1 <- N1[i]
   n2 <- N2[i]
   x1 <- c(sample(n2, n1-1, TRUE), NA)
   x2 <- c(sample(n2, n2-1, TRUE), NA)
   tim <- matrix(0, 9, 3)
   dimnames(tim) <- list(c("match","match.64","hashpos","hashrev","sortorderpos","orderpos","hashcache","sortorder.cache","order.cache"), c("prep","both","use"))

   tim["match","both"] <- timefun({
    p <- match(x1, x2)
   })[3]
   x1 <- as.integer64(x1)
   x2 <- as.integer64(x2)

   tim["match.64","both"] <- timefun({
    p2 <- match.integer64(x1, x2)
   })[3]
   stopifnot(identical(p2, p))

   tim["hashpos","prep"] <- timefun({
    h2 <- hashmap(x2)
   })[3]
   tim["hashpos","use"] <- timefun({
    p2 <- hashpos(h2, x1)
   })[3]
   stopifnot(identical(p2, p))
   
   tim["hashrev","prep"] <- timefun({
    h1 <- hashmap(x1)
   })[3]
   tim["hashrev","use"] <- timefun({
    p1 <- hashrev(h1, x2)
   })[3]
   stopifnot(identical(p2, p))
   
   tim["sortorderpos","prep"] <- system.time({
    s2 <- clone(x2)
    o2 <- seq_along(x2)
    ramsortorder(s2, o2, na.last=FALSE)
   })[3]
   tim["sortorderpos","use"] <- timefun({
    p2 <- sortorderpos(s2, o2, x1)
   })[3]
   stopifnot(identical(p2, p))
   
   tim["orderpos","prep"] <- timefun({
    o2 <- seq_along(x2)
    ramorder(x2, o2, na.last=FALSE)
   })[3]
   tim["orderpos","use"] <- timefun({
    p2 <- orderpos(x2, o2, x1, method=2)
   })[3]
   stopifnot(identical(p2, p))
   
   hashcache(x2)
   tim["hashcache","use"] <- timefun({
    p2 <- match.integer64(x1, x2)
   })[3]
   stopifnot(identical(p2, p))
   remcache(x2)
   
   sortordercache(x2)
   tim["sortorder.cache","use"] <- timefun({
    p2 <- match.integer64(x1, x2)
   })[3]
   stopifnot(identical(p2, p))
   remcache(x2)
   
   ordercache(x2)
   tim["order.cache","use"] <- timefun({
    p2 <- match.integer64(x1, x2)
   })[3]
   stopifnot(identical(p2, p))
   remcache(x2)

   if (plot){
    barplot(t(tim))
    n <- format(c(n1, n2))
    title(paste("match", n[1], "in", n[2]))
   }
   
   ret[["match",as.character(n1)]] <- tim
  }
 }

 if ("%in%" %in% what){
  message("%in%: timings of different methods")
  N1 <- c(nsmall, nbig)
  N2 <- c(nbig, nsmall)
  for (i in seq_along(N1)){
   n1 <- N1[i]
   n2 <- N2[i]
   x1 <- c(sample(n2, n1-1, TRUE), NA)
   x2 <- c(sample(n2, n2-1, TRUE), NA)
   tim <- matrix(0, 10, 3)
   dimnames(tim) <- list(c("%in%","match.64","%in%.64","hashfin","hashrin","sortfin","orderfin","hash.cache","sortorder.cache","order.cache"), c("prep","both","use"))

   tim["%in%","both"] <- timefun({
    p <- x1 %in% x2
   })[3]
   x1 <- as.integer64(x1)
   x2 <- as.integer64(x2)

   tim["match.64","both"] <- timefun({
    p2 <- match.integer64(x1,x2, nomatch = 0L) > 0L
   })[3]
   stopifnot(identical(p2, p))

   tim["%in%.64","both"] <- timefun({
    p2 <- "%in%.integer64"(x1,x2) # this is using the custom version
   })[3]
   stopifnot(identical(p2, p))

   tim["hashfin","prep"] <- timefun({
    h2 <- hashmap(x2)
   })[3]
   tim["hashfin","use"] <- timefun({
    p2 <- hashfin(h2, x1)
   })[3]
   stopifnot(identical(p2, p))
   
   tim["hashrin","prep"] <- timefun({
    h1 <- hashmap(x1)
   })[3]
   tim["hashrin","use"] <- timefun({
    p1 <- hashrin(h1, x2)
   })[3]
   stopifnot(identical(p2, p))
   
   tim["sortfin","prep"] <- timefun({
    s2 <- clone(x2)
    ramsort(s2, na.last=FALSE)
   })[3]
   tim["sortfin","use"] <- timefun({
    p2 <- sortfin(s2, x1)
   })[3]
   stopifnot(identical(p2, p))
   
   tim["orderfin","prep"] <- timefun({
    o2 <- seq_along(x2)
    ramorder(x2, o2, na.last=FALSE)
   })[3]
   tim["orderfin","use"] <- timefun({
    p2 <- orderfin(x2, o2, x1)
   })[3]
   stopifnot(identical(p2, p))
   
   hashcache(x2)
   tim["hash.cache","use"] <- timefun({
    p2 <- "%in%.integer64"(x1, x2)
   })[3]
   stopifnot(identical(p2, p))
   remcache(x2)
   
   sortordercache(x2)
   tim["sortorder.cache","use"] <- timefun({
    p2 <- "%in%.integer64"(x1, x2)
   })[3]
   stopifnot(identical(p2, p))
   remcache(x2)
   
   ordercache(x2)
   tim["order.cache","use"] <- timefun({
    p2 <- "%in%.integer64"(x1, x2)
   })[3]
   stopifnot(identical(p2, p))
   remcache(x2)
   
   if (plot){
    barplot(t(tim))
    n <- format(c(n1, n2))
    title(paste(n[1], "%in%", n[2]))
   }
    
   ret[["%in%",as.character(n1)]] <- tim
  }
 }
 if ("duplicated" %in% what){
  message("duplicated: timings of different methods")
  N <- c(nsmall, nbig)
  for (i in seq_along(N)){
   n <- N[i]
   x <- c(sample(n, n-1, TRUE), NA)
   tim <- matrix(0, 10, 3)
   dimnames(tim) <- list(c("duplicated","duplicated.64","hashdup","sortorderdup1","sortorderdup2","orderdup1","orderdup2"
    ,"hash.cache","sortorder.cache","order.cache")
   , c("prep","both","use"))

   tim["duplicated","both"] <- timefun({
    p <- duplicated(x)
   })[3]
   x <- as.integer64(x)

   tim["duplicated.64","both"] <- timefun({
    p2 <- duplicated(x)
   })[3]
   stopifnot(identical(p2, p))

   tim["hashdup","prep"] <- timefun({
    h <- hashmap(x)
   })[3]
   tim["hashdup","use"] <- timefun({
    p2 <- hashdup(h)
   })[3]
   stopifnot(identical(p2, p))
   
   tim["sortorderdup1","prep"] <- timefun({
    s <- clone(x)
    o <- seq_along(x)
    ramsortorder(s, o, na.last=FALSE)
    nunique <- sortnut(s)[1]
   })[3]
   tim["sortorderdup1","use"] <- timefun({
    p2 <- sortorderdup(s, o, method=1)
   })[3]
   stopifnot(identical(p2, p))
    
   tim["sortorderdup2","prep"] <- tim["sortorderdup1","prep"]
   tim["sortorderdup2","use"] <- timefun({
    p2 <- sortorderdup(s, o, method=2)
   })[3]
   stopifnot(identical(p2, p))
    
   tim["orderdup1","prep"] <- timefun({
    o <- seq_along(x)
    ramorder(x, o, na.last=FALSE)
    nunique <- ordernut(x,o)[1]
   })[3]
   tim["orderdup1","use"] <- timefun({
    p2 <- orderdup(x, o, method=1)
   })[3]
   stopifnot(identical(p2, p))
   
   tim["orderdup2","prep"] <- tim["orderdup1","prep"]
   tim["orderdup2","use"] <- timefun({
    p2 <- orderdup(x, o, method=2)
   })[3]
   stopifnot(identical(p2, p))
   
   hashcache(x)
   tim["hash.cache","use"] <- timefun({
    p2 <- duplicated(x)
   })[3]
   stopifnot(identical(p2, p))
   remcache(x)

   sortordercache(x)
   tim["sortorder.cache","use"] <- timefun({
    p2 <- duplicated(x)
   })[3]
   stopifnot(identical(p2, p))
   remcache(x)

   ordercache(x)
   tim["order.cache","use"] <- timefun({
    p2 <- duplicated(x)
   })[3]
   stopifnot(identical(p2, p))
   remcache(x)
   
   if (plot){
    barplot(t(tim), cex.names=0.7)
    title(paste("duplicated(",n,")", sep=""))
   }
   
   ret[["duplicated",as.character(n)]] <- tim
  }
 }
 if ("unique" %in% what){
  message("unique: timings of different methods")
  N <- c(nsmall, nbig)
  for (i in seq_along(N)){
   n <- N[i]
   x <- c(sample(n, n-1, TRUE), NA)
   tim <- matrix(0, 15, 3)
   dimnames(tim) <- list(
   c("unique","unique.64","hashmapuni","hashuni","hashunikeep","sortuni","sortunikeep","orderuni","orderunikeep","hashdup","sortorderdup"
    ,"hash.cache","sort.cache","sortorder.cache","order.cache")
   , c("prep","both","use"))

   tim["unique","both"] <- timefun({
    p <- unique(x)
   })[3]
   x <- as.integer64(x)
   p <- as.integer64(p)
   if (uniorder=="values")
    ramsort(p, na.last=FALSE)

   tim["unique.64","both"] <- timefun({
    p2 <- unique(x, order=uniorder)
   })[3]
   if (uniorder!="any")
    stopifnot(identical.integer64(p2, p))

   tim["hashmapuni","both"] <- timefun({
    p2 <- hashmapuni(x)
   })[3]
   if (uniorder=="original")
    stopifnot(identical.integer64(p2, p))
   
   tim["hashuni","prep"] <- timefun({
    h <- hashmap(x)
    # for(r in 1:r)h <- hashmap(x, nunique=h$nunique)
   })[3]
   tim["hashuni","use"] <- timefun({
    p2 <- hashuni(h)
   })[3]
   if (uniorder=="values")
    stopifnot(identical.integer64(sort(p2, na.last=FALSE), p))
   
   tim["hashunikeep","prep"] <- tim["hashuni","prep"] 
   tim["hashunikeep","use"] <- timefun({
    p2 <- hashuni(h, keep.order=TRUE)
   })[3]
   if (uniorder=="original")
    stopifnot(identical.integer64(p2, p))

   tim["sortuni","prep"] <- timefun({
    s <- clone(x)
    ramsort(s, na.last=FALSE)
    nunique <- sortnut(s)[1]
   })[3]
   tim["sortuni","use"] <- timefun({
    p2 <- sortuni(s, nunique)
   })[3]
   if (uniorder=="values")
    stopifnot(identical.integer64(sort(p2, na.last=FALSE), p))
   
   tim["sortunikeep","prep"] <- timefun({
    s <- clone(x)
    o <- seq_along(x)
    ramsortorder(s, o, na.last=FALSE)
    nunique <- sortnut(s)[1]
   })[3]
   tim["sortunikeep","use"] <- timefun({
    p2 <- sortorderuni(x, s, o, nunique)
   })[3]
   if (uniorder=="original")
    stopifnot(identical.integer64(p2, p))
    
   tim["orderuni","prep"] <- timefun({
    o <- seq_along(x)
    ramorder(x, o, na.last=FALSE)
    nunique <- ordernut(x,o)[1]
   })[3]
   tim["orderuni","use"] <- timefun({
    p2 <- orderuni(x, o, nunique)
   })[3]
   if (uniorder=="values")
    stopifnot(identical.integer64(sort(p2, na.last=FALSE), p))
   
   tim["orderunikeep","prep"] <- tim["orderuni","prep"]
   tim["orderunikeep","use"] <- timefun({
    p2 <- orderuni(x, o, nunique, keep.order=TRUE)
    nunique <- ordernut(x,o)[1]
   })[3]
   if (uniorder=="original")
    stopifnot(identical.integer64(p2, p))

   tim["hashdup","prep"] <- tim["hashuni","prep"]
   tim["hashdup","use"] <- timefun({
    p2 <- x[!hashdup(h)]
   })[3]
   if (uniorder=="original")
    stopifnot(identical.integer64(p2, p))

   tim["sortorderdup","prep"] <- tim["sortunikeep","prep"]
   tim["sortorderdup","use"] <- timefun({
    p2 <- x[!sortorderdup(s, o)]
   })[3]
   if (uniorder=="original")
    stopifnot(identical.integer64(p2, p))

   
   hashcache(x)
   tim["hash.cache","use"] <- timefun({
    p2 <- unique(x, order=uniorder)
   })[3]
   if (uniorder!="any")
    stopifnot(identical.integer64(p2, p))
   remcache(x)

   sortcache(x)
   tim["sort.cache","use"] <- timefun({
    p2 <- unique(x, order=uniorder)
   })[3]
   if (uniorder!="any")
    stopifnot(identical.integer64(p2, p))
   remcache(x)

   sortordercache(x)
   tim["sortorder.cache","use"] <- timefun({
    p2 <- unique(x, order=uniorder)
   })[3]
   if (uniorder!="any")
    stopifnot(identical.integer64(p2, p))
   remcache(x)

   ordercache(x)
   tim["order.cache","use"] <- timefun({
    p2 <- unique(x, order=uniorder)
   })[3]
   if (uniorder!="any")
    stopifnot(identical.integer64(p2, p))
   remcache(x)
   
   if (plot){
    barplot(t(tim), cex.names=0.7)
    title(paste("unique(",n,", order=",uniorder,")", sep=""))
   }
   
   ret[["unique",as.character(n)]] <- tim
  }
 }
 if ("unipos" %in% what){
  message("unipos: timings of different methods")
  N <- c(nsmall, nbig)
  for (i in seq_along(N)){
   n <- N[i]
   x <- c(sample(n, n-1, TRUE), NA)
   tim <- matrix(0, 14, 3)
   dimnames(tim) <- list(
   c("unique","unipos.64","hashmapupo","hashupo","hashupokeep","sortorderupo","sortorderupokeep","orderupo","orderupokeep","hashdup","sortorderdup"
    ,"hash.cache","sortorder.cache","order.cache")
   , c("prep","both","use"))

   tim["unique","both"] <- timefun({
    unique(x)
   })[3]
   x <- as.integer64(x)

   tim["unipos.64","both"] <- timefun({
    p <- unipos(x, order=uniorder)
   })[3]

   tim["hashmapupo","both"] <- timefun({
    p2 <- hashmapupo(x)
   })[3]
   if (uniorder=="original")
    stopifnot(identical(p2, p))
   
   tim["hashupo","prep"] <- timefun({
    h <- hashmap(x)
    # if nunique is small we could re-build the hashmap at a smaller size
    # h <- hashmap(x, nunique=h$nunique)
   })[3]
   tim["hashupo","use"] <- timefun({
    p2 <- hashupo(h)
   })[3]
   if (uniorder=="values")
    stopifnot(identical(sort(p2, na.last=FALSE), sort(p, na.last=FALSE)))
   
   tim["hashupokeep","prep"] <- tim["hashupo","prep"] 
   tim["hashupokeep","use"] <- timefun({
    p2 <- hashupo(h, keep.order=TRUE)
   })[3]
   if (uniorder=="original")
    stopifnot(identical(p2, p))

   
   tim["sortorderupo","prep"] <- timefun({
    s <- clone(x)
    o <- seq_along(x)
    ramsortorder(s, o, na.last=FALSE)
    nunique <- sortnut(s)[1]
   })[3]
   tim["sortorderupo","use"] <- timefun({
    p2 <- sortorderupo(s, o, nunique)
   })[3]
   if (uniorder=="values")
    stopifnot(identical(p2, p))
    
   tim["sortorderupokeep","prep"] <- timefun({
    s <- clone(x)
    o <- seq_along(x)
    ramsortorder(s, o, na.last=FALSE)
    nunique <- sortnut(s)[1]
   })[3]
   tim["sortorderupokeep","use"] <- timefun({
    p2 <- sortorderupo(s, o, nunique, keep.order=TRUE)
   })[3]
   if (uniorder=="original")
    stopifnot(identical(p2, p))
    
   tim["orderupo","prep"] <- timefun({
    o <- seq_along(x)
    ramorder(x, o, na.last=FALSE)
    nunique <- ordernut(x,o)[1]
   })[3]
   tim["orderupo","use"] <- timefun({
    p2 <- orderupo(x, o, nunique)
   })[3]
   if (uniorder=="values")
    stopifnot(identical(p2, p))
   
   tim["orderupokeep","prep"] <- tim["orderupo","prep"]
   tim["orderupokeep","use"] <- timefun({
    p2 <- orderupo(x, o, nunique, keep.order=TRUE)
    nunique <- ordernut(x,o)[1]
   })[3]
   if (uniorder=="original")
    stopifnot(identical(p2, p))

   tim["hashdup","prep"] <- tim["hashupo","prep"]
   tim["hashdup","use"] <- timefun({
    p2 <- (1:n)[!hashdup(h)]
   })[3]
   if (uniorder=="original")
    stopifnot(identical(p2, p))

   tim["sortorderdup","prep"] <- tim["sortorderupokeep","prep"]
   tim["sortorderdup","use"] <- timefun({
    p2 <- (1:n)[!sortorderdup(s, o)]
   })[3]
   if (uniorder=="original")
    stopifnot(identical(p2, p))
   
   hashcache(x)
   tim["hash.cache","use"] <- timefun({
    p2 <- unipos(x, order=uniorder)
   })[3]
   if (uniorder!="any")
    stopifnot(identical(p2, p))
   remcache(x)

   sortordercache(x)
   tim["sortorder.cache","use"] <- timefun({
    p2 <- unipos(x, order=uniorder)
   })[3]
   if (uniorder!="any")
    stopifnot(identical(p2, p))
   remcache(x)

   ordercache(x)
   tim["order.cache","use"] <- timefun({
    p2 <- unipos(x, order=uniorder)
   })[3]
   if (uniorder!="any")
    stopifnot(identical(p2, p))
   remcache(x)
   
   if (plot){
    barplot(t(tim), cex.names=0.7)
    title(paste("unipos(",n,", order=",uniorder,")", sep=""))
   }
   
   ret[["unipos",as.character(n)]] <- tim
  }
 }
 if ("table" %in% what){
  message("table: timings of different methods")
  N <- c(nsmall, nbig)
  for (i in seq_along(N)){
   n <- N[i]
   x <- c(sample(1024, n-1, TRUE), NA)
   tim <- matrix(0, 13, 3)
   dimnames(tim) <- list(c("tabulate","table","table.64","hashmaptab","hashtab","hashtab2","sorttab","sortordertab","ordertab","ordertabkeep"
    ,"hash.cache","sort.cache","order.cache")
   , c("prep","both","use"))

   tim["tabulate","both"] <- timefun({
    tabulate(x)
   })[3]
   
   tim["table","both"] <- timefun({
    p <- table(x, exclude=NULL)
   })[3]
   p <- p[-length(p)]
   
   x <- as.integer64(x)

   tim["table.64","both"] <- timefun({
    p2 <- table.integer64(x, order=taborder)
   })[3]
   p2 <- p2[-1]
   stopifnot(identical(p2, p))

   tim["hashmaptab","both"] <- timefun({
    p <- hashmaptab(x)
   })[3]
   
   tim["hashtab","prep"] <- timefun({
    h <- hashmap(x)
   })[3]
   tim["hashtab","use"] <- timefun({
    p2 <- hashtab(h)
   })[3]
   stopifnot(identical(p2, p))
   
   tim["hashtab2","prep"] <- tim["hashtab","prep"] + timefun({
    h <- hashmap(x, nunique=h$nunique)
   })[3]
   tim["hashtab2","use"] <- timefun({
    p2 <- hashtab(h)
   })[3]
   
   sortp <- function(p){
    s <- p$values
    o <- seq_along(s)
    ramsortorder(s,o, na.last=FALSE)
    list(values=s, counts=p$counts[o])
   }
   p <- sortp(p)
   p2 <- sortp(p2)
   stopifnot(identical(p2, p))
   
   tim["sorttab","prep"] <- timefun({
    s <- clone(x)
    ramsort(s, na.last=FALSE)
    nunique <- sortnut(s)[1]
   })[3]
   tim["sorttab","use"] <- timefun({
    p2 <- list(values=sortuni(s, nunique), counts=sorttab(s, nunique))
   })[3]
   stopifnot(identical(p2, p))
    
   tim["sortordertab","prep"] <- timefun({
    s <- clone(x)
    o <- seq_along(x)
    ramsortorder(s, o, na.last=FALSE)
    nunique <- sortnut(s)[1]
  	})[3]
			tim["sortordertab","use"] <- timefun({
				p2 <- list(values=sortorderuni(x, s, o, nunique), counts=sortordertab(s, o))
			})[3]
			p2 <- sortp(p2)
			stopifnot(identical(p2, p))
				
			tim["ordertab","prep"] <- timefun({
				o <- seq_along(x)
				ramorder(x, o, na.last=FALSE)
				nunique <- ordernut(x, o)[1]
			})[3]
			tim["ordertab","use"] <- timefun({
				p2 <- list(values=orderuni(x, o, nunique), counts=ordertab(x, o, nunique))
			})[3]
			stopifnot(identical(p2, p))
				
			tim["ordertabkeep","prep"] <- tim["ordertab","prep"] 
			tim["ordertabkeep","use"] <- timefun({
				p2 <- list(values=orderuni(x, o, nunique, keep.order=TRUE), counts=ordertab(x, o, nunique, keep.order=TRUE))
			})[3]
			p2 <- sortp(p2)
			stopifnot(identical(p2, p))
			
			hashcache(x)
			tim["hash.cache","use"] <- timefun({
				p <- table.integer64(x, order=taborder)
			})[3]
			remcache(x)

			sortordercache(x)
			tim["sort.cache","use"] <- timefun({
				p2 <- table.integer64(x, order=taborder)
			})[3]
			stopifnot(identical(p2, p))
			remcache(x)

			ordercache(x)
			tim["order.cache","use"] <- timefun({
				p2 <- table.integer64(x, order=taborder)
			})[3]
			stopifnot(identical(p2, p))
			remcache(x)
			
			if (plot){
				barplot(t(tim), cex.names=0.7)
				title(paste("table.integer64(",n,", order=",taborder,")", sep=""))
			}
			
			ret[["table",as.character(n)]] <- tim
		}
	}
	if ("rank" %in% what){
		message("rank: timings of different methods")
		N <- c(nsmall, nbig)
		for (i in seq_along(N)){
			n <- N[i]
			x <- c(sample(n, n-1, TRUE), NA)
			tim <- matrix(0, 7, 3)
			dimnames(tim) <- list(c("rank","rank.keep","rank.64","sortorderrnk","orderrnk"
				,"sort.cache","order.cache")
			, c("prep","both","use"))

			tim["rank","both"] <- timefun({
				rank(x)
			})[3]
			
			tim["rank.keep","both"] <- timefun({
				p <- rank(x, na.last="keep")
			})[3]
			
			x <- as.integer64(x)

			tim["rank.64","both"] <- timefun({
				p2 <- rank.integer64(x)
			})[3]
			stopifnot(identical(p2, p))
				
			tim["sortorderrnk","prep"] <- timefun({
				s <- clone(x)
				o <- seq_along(x)
				na.count <- ramsortorder(s, o, na.last=FALSE)
			})[3]
			tim["sortorderrnk","use"] <- timefun({
				p2 <- sortorderrnk(s, o, na.count)
			})[3]
			stopifnot(identical(p2, p))
				
			tim["orderrnk","prep"] <- timefun({
				o <- seq_along(x)
				na.count <- ramorder(x, o, na.last=FALSE)
			})[3]
			tim["orderrnk","use"] <- timefun({
				p2 <- orderrnk(x, o, na.count)
			})[3]
			stopifnot(identical(p2, p))
				
			sortordercache(x)
			tim["sort.cache","use"] <- timefun({
				p2 <- rank.integer64(x)
			})[3]
			stopifnot(identical(p2, p))
			remcache(x)

			ordercache(x)
			tim["order.cache","use"] <- timefun({
				p2 <- rank.integer64(x)
			})[3]
			stopifnot(identical(p2, p))
			remcache(x)
			
			if (plot){
				barplot(t(tim), cex.names=0.7)
				title(paste("rank.integer64(",n,")", sep=""))
			}
			
			ret[["rank",as.character(n)]] <- tim
		}
	}
	if ("quantile" %in% what){
		message("quantile: timings of different methods")
		N <- c(nsmall, nbig)
		for (i in seq_along(N)){
			n <- N[i]
			x <- c(sample(n, n-1, TRUE), NA)
			tim <- matrix(0, 6, 3)
			dimnames(tim) <- list(c("quantile","quantile.64","sortqtl","orderqtl"
				,"sort.cache","order.cache")
			, c("prep","both","use"))

			tim["quantile","both"] <- timefun({
				p <- quantile(x, type=1, na.rm=TRUE)
			})[3]
			p2 <- p
			p <- as.integer64(p2)
			names(p) <- names(p2)
			
			x <- as.integer64(x)

			tim["quantile.64","both"] <- timefun({
				p2 <- quantile(x, na.rm=TRUE)
			})[3]
			stopifnot(identical(p2, p))
				
			tim["sortqtl","prep"] <- timefun({
				s <- clone(x)
				na.count <- ramsort(s, na.last=FALSE)
			})[3]
			tim["sortqtl","use"] <- timefun({
				p2 <- sortqtl(s, na.count, seq(0, 1, 0.25))
			})[3]
			stopifnot(identical(unname(p2), unname(p)))
				
			tim["orderqtl","prep"] <- timefun({
				o <- seq_along(x)
				na.count <- ramorder(x, o, na.last=FALSE)
			})[3]
			tim["orderqtl","use"] <- timefun({
				p2 <- orderqtl(x, o, na.count, seq(0, 1, 0.25))
			})[3]
			stopifnot(identical(unname(p2), unname(p)))
				
			sortordercache(x)
			tim["sort.cache","use"] <- timefun({
				p2 <- quantile(x, na.rm=TRUE)
			})[3]
			stopifnot(identical(p2, p))
			remcache(x)

			ordercache(x)
			tim["order.cache","use"] <- timefun({
				p2 <- quantile(x, na.rm=TRUE)
			})[3]
			stopifnot(identical(p2, p))
			remcache(x)
			
			if (plot){
				barplot(t(tim), cex.names=0.7)
				title(paste("quantile(",n,")", sep=""))
			}
			
			ret[["quantile",as.character(n)]] <- tim
		}
	}

	ret
	
}


#! \name{match.integer64}
#! \alias{match.integer64}
#! \alias{\%in\%.integer64}
#! \title{
#! 64-bit integer matching
#! }
#! \description{
#! \code{match} returns a vector of the positions of (first) matches of its first argument in its second. 
#! 
#! \code{\%in\%} is a more intuitive interface as a binary operator, which returns a logical vector indicating if there is a match or not for its left operand. 
#! 
#! }
#! \usage{
#! \method{match}{integer64}(x, table, nomatch = NA_integer_, nunique = NULL, method = NULL, ...)
#! \method{\%in\%}{integer64}(x, table, ...)
#! }
#! \arguments{
#!   \item{x}{
#! 	integer64 vector: the values to be matched, optionally carrying a cache created with \code{\link{hashcache}}
#! }
#!   \item{table}{
#! 	integer64 vector: the values to be matched against, optionally carrying a cache created with \code{\link{hashcache}} or \code{\link{sortordercache}}
#! }
#!   \item{nomatch}{
#!   the value to be returned in the case when no match is found. Note that it is coerced to integer.
#! }
#!   \item{nunique}{
#! 	NULL or the number of unique values of table (including NA). Providing \code{nunique} can speed-up matching when \code{table} has no cache. Note that a wrong nunique can cause undefined behaviour up to a crash.
#! }
#!   \item{method}{
#! 	NULL for automatic method selection or a suitable low-level method, see details
#! }
#!   \item{\dots}{
#! ignored
#! }
#! }
#! \details{
#!   These functions automatically choose from several low-level functions considering the size of \code{x} and \code{table} and the availability of caches. 
#! 
#! 
#!   Suitable methods for \code{\%in\%.integer64} are \code{\link{hashpos}} (hash table lookup), \code{\link{hashrev}} (reverse lookup), \code{\link{sortorderpos}} (fast ordering) and \code{\link{orderpos}} (memory saving ordering).
#!   Suitable methods for \code{match.integer64} are \code{\link{hashfin}} (hash table lookup), \code{\link{hashrin}} (reverse lookup), \code{\link{sortfin}} (fast sorting) and \code{\link{orderfin}} (memory saving ordering).
#! }
#! \value{
#!   A vector of the same length as \code{x}.
#! 
#!   \code{match}: An integer vector giving the position in \code{table} of
#!   the first match if there is a match, otherwise \code{nomatch}.
#! 
#!   If \code{x[i]} is found to equal \code{table[j]} then the value
#!   returned in the \code{i}-th position of the return value is \code{j},
#!   for the smallest possible \code{j}.  If no match is found, the value
#!   is \code{nomatch}.
#! 
#!   \code{\%in\%}: A logical vector, indicating if a match was located for
#!   each element of \code{x}: thus the values are \code{TRUE} or
#!   \code{FALSE} and never \code{NA}.
#! }
#! \author{
#! 	Jens Oehlschlägel <Jens.Oehlschlaegel@truecluster.com>
#! }
#! \seealso{
#! 	\code{\link{match}}
#! }
#! \examples{
#! x <- as.integer64(c(NA, 0:9), 32)
#! table <- as.integer64(c(1:9, NA))
#! match.integer64(x, table)
#! "\%in\%.integer64"(x, table)
#!
#! x <- as.integer64(sample(c(rep(NA, 9), 0:9), 32, TRUE))
#! table <- as.integer64(sample(c(rep(NA, 9), 1:9), 32, TRUE))
#! stopifnot(identical(match.integer64(x, table), match(as.integer(x), as.integer(table))))
#! stopifnot(identical("\%in\%.integer64"(x, table), as.integer(x) \%in\% as.integer(table)))
#! 
#! \dontrun{
#! 	message("check when reverse hash-lookup beats standard hash-lookup")
#! 	e <- 4:24
#! 	timx <- timy <- matrix(NA, length(e), length(e), dimnames=list(e,e))
#! 	for (iy in seq_along(e))
#! 	for (ix in 1:iy){
#! 		nx <- 2^e[ix]
#! 		ny <- 2^e[iy]
#! 		x <- as.integer64(sample(ny, nx, FALSE))
#! 		y <- as.integer64(sample(ny, ny, FALSE))
#! 		#hashfun(x, bits=as.integer(5))
#! 		timx[ix,iy] <- repeat.time({
#! 		hx <- hashmap(x)
#! 		py <- hashrev(hx, y)
#! 		})[3]
#! 		timy[ix,iy] <- repeat.time({
#! 		hy <- hashmap(y)
#! 		px <- hashpos(hy, x)
#! 		})[3]
#! 		#identical(px, py)
#! 		print(round(timx[1:iy,1:iy]/timy[1:iy,1:iy], 2), na.print="")
#! 	}
#!
#! 	message("explore best low-level method given size of x and table")
#! 	B1 <- 1:27
#! 	B2 <- 1:27
#! 	tim <- array(NA, dim=c(length(B1), length(B2), 5)
#!  , dimnames=list(B1, B2, c("hashpos","hashrev","sortpos1","sortpos2","sortpos3")))
#! 	for (i1 in B1)
#! 	for (i2 in B2)
#! 	{
#! 	  b1 <- B1[i1]
#! 	  b2 <- B1[i2]
#! 	  n1 <- 2^b1
#! 	  n2 <- 2^b2
#! 	  x1 <- as.integer64(c(sample(n2, n1-1, TRUE), NA))
#! 	  x2 <- as.integer64(c(sample(n2, n2-1, TRUE), NA))
#! 	  tim[i1,i2,1] <- repeat.time({h <- hashmap(x2);hashpos(h, x1);rm(h)})[3]
#! 	  tim[i1,i2,2] <- repeat.time({h <- hashmap(x1);hashrev(h, x2);rm(h)})[3]
#! 	  s <- clone(x2); o <- seq_along(s); ramsortorder(s, o)
#! 	  tim[i1,i2,3] <- repeat.time(sortorderpos(s, o, x1, method=1))[3]
#! 	  tim[i1,i2,4] <- repeat.time(sortorderpos(s, o, x1, method=2))[3]
#! 	  tim[i1,i2,5] <- repeat.time(sortorderpos(s, o, x1, method=3))[3]
#! 	  rm(s,o)
#! 	  print(apply(tim, 1:2, function(ti)if(any(is.na(ti)))NA else which.min(ti)))
#! 	}
#! }
#! }
#! \keyword{manip}
#! \keyword{logic}


match.integer64 <- function(x, table, nomatch = NA_integer_, nunique=NULL, method=NULL, ...){
  stopifnot(is.integer64(x))
  table <- as.integer64(table)
  c <- cache(table)
  if (is.null(method)){
    if (is.null(c)){
			nx <- length(x)
			if (is.null(nunique))
				nunique <- length(table)
			btable <- as.integer(ceiling(log2(nunique*1.5)))
			bx <- as.integer(ceiling(log2(nx*1.5)))
			if (bx<=17 && btable>=16){
				method <- "hashrev"
			}else{
				method <- "hashpos"
			}
	}else{
		if (exists("hashmap", envir=c, inherits=FALSE)){
			method <- "hashpos"
		}else if (exists("sort", envir=c, inherits=FALSE) && exists("order", envir=c, inherits=FALSE) && (length(table)>length(x) || length(x)<4096)){
			method <- "sortorderpos"
		}else if (exists("order", envir=c, inherits=FALSE) && (length(table)>length(x) || length(x)<4096)){
			method <- "orderpos"
		}else{
			nx <- length(x)
			if (is.null(nunique)){
			  if (exists("nunique", envir=c, inherits=FALSE))
				nunique <- c$nunique
			  else
				nunique <- length(table)
			}
			btable <- as.integer(ceiling(log2(nunique*1.5)))
			bx <- as.integer(ceiling(log2(nx*1.5)))
			if (bx<=17 && btable>=16){
				method <- "hashrev"
			}else{
				method <- "hashpos"
			}
		}
	}
  }
  switch(method
  , hashpos={
			if (is.null(c) || !exists("hashmap", envir=c, inherits=FALSE)){
				if (exists("btable", inherits=FALSE))
					h <- hashmap(table, hashbits=btable)
				else{
					if (is.null(nunique))
						nunique <- c$nunique
					h <- hashmap(table, nunique=nunique)
				}
			}else
				h <- c
			p <- hashpos(h, x, nomatch=nomatch)
    }
  , hashrev={
		c <- cache(x)
		if (is.null(c) || !exists("hashmap", envir=c, inherits=FALSE)){
				if (exists("bx", inherits=FALSE))
					h <- hashmap(x, bits=bx)
				else{
					if (is.null(nunique))
						nunique <- c$nunique
					h <- hashmap(x, nunique=nunique)
				}
			}else
				h <- c
		p <- hashrev(h, table, nomatch=nomatch)
    }
  , sortorderpos={
		if (is.null(c) || !exists("sort", c) || !exists("order", c)){
			s <- clone(table)
			o <- seq_along(s)
			ramsortorder(s, o, na.last=FALSE)
		}else{
			s <- get("sort", c)
			o <- get("order", c)
		}
		p <- sortorderpos(s, o, x, nomatch=nomatch)
    }
  , orderpos={
		if (is.null(c) || !exists("order", c)){
			o <- seq_along(s)
			ramorder(table, o, na.last=FALSE)
		}else{
			o <- get("order", c)
		}
		p <- orderpos(table, o, x, nomatch=nomatch)
    }
  , stop("unknown method")
  )
  p
}


"%in%.integer64" <- function(x, table, ...){
  stopifnot(is.integer64(x))
  table <- as.integer64(table)
	nunique <- NULL
	method <- NULL
  c <- cache(table)
  if (is.null(method)){
    if (is.null(c)){
			nx <- length(x)
			if (is.null(nunique))
				nunique <- length(table)
			btable <- as.integer(ceiling(log2(nunique*1.5)))
			bx <- as.integer(ceiling(log2(nx*1.5)))
			if (bx<=17 && btable>=16){
				method <- "hashrin"
			}else{
				method <- "hashfin"
			}
	}else{
		if (exists("hashmap", envir=c, inherits=FALSE)){
			method <- "hashfin"
		}else if (exists("sort", envir=c, inherits=FALSE) && (length(table)>length(x) || length(x)<4096)){
			method <- "sortfin"
		}else if (exists("order", envir=c, inherits=FALSE) && (length(table)>length(x) || length(x)<4096)){
			method <- "orderfin"
		}else{
			nx <- length(x)
			if (is.null(nunique)){
			  if (exists("nunique", envir=c, inherits=FALSE))
				nunique <- c$nunique
			  else
				nunique <- length(table)
			}
			btable <- as.integer(ceiling(log2(nunique*1.5)))
			bx <- as.integer(ceiling(log2(nx*1.5)))
			if (bx<=17 && btable>=16){
				method <- "hashrin"
			}else{
				method <- "hashfin"
			}
		}
	}
  }
  switch(method
  , hashfin={
		if (is.null(c) || !exists("hashmap", envir=c, inherits=FALSE)){
			if (exists("btable", inherits=FALSE))
				h <- hashmap(table, hashbits=btable)
			else{
				if (is.null(nunique))
					nunique <- c$nunique
				h <- hashmap(table, nunique=nunique)
			}
		}else
			h <- c
		p <- hashfin(h, x)
    }
  , hashrin={
		c <- cache(x)
		if (is.null(c) || !exists("hashmap", envir=c, inherits=FALSE)){
				if (exists("bx", inherits=FALSE))
					h <- hashmap(x, bits=bx)
				else{
					if (is.null(nunique))
						nunique <- c$nunique
					h <- hashmap(x, nunique=nunique)
				}
		}else
			h <- c
		p <- hashrin(h, table)
    }
  , sortfin={
		if (is.null(c) || !exists("sort", c)){
			s <- clone(table)
			ramsort(s, na.last=FALSE)
		}else{
			s <- get("sort", c)
		}
		p <- sortfin(s, x)
    }
  , orderfin={
		if (is.null(c) || !exists("order", c)){
			o <- seq_along(s)
			ramorder(table, o, na.last=FALSE)
		}else{
			o <- get("order", c)
		}
		p <- orderfin(table, o, x)
    }
  , stop("unknown method")
  )
  p
}

#! \name{duplicated.integer64}
#! \alias{duplicated.integer64}
#! \title{Determine Duplicate Elements of integer64}
#! \description{
#!   \code{duplicated()} determines which elements of a vector or data frame are duplicates
#!   of elements with smaller subscripts, and returns a logical vector
#!   indicating which elements (rows) are duplicates.
#! }
#! \usage{
#! \method{duplicated}{integer64}(x, incomparables = FALSE, nunique = NULL, method = NULL, \dots)
#! }
#! \arguments{
#!   \item{x}{a vector or a data frame or an array or \code{NULL}.}
#!   \item{incomparables}{ignored}
#!   \item{nunique}{
#! 	NULL or the number of unique values (including NA). Providing \code{nunique} can speed-up matching when \code{x} has no cache. Note that a wrong nunique can cause undefined behaviour up to a crash.
#! }
#!   \item{method}{
#! 	NULL for automatic method selection or a suitable low-level method, see details
#! }
#!   \item{\dots}{ignored}
#! }
#! \details{
#!   This function automatically chooses from several low-level functions considering the size of \code{x} and the availability of a cache. 
#! 
#!   Suitable methods are \code{\link{hashdup}} (hashing), \code{\link{sortorderdup}} (fast ordering) and \code{\link{orderdup}} (memory saving ordering).
#! }
#! \value{
#!     \code{duplicated()}: a logical vector of the same length as \code{x}.  
#! }
#! \author{
#! 	Jens Oehlschlägel <Jens.Oehlschlaegel@truecluster.com>
#! }
#! \seealso{ \code{\link{duplicated}}, \code{\link{unique.integer64}}  }
#! \examples{
#! x <- as.integer64(sample(c(rep(NA, 9), 1:9), 32, TRUE))
#! duplicated(x)
#! 
#! stopifnot(identical(duplicated(x),  duplicated(as.integer(x))))
#! }
#! \keyword{logic}
#! \keyword{manip}
#! 

duplicated.integer64 <- function(x
, incomparables = FALSE  # dummy parameter
, nunique = NULL
, method = NULL
, ...
){
  stopifnot(identical(incomparables, FALSE))
  c <- cache(x)
  if (is.null(nunique) && !is.null(c))
	nunique <- c$nunique
  if (is.null(method)){
    if (is.null(c)){
		if (length(x)>5e7)
			method <- "sortorderdup"
		else
			method <- "hashdup"
	}else{
		if (exists("sort", envir=c, inherits=FALSE) && exists("order", envir=c, inherits=FALSE))
			method <- "sortorderdup"
		else if (exists("hashmap", envir=c, inherits=FALSE))
			method <- "hashdup"
		else if (exists("order", envir=c, inherits=FALSE))
			method <- "orderdup"
		else if (length(x)>5e7)
			method <- "sortorderdup"
		else
			method <- "hashdup"
	}
  }
  switch(method
  , hashdup={
		if (is.null(c) || !exists("hashmap", envir=c, inherits=FALSE))
			h <- hashmap(x, nunique=nunique)
		else
			h <- c
		p <- hashdup(h)
    }
  , sortorderdup={
		if (is.null(c) || !exists("sort", c, inherits=FALSE) || !exists("order", c, inherits=FALSE)){
			s <- clone(x)
			o <- seq_along(s)
			ramsortorder(s, o, na.last=FALSE)
		}else{
			s <- get("sort", c, inherits=FALSE)
			o <- get("order", c, inherits=FALSE)
		}
		p <- sortorderdup(s, o)
    }
  , orderdup={
		if (is.null(c) || !exists("order", c, inherits=FALSE)){
			o <- seq_along(s)
			ramorder(x, o, na.last=FALSE)
		}else{
			o <- get("order", c, inherits=FALSE)
		}
		p <- orderdup(x, o)
    }
  , stop("unknown method", method)
  )
  p
}


#! \name{unique.integer64}
#! \alias{unique.integer64}
#! \title{Extract Unique Elements from integer64}
#! \description{
#!   \code{unique} returns a vector like \code{x} but with duplicate elements/rows removed.
#! }
#! \usage{
#! \method{unique}{integer64}(x, incomparables = FALSE, order = c("original","values","any")
#! , nunique = NULL, method = NULL, \dots)
#! }
#! \arguments{
#!   \item{x}{a vector or a data frame or an array or \code{NULL}.}
#!   \item{incomparables}{ignored}
#!   \item{order}{The order in which unique values will be returned, see details}
#!   \item{nunique}{
#! 	NULL or the number of unique values (including NA). Providing \code{nunique} can speed-up matching when \code{x} has no cache. Note that a wrong nunique can cause undefined behaviour up to a crash.
#! }
#!   \item{method}{
#! 	NULL for automatic method selection or a suitable low-level method, see details
#! }
#!   \item{\dots}{ignored}
#! }
#! \details{
#!   This function automatically chooses from several low-level functions considering the size of \code{x} and the availability of a cache. 
#!   Suitable methods are \code{\link{hashmapuni}} (simultaneously creating and using a hashmap)
#! , \code{\link{hashuni}} (first creating a hashmap then using it)
#! , \code{\link{sortuni}} (fast sorting for sorted order only)
#! , \code{\link{sortorderuni}} (fast ordering for original order only) 
#! and \code{\link{orderuni}} (memory saving ordering).
#! \cr
#! The default \code{order="original"} returns unique values in the order of the first appearance in \code{x} like in \code{\link{unique}}, this costs extra processing. 
#! \code{order="values"} returns unique values in sorted order like in \code{\link{table}}, this costs extra processing with the hash methods but comes for free. 
#! \code{order="any"} returns unique values in undefined order, possibly faster. For hash methods this will be a quasi random order, for sort methods this will be sorted order.
#! }
#! \value{
#!   For a vector, an object of the same type of \code{x}, but with only
#!   one copy of each duplicated element.  No attributes are copied (so
#!   the result has no names).
#! }
#! \author{
#! 	Jens Oehlschlägel <Jens.Oehlschlaegel@truecluster.com>
#! }
#! \seealso{
#!   \code{\link{unique}} for the generic, \code{\link{unipos}} which gives the indices of the unique
#!   elements and \code{\link{table.integer64}} which gives frequencies of the unique elements.
#! }
#! \examples{
#! x <- as.integer64(sample(c(rep(NA, 9), 1:9), 32, TRUE))
#! unique(x)
#! unique(x, order="values")
#! 
#! stopifnot(identical(unique(x),  x[!duplicated(x)]))
#! stopifnot(identical(unique(x),  as.integer64(unique(as.integer(x)))))
#! stopifnot(identical(unique(x, order="values")
#! ,  as.integer64(sort(unique(as.integer(x)), na.last=FALSE))))
#! }
#! \keyword{manip}
#! \keyword{logic}


unique.integer64 <- function(x
, incomparables = FALSE  # dummy parameter
, order = c("original","values","any")
, nunique = NULL
, method = NULL
, ...
){
  stopifnot(identical(incomparables, FALSE))
  order <- match.arg(order)
  c <- cache(x)
  keep.order <- order == "original"
  if (is.null(nunique) && !is.null(c))
	nunique <- c$nunique
  if (is.null(method)){
    if (is.null(c)){
		if (order=="values")
			method <- "sortuni"
		else
			method <- "hashmapuni"
	}else{
		switch(order
		, "original" = {
			if (exists("hashmap", envir=c, inherits=FALSE))
				method <- "hashuni"
			else if (exists("order", envir=c, inherits=FALSE)){
				if (exists("sort", envir=c, inherits=FALSE))
					method <- "sortorderuni"
				else
					method <- "orderuni"
			}else
				method <- "hashmapuni"
		}
		, "values" = {
			if (exists("sort", envir=c, inherits=FALSE))
				method <- "sortuni"
			else if (exists("order", envir=c, inherits=FALSE))
				method <- "orderuni"
			else if (exists("hashmap", envir=c, inherits=FALSE) && c$nunique<length(x)/2)
				method <- "hashuni"
			else
				method <- "sortuni"
		}
		, "any" = {
			if (exists("sort", envir=c, inherits=FALSE))
				method <- "sortuni"
			else if (exists("hashmap", envir=c, inherits=FALSE))
				method <- "hashuni"
			else if (exists("order", envir=c, inherits=FALSE))
				method <- "orderuni"
			else
				method <- "sortuni"
		}
		)
	}
  }
  switch(method
  , hashmapuni={
		p <- hashmapuni(x, nunique=nunique)
    }
  , hashuni={
		if (is.null(c) || !exists("hashmap", envir=c, inherits=FALSE))
			h <- hashmap(x, nunique=nunique)
		else
			h <- c
		p <- hashuni(h, keep.order=keep.order)
		if (order=="values")
			ramsort(p, na.last=FALSE)
    }
  , sortuni={
		if (is.null(c) || !exists("sort", c, inherits=FALSE)){
			s <- clone(x)
			ramsort(s, na.last=FALSE)
		}else{
			s <- get("sort", c, inherits=FALSE)
		}
		if (is.null(nunique))
			nunique <- sortnut(s)[1]
		p <- sortuni(s, nunique)
    }
  , sortorderuni={
		if (is.null(c) || !exists("sort", c, inherits=FALSE) || !exists("order", c, inherits=FALSE)){
			s <- clone(x)
			o <- seq_along(x)
			ramsortorder(s, o, na.last=FALSE)
		}else{
			s <- get("sort", c, inherits=FALSE)
			o <- get("order", c, inherits=FALSE)
		}
		if (is.null(nunique))
			nunique <- sortnut(s)[1]
		p <- sortorderuni(x, s, o, nunique)
    }
  , orderuni={
		if (is.null(c) || !exists("order", c, inherits=FALSE)){
			o <- seq_along(x)
			ramorder(x, o, na.last=FALSE)
		}else{
			o <- get("order", c, inherits=FALSE)
		}
		if (is.null(nunique))
			nunique <- ordernut(x, o)[1]
		p <- orderuni(x, o, nunique, keep.order=keep.order)
    }
  , stop("unknown method", method)
  )
  p
}


#! \name{unipos}
#! \alias{unipos}
#! \alias{unipos.integer64}
#! \title{Extract Positions of Unique Elements}
#! \description{
#!   \code{unipos} returns the positions of those elements returned by \code{\link{unique}}.
#! }
#! \usage{
#! unipos(x, incomparables = FALSE, order = c("original","values","any"), \dots)
#! \method{unipos}{integer64}(x, incomparables = FALSE, order = c("original","values","any")
#! , nunique = NULL, method = NULL, \dots)
#! }
#! \arguments{
#!   \item{x}{a vector or a data frame or an array or \code{NULL}.}
#!   \item{incomparables}{ignored}
#!   \item{order}{The order in which positions of unique values will be returned, see details}
#!   \item{nunique}{
#! 	NULL or the number of unique values (including NA). Providing \code{nunique} can speed-up when \code{x} has no cache. Note that a wrong nunique can cause undefined behaviour up to a crash.
#! }
#!   \item{method}{
#! 	NULL for automatic method selection or a suitable low-level method, see details
#! }
#!   \item{\dots}{ignored}
#! }
#! \details{
#!   This function automatically chooses from several low-level functions considering the size of \code{x} and the availability of a cache. 
#!   Suitable methods are \code{\link{hashmapupo}} (simultaneously creating and using a hashmap)
#! , \code{\link{hashupo}} (first creating a hashmap then using it)
#! , \code{\link{sortorderupo}} (fast ordering) 
#! and \code{\link{orderupo}} (memory saving ordering).
#! \cr
#! The default \code{order="original"} collects unique values in the order of the first appearance in \code{x} like in \code{\link{unique}}, this costs extra processing. 
#! \code{order="values"} collects unique values in sorted order like in \code{\link{table}}, this costs extra processing with the hash methods but comes for free. 
#! \code{order="any"} collects unique values in undefined order, possibly faster. For hash methods this will be a quasi random order, for sort methods this will be sorted order.
#! }
#! \value{
#!   an integer vector of positions
#! }
#! \author{
#! 	Jens Oehlschlägel <Jens.Oehlschlaegel@truecluster.com>
#! }
#! \seealso{
#!   \code{\link{unique.integer64}} for unique values and \code{\link{match.integer64}} for general matching.
#! }
#! \examples{
#! x <- as.integer64(sample(c(rep(NA, 9), 1:9), 32, TRUE))
#! unipos(x)
#! unipos(x, order="values")
#! 
#! stopifnot(identical(unipos(x),  (1:length(x))[!duplicated(x)]))
#! stopifnot(identical(unipos(x),  match.integer64(unique(x), x)))
#! stopifnot(identical(unipos(x, order="values"),  match.integer64(unique(x, order="values"), x)))
#! stopifnot(identical(unique(x),  x[unipos(x)]))
#! stopifnot(identical(unique(x, order="values"),  x[unipos(x, order="values")]))
#! }
#! \keyword{manip}
#! \keyword{logic}


unipos <- function(x, incomparables = FALSE, order = c("original","values","any"), ...)UseMethod("unipos")
unipos.integer64 <- function(x
, incomparables = FALSE  # dummy parameter
, order = c("original","values","any")
, nunique = NULL
, method = NULL
, ...
){
  stopifnot(identical(incomparables, FALSE))
  order <- match.arg(order)
  c <- cache(x)
  keep.order <- order == "original"
  if (is.null(nunique) && !is.null(c))
	nunique <- c$nunique
  if (is.null(method)){
    if (is.null(c)){
		if (order=="values")
			method <- "sortorderupo"
		else
			method <- "hashmapupo"
	}else{
		switch(order
		, "original" = {
			if (exists("hashmap", envir=c, inherits=FALSE))
				method <- "hashupo"
			else if (exists("order", envir=c, inherits=FALSE)){
				if (exists("sort", envir=c, inherits=FALSE))
					method <- "sortorderupo"
				else
					method <- "orderupo"
			}else
				method <- "hashmapupo"
		}
		, "values" = {
			if (exists("order", envir=c, inherits=FALSE)){
				if (exists("sort", envir=c, inherits=FALSE))
					method <- "sortorderupo"
				else
					method <- "orderupo"
			}else if (exists("hashmap", envir=c, inherits=FALSE) && c$nunique<length(x)/2)
				method <- "hashupo"
			else
				method <- "sortorderupo"
		}
		, "any" = {
			if (exists("sort", envir=c, inherits=FALSE) && exists("order", envir=c, inherits=FALSE))
				method <- "sortorderupo"
			else if (exists("hashmap", envir=c, inherits=FALSE))
				method <- "hashupo"
			else if (exists("order", envir=c, inherits=FALSE))
				method <- "orderupo"
			else
				method <- "sortorderupo"
		}
		)
	}
  }
  switch(method
  , hashmapupo={
		p <- hashmapupo(x, nunique=nunique)
    }
  , hashupo={
		if (is.null(c) || !exists("hashmap", envir=c, inherits=FALSE))
			h <- hashmap(x, nunique=nunique)
		else
			h <- c
		p <- hashupo(h, keep.order=keep.order)
		if (order=="values"){
			s <- x[p]
			ramsortorder(s, p, na.last=FALSE)
		}
    }
  , sortorderupo={
		if (is.null(c) || !exists("sort", c, inherits=FALSE) || !exists("order", c, inherits=FALSE)){
			s <- clone(x)
			o <- seq_along(x)
			ramsortorder(s, o, na.last=FALSE)
		}else{
			s <- get("sort", c, inherits=FALSE)
			o <- get("order", c, inherits=FALSE)
		}
		if (is.null(nunique))
			nunique <- sortnut(s)[1]
		p <- sortorderupo(s, o, nunique, keep.order=keep.order)
    }
  , orderupo={
		if (is.null(c) || !exists("order", c, inherits=FALSE)){
			o <- seq_along(x)
			ramorder(x, o, na.last=FALSE)
		}else{
			o <- get("order", c, inherits=FALSE)
		}
		if (is.null(nunique))
			nunique <- ordernut(x, o)[1]
		p <- orderupo(x, o, nunique, keep.order=keep.order)
    }
  , stop("unknown method", method)
  )
  p
}



#! \name{table.integer64}
#! \title{Cross Tabulation and Table Creation for integer64}
#! \alias{table.integer64}
#! 
#! \concept{counts}
#! \concept{frequencies}
#! \concept{occurrences}
#! \concept{contingency table}
#! 
#! \description{
#!   \code{table.integer64} uses the cross-classifying integer64 vectors to build a contingency
#!   table of the counts at each combination of vector values.
#! }
#! \usage{
#! table.integer64(\dots
#! , return = c("table","data.frame","list")
#! , order = c("values","counts")
#! , nunique = NULL
#! , method = NULL
#! , dnn = list.names(...), deparse.level = 1
#! ) 
#! }
#! \arguments{
#!   \item{\dots}{one or more objects which can be interpreted as factors
#!     (including character strings), or a list (or data frame) whose
#!     components can be so interpreted.  (For \code{as.table} and
#!     \code{as.data.frame}, arguments passed to specific methods.)}
#!   \item{nunique}{
#! 	NULL or the number of unique values of table (including NA). Providing \code{nunique} can speed-up matching when \code{table} has no cache. Note that a wrong nunique can cause undefined behaviour up to a crash.
#! }
#!   \item{order}{
#! 	By default results are created sorted by "values", or by "counts"
#! }
#!   \item{method}{
#! 	NULL for automatic method selection or a suitable low-level method, see details
#! }
#!   \item{return}{
#!      choose the return format, see details
#! }
#!   \item{dnn}{the names to be given to the dimensions in the result (the
#!     \emph{dimnames names}).}
#!   \item{deparse.level}{controls how the default \code{dnn} is
#!     constructed.  See \sQuote{Details}.}
#! }
#! \details{
#!   This function automatically chooses from several low-level functions considering the size of \code{x} and the availability of a cache. 
#!   Suitable methods are \code{\link{hashmaptab}} (simultaneously creating and using a hashmap)
#! , \code{\link{hashtab}} (first creating a hashmap then using it)
#! , \code{\link{sortordertab}} (fast ordering) 
#! and \code{\link{ordertab}} (memory saving ordering).
#! \cr
#!   If the argument \code{dnn} is not supplied, the internal function
#!   \code{list.names} is called to compute the \sQuote{dimname names}.  If the
#!   arguments in \code{\dots} are named, those names are used.  For the
#!   remaining arguments, \code{deparse.level = 0} gives an empty name,
#!   \code{deparse.level = 1} uses the supplied argument if it is a symbol,
#!   and \code{deparse.level = 2} will deparse the argument.
#! 
#!   Arguments \code{exclude}, \code{useNA}, are not supported, i.e. \code{NA}s are always tabulated, and, different from \code{\link{table}} they are sorted first if \code{order="values"}. 
#! }
#! \value{
#!   By default (with \code{return="table"}) \code{\link{table}} returns a \emph{contingency table}, an object of
#!   class \code{"table"}, an array of integer values. Note that unlike S the result is always an array, a 1D array if one factor is given. Note also that for multidimensional arrays this is a \emph{dense} return structure which can dramatically increase RAM requirements (for large arrays with high mutual information, i.e. many possible input combinations of which only few occur) and that \code{\link{table}} is limited to \code{2^31} possible combinations (e.g. two input vectors with 46340 unique values only). Finally note that the tabulated values or value-combinations are represented as \code{dimnames} and that the implied conversion of values to strings can cause \emph{severe} performance problems since each string needs to be integrated into R's global string cache. 
#!   \cr
#!   You can use the other \code{return=} options to cope with these problems, the potential combination limit is increased from \code{2^31} to \code{2^63} with these options, RAM is only rewquired for observed combinations and string conversion is avoided. 
#!   \cr
#!   With \code{return="data.frame"} you get a \emph{dense} representation as a \code{\link{data.frame}} (like that resulting from \code{as.data.frame(table(...))}) where only observed combinations are listed (each as a data.frame row) with the corresponding frequency counts (the latter as component
#!   named by \code{responseName}).  This is the inverse of \code{\link{xtabs}}..
#!   \cr
#!   With \code{return="list"} you also get a \emph{dense} representation as a simple \code{\link{list}} with components 
#!   \item{values }{a integer64 vector of the technically tabulated values, for 1D this is the tabulated values themselves, for kD these are the values representing the potential combinations of input values}
#!   \item{counts}{the frequency counts}
#!   \item{dims}{only for kD: a list with the vectors of the unique values of the input dimensions}
#! }
#! \note{
#!   Note that by using \code{\link{as.integer64.factor}} we can also input 
#!   factors into \code{table.integer64} -- only the \code{\link{levels}} get lost.
#!  \cr
#!   Note that because of the existence of \code{\link{as.factor.integer64}} 
#! the standard \code{\link{table}} function -- within its limits -- can also be used 
#! for \code{\link{integer64}}, and especially for combining \code{\link{integer64}} input 
#! with other data types.
#! }
#! \seealso{
#!   \code{\link{table}} for more info on the standard version coping with Base R's data types, \code{\link{tabulate}} which can faster tabulate \code{\link{integer}s} with a limited range \code{[1L .. nL not too big]}, \code{\link{unique.integer64}} for the unique values without counting them and \code{\link{unipos.integer64}} for the positions of the unique values. 
#! }
#! \examples{
#! message("pure integer64 examples")
#! x <- as.integer64(sample(c(rep(NA, 9), 1:9), 32, TRUE))
#! y <- as.integer64(sample(c(rep(NA, 9), 1:9), 32, TRUE))
#! z <- sample(c(rep(NA, 9), letters), 32, TRUE)
#! table.integer64(x)
#! table.integer64(x, order="counts")
#! table.integer64(x, y)
#! table.integer64(x, y, return="data.frame")
#!
#! message("via as.integer64.factor we can use 'table.integer64' also for factors")
#! table.integer64(x, as.integer64(as.factor(z)))
#! 
#! message("via as.factor.integer64 we can also use 'table' for integer64")
#! table(x)
#! table(x, exclude=NULL)
#! table(x, z, exclude=NULL)
#!
#! \dontshow{
#!  stopifnot(identical(table.integer64(as.integer64(c(1,1,2))), table(c(1,1,2))))
#!  stopifnot(identical(table.integer64(as.integer64(c(1,1,2)),as.integer64(c(3,4,4))), table(c(1,1,2),c(3,4,4))))
#!  message("the following works with three warnings due to coercion")
#!  stopifnot(identical(table.integer64(c(1,1,2)), table(c(1,1,2))))
#!  stopifnot(identical(table.integer64(as.integer64(c(1,1,2)),c(3,4,4)), table(c(1,1,2),c(3,4,4))))
#!  stopifnot(identical(table.integer64(c(1,1,2),as.integer64(c(3,4,4))), table(c(1,1,2),c(3,4,4))))
#!  message("the following works because of as.factor.integer64")
#!  stopifnot(identical(table(as.integer64(c(1,1,2))), table(c(1,1,2))))  
#!  stopifnot(identical(table(as.integer64(c(1,1,2)),as.integer64(c(3,4,4))), table(c(1,1,2),c(3,4,4))))
#!  stopifnot(identical(table(as.integer64(c(1,1,2)),c(3,4,4)), table(c(1,1,2),c(3,4,4))))
#!  stopifnot(identical(table(c(1,1,2),as.integer64(c(3,4,4))), table(c(1,1,2),c(3,4,4))))
#! }
#!
#! }
#! \keyword{category}


table.integer64 <- function(
  ...
, return = c("table","data.frame","list")
, order = c("values","counts")
, nunique = NULL
, method = NULL
, dnn = list.names(...), deparse.level = 1
){
  order <- match.arg(order)
  return <- match.arg(return)
  # this is taken from 'table'
	list.names <- function(...){
        l <- as.list(substitute(list(...)))[-1L]
        nm <- names(l)
        fixup <- if (is.null(nm)) 
            seq_along(l)
        else nm == ""
        dep <- vapply(l[fixup], function(x) switch(deparse.level + 
            1, "", if (is.symbol(x)) as.character(x) else "", 
            deparse(x, nlines = 1)[1L]), "")
        if (is.null(nm)) 
            dep
        else {
            nm[fixup] <- dep
            nm
        }
    }  

	# COPY ON MODIFY is broken for reading from list(...)
	# because list(...) creates a copy of all ... and this invalidates our caches
	# therefore we go this sick workaround
	argsymbols <- as.list(substitute(list(...)))[-1L]
	argframe <- parent.frame()
	A <- function(i)eval(argsymbols[[i]], argframe)
	N <- length(argsymbols)
	if (!N) 
		stop("nothing to tabulate")
	if (N == 1L && is.list(A(1L))){
		args <- A(1L)
		if (length(dnn) != length(args)) 
            dnn <- if (!is.null(argn <- names(args))) argn
				else paste(dnn[1L], seq_along(args), sep = ".")		
		N <- length(args)
		A <- function(i)args[[i]]
	}
	force(dnn)
		
	if (N==1L){
		x <- A(1L)
			if (!is.integer64(x)){
				warning("coercing first argument to integer64")
				x <- as.integer64(x)
			}
	}else{
		a <- A(1L)
		n <- length(a)
		nu <- integer(N)
		d <- integer64(N+1); d[[1]] <- 1L
		dims <- vector("list", N)
		names(dims) <- dnn
		for (i in 1:N){
			a <- A(i)
			if (length(a) != n) 
				stop("all input vectors must have the same length")
			if (!is.integer64(a)){
				warning("coercing argument ", i, " to integer64")
				a <- as.integer64(a)
			}
			c <- cache(a)
			if (is.null(c$order)){
				s <- clone(a)
				o <- seq_along(s)
				ramsortorder(s,o)
				nu[[i]] <- sortnut(s)[["nunique"]]
			}else if (is.null(c$sort)){
				o <- c$order
				s <- a[o]
				nu[[i]] <- c$nunique
			}else{
				o <- c$order
				s <- c$sort
				nu[[i]] <- c$nunique
			}
			d[[i+1]] <- d[[i]] * nu[[i]]
			if (is.na(d[[i+1]]))
				stop("attempt to make a table from more than >= 2^63 hypothetical combinations")
			dims[[i]] <- sortuni(s, nu[[i]])
			if (i==1L)
				x <- sortorderkey(s,o) - 1L
			else
				x <- x + d[[i]] * (sortorderkey(s,o) - 1L)
		}
	}
  c <- cache(x)
  if (is.null(nunique) && !is.null(c))
	nunique <- c$nunique
  if (is.null(method)){
    if (is.null(c)){
		if (order=="values" && (is.null(nunique) || nunique>65536))
			method <- "sorttab"
		else
			method <- "hashmaptab"
	}else{
		if (order=="values"){
			if (exists("sort", envir=c, inherits=FALSE))
				method <- "sorttab"
			else if (exists("hashmap", envir=c, inherits=FALSE) && c$nunique<sqrt(length(x)))
				method <- "hashtab"
			else if (exists("order", envir=c, inherits=FALSE))
				method <- "ordertab"
			else
				method <- "sorttab"
		}else{ # order = "counts"
			if (exists("hashmap", envir=c, inherits=FALSE))
				method <- "hashtab"
			else if (exists("sort", envir=c, inherits=FALSE))
				method <- "sorttab"
			else if (exists("order", envir=c, inherits=FALSE))
				method <- "ordertab"
			else
				method <- "hashmaptab"
		}
	}
  }
  switch(method
  , hashmaptab={
		tmp <- hashmaptab(x, nunique=nunique)
		cnt <- tmp$counts
		val <- tmp$values
		rm(tmp)
    }
  , hashtab={
		if (is.null(c) || !exists("hashmap", envir=c, inherits=FALSE))
			h <- hashmap(x, nunique=nunique)
		else
			h <- c 
		tmp <- hashtab(h, keep.order=FALSE)
		cnt <- tmp$counts
		val <- tmp$values
		rm(tmp)
    }
  , sorttab={
		if (is.null(c) || !exists("sort", c, inherits=FALSE)){
			s <- clone(x)
			ramsort(s, na.last=FALSE)
		}else{
			s <- get("sort", c, inherits=FALSE)
		}
		if (is.null(nunique))
			nunique <- sortnut(s)[1]
		val <- sortuni(s, nunique)
		cnt <- sorttab(s, nunique)
    }
  , ordertab={
		if (is.null(c) || !exists("order", c, inherits=FALSE)){
			o <- seq_along(x)
			ramorder(x, o, na.last=FALSE)
		}else{
			o <- get("order", c, inherits=FALSE)
		}
		if (is.null(nunique))
			nunique <- ordernut(x, o)[1]
		val <- orderuni(x, o, nunique, keep.order=FALSE)
		cnt <- ordertab(x, o, nunique, keep.order=FALSE)
		rm(o)
    }
  , stop("unknown method", method)
  )
	if (order=="values"){
		if (substr(method, 1, 4)=="hash"){
			o <- seq_along(val)
			ramsortorder(val, o, na.last=FALSE)
			cnt <- cnt[o]
		}
	}else{
		# xx workaround until we have implemented ramsort.integer
		o <- sort.list(cnt, na.last=NA, method="quick")
		cnt <- cnt[o]
		# o <- seq_along(cnt)
		# ramsortorder(cnt, o, na.last=FALSE)
		val <- val[o]
	}
  ## attaching names is extremely expensive with many unique values, doing this only for compatibility with 'table' here
  switch(return
  ,  "table" = {
  		if (N==1){
			attr(cnt, "dim") <-length(cnt)
			dn <- list(as.character(val))
			names(dn) <- dnn[1]
			attr(cnt, "dimnames") <- dn
		}else{
			a <- array(0L, dim=nu, dimnames=lapply(dims, as.character))
			a[as.integer(val)+1L] <- as.integer(cnt)
			cnt <- a
		}
		oldClass(cnt) <- "table"
	}
  ,  "data.frame" = {
		if (N==1){
			cnt <- data.frame(values=val, Freq=cnt)
			names(cnt)[[1]] <- dnn[1]
		}else{
			for (i in N:1){
				w <- val %/% d[[i]]
				val <- val - d[[i]]*w
				dims[[i]] <- dims[[i]][as.integer(w)+1L]
			}
			cnt <- data.frame(dims, Freq=cnt)
		}
	}
  , "list" = {
		if (N==1)
			cnt <- list(values=val, counts=cnt)
		else
			cnt <- list(values=val, counts=cnt, dims=dims)
    }
  )
  cnt
}


as.factor.integer64 <- function(x){

	c <- cache(x)
	if (is.null(c$order)){
		s <- clone(x)
		o <- seq_along(s)
		na.count <- ramsortorder(s,o)
		nu <- sortnut(s)[["nunique"]]
	}else if (is.null(c$sort)){
		o <- c$order
		s <- x[o]
		na.count <- c$na.count
		nu <- c$nunique
	}else{
		o <- c$order
		s <- c$sort
		na.count <- c$na.count
		nu <- c$nunique
	}
	dimtab <- sortuni(s, nu)
	dimpos <- sortorderkey(s,o,na.skip.num=na.count) - 1L
	attr(dimpos, "levels") <- dimtab
	oldClass(dimpos) <- "factor"
	dimpos
}

as.ordered.integer64 <- function(x){

	c <- cache(x)
	if (is.null(c$order)){
		s <- clone(x)
		o <- seq_along(s)
		na.count <- ramsortorder(s,o)
		nu <- sortnut(s)[["nunique"]]
	}else if (is.null(c$sort)){
		o <- c$order
		s <- x[o]
		na.count <- c$na.count
		nu <- c$nunique
	}else{
		o <- c$order
		s <- c$sort
		na.count <- c$na.count
		nu <- c$nunique
	}
	dimtab <- sortuni(s, nu)
	dimpos <- sortorderkey(s,o,na.skip.num=na.count) - 1L
	attr(dimpos, "levels") <- dimtab
	oldClass(dimpos) <- c("ordered", "factor")
	dimpos
}

as.integer64.factor <- function(x, ...)as.integer64(unclass(x))



#! \name{keypos}
#! \alias{keypos}
#! \alias{keypos.integer64}
#! \title{Extract Positions in redundant dimension table}
#! \description{
#!   \code{keypos} returns the positions of the (fact table) elements that participate in their sorted unique subset (dimension table)
#! }
#! \usage{
#! keypos(x, \dots)
#! \method{keypos}{integer64}(x, method = NULL, \dots)
#! }
#! \arguments{
#!   \item{x}{a vector or a data frame or an array or \code{NULL}.}
#!   \item{method}{
#! 	NULL for automatic method selection or a suitable low-level method, see details
#! }
#!   \item{\dots}{ignored}
#! }
#! \details{
#!   NAs are sorted first in the dimension table, see \code{\link{ramorder.integer64}}.
#!   \cr
#!   This function automatically chooses from several low-level functions considering the size of \code{x} and the availability of a cache. 
#!   Suitable methods are \code{\link{sortorderkey}} (fast ordering) 
#! and \code{\link{orderkey}} (memory saving ordering).
#! }
#! \value{
#!   an integer vector of the same length as \code{x} containing positions relativ to \code{sort(unique(x), na.last=FALSE)}
#! }
#! \author{
#! 	Jens Oehlschlägel <Jens.Oehlschlaegel@truecluster.com>
#! }
#! \seealso{
#!   \code{\link{unique.integer64}} for the unique subset and \code{\link{match.integer64}} for finding positions in a different vector.
#! }
#! \examples{
#! x <- as.integer64(sample(c(rep(NA, 9), 1:9), 32, TRUE))
#! keypos(x)
#! 
#! stopifnot(identical(keypos(x),  match.integer64(x, sort(unique(x), na.last=FALSE))))
#! }
#! \keyword{manip}
#! \keyword{univar}



keypos <- function(x, ...)UseMethod("keypos")
keypos.integer64 <- function(x
, method = NULL
, ...
){
  c <- cache(x)
  if (is.null(method)){
    if (is.null(c)){
		method <- "sortorderkey"
	}else{
		if (exists("order", envir=c, inherits=FALSE)){
			if (exists("sort", envir=c, inherits=FALSE))
				method <- "sortorderkey"
			else
				method <- "orderkey"
		}else
			method <- "sortorderkey"
	}
  }
  switch(method
  , sortorderkey={
		if (is.null(c) || !exists("sort", c, inherits=FALSE) || !exists("order", c, inherits=FALSE)){
			s <- clone(x)
			o <- seq_along(x)
			ramsortorder(s, o, na.last=FALSE)
		}else{
			s <- get("sort", c, inherits=FALSE)
			o <- get("order", c, inherits=FALSE)
		}
		p <- sortorderkey(s, o)
    }
  , orderkey={
		if (is.null(c) || !exists("order", c, inherits=FALSE)){
			o <- seq_along(x)
			ramorder(x, o, na.last=FALSE)
		}else{
			o <- get("order", c, inherits=FALSE)
		}
		p <- orderkey(x, o)
    }
  , stop("unknown method", method)
  )
  p
}

#! \name{tiepos}
#! \alias{tiepos}
#! \alias{tiepos.integer64}
#! \title{Extract Positions of Tied Elements}
#! \description{
#!   \code{tiepos} returns the positions of those elements that participate in ties.
#! }
#! \usage{
#! tiepos(x, \dots)
#! \method{tiepos}{integer64}(x, nties = NULL, method = NULL, \dots)
#! }
#! \arguments{
#!   \item{x}{a vector or a data frame or an array or \code{NULL}.}
#!   \item{nties}{
#! 	NULL or the number of tied values (including NA). Providing \code{nties} can speed-up when \code{x} has no cache. Note that a wrong nties can cause undefined behaviour up to a crash.
#! }
#!   \item{method}{
#! 	NULL for automatic method selection or a suitable low-level method, see details
#! }
#!   \item{\dots}{ignored}
#! }
#! \details{
#!   This function automatically chooses from several low-level functions considering the size of \code{x} and the availability of a cache. 
#!   Suitable methods are \code{\link{sortordertie}} (fast ordering) 
#! and \code{\link{ordertie}} (memory saving ordering).
#! }
#! \value{
#!   an integer vector of positions
#! }
#! \author{
#! 	Jens Oehlschlägel <Jens.Oehlschlaegel@truecluster.com>
#! }
#! \seealso{
#!   \code{\link{rank.integer64}} for possibly tied ranks and \code{\link{unipos.integer64}} for positions of unique values.
#! }
#! \examples{
#! x <- as.integer64(sample(c(rep(NA, 9), 1:9), 32, TRUE))
#! tiepos(x)
#! 
#! stopifnot(identical(tiepos(x),  (1:length(x))[duplicated(x) | rev(duplicated(rev(x)))]))
#! }
#! \keyword{manip}
#! \keyword{univar}


tiepos <- function(x, ...)UseMethod("tiepos")
tiepos.integer64 <- function(x
, nties = NULL
, method = NULL
, ...
){
  c <- cache(x)
  if (is.null(nties) && !is.null(c))
	nties <- c$nties
  if (is.null(method)){
    if (is.null(c)){
		method <- "sortordertie"
	}else{
		if (exists("order", envir=c, inherits=FALSE)){
			if (exists("sort", envir=c, inherits=FALSE))
				method <- "sortordertie"
			else
				method <- "ordertie"
		}else
			method <- "sortordertie"
	}
  }
  switch(method
  , sortordertie={
		if (is.null(c) || !exists("sort", c, inherits=FALSE) || !exists("order", c, inherits=FALSE)){
			s <- clone(x)
			o <- seq_along(x)
			ramsortorder(s, o, na.last=FALSE)
		}else{
			s <- get("sort", c, inherits=FALSE)
			o <- get("order", c, inherits=FALSE)
		}
		if (is.null(nties))
			nties <- sortnut(s)[2]
		p <- sortordertie(s, o, nties)
    }
  , ordertie={
		if (is.null(c) || !exists("order", c, inherits=FALSE)){
			o <- seq_along(x)
			ramorder(x, o, na.last=FALSE)
		}else{
			o <- get("order", c, inherits=FALSE)
		}
		if (is.null(nties))
			nties <- ordernut(x, o)[2]
		p <- ordertie(x, o, nties)
    }
  , stop("unknown method", method)
  )
  p
}


#! \name{rank.integer64}
#! \alias{rank.integer64}
#! \title{Sample Ranks from integer64}
#! \description{
#!   Returns the sample ranks of the values in a vector.  Ties (i.e., equal
#!   values) are averaged and missing values propagated.
#! }
#! \usage{
#! 	\method{rank}{integer64}(x, method = NULL, \dots)
#! }
#! \arguments{
#!   \item{x}{a integer64 vector}
#!   \item{method}{
#! 	NULL for automatic method selection or a suitable low-level method, see details
#! }
#!   \item{\dots}{ignored}
#! }
#! \details{
#!   This function automatically chooses from several low-level functions considering the size of \code{x} and the availability of a cache. 
#!   Suitable methods are \code{\link{sortorderrnk}} (fast ordering) 
#! and \code{\link{orderrnk}} (memory saving ordering).
#! }
#! \value{
#!   A numeric vector of the same length as \code{x}.
#! }
#! \author{
#! 	Jens Oehlschlägel <Jens.Oehlschlaegel@truecluster.com>
#! }
#! \seealso{
#!   \code{\link{order.integer64}}, \code{\link{rank}} and \code{\link{prank}} for percent rank.
#! }
#! \examples{
#! x <- as.integer64(sample(c(rep(NA, 9), 1:9), 32, TRUE))
#! rank.integer64(x)
#! 
#! stopifnot(identical(rank.integer64(x),  rank(as.integer(x)
#! , na.last="keep", ties.method = "average")))
#! }
#! \keyword{univar}

rank.integer64 <- function(x
, method = NULL
, ...
){
  c <- cache(x)
  if (is.null(method)){
    if (is.null(c)){
		method <- "sortorderrnk"
	}else{
		if (exists("order", envir=c, inherits=FALSE)){
			if (exists("sort", envir=c, inherits=FALSE))
				method <- "sortorderrnk"
			else
				method <- "orderrnk"
		}else
			method <- "sortorderrnk"
	}
  }
  switch(method
  , sortorderrnk={
		if (is.null(c) || !exists("sort", c, inherits=FALSE) || !exists("order", c, inherits=FALSE)){
			s <- clone(x)
			o <- seq_along(x)
			na.count <- ramsortorder(s, o, na.last=FALSE)
		}else{
			s <- get("sort", c, inherits=FALSE)
			o <- get("order", c, inherits=FALSE)
			na.count <- get("na.count", c, inherits=FALSE)
		}
		p <- sortorderrnk(s, o, na.count)
    }
  , orderrnk={
		if (is.null(c) || !exists("order", c, inherits=FALSE)){
			o <- seq_along(x)
			na.count <- ramorder(x, o, na.last=FALSE)
		}else{
			o <- get("order", c, inherits=FALSE)
			na.count <- get("na.count", c, inherits=FALSE)
		}
		p <- orderrnk(x, o, na.count)
    }
  , stop("unknown method", method)
  )
  p
}

#! \name{prank}
#! \alias{prank}
#! \alias{prank.integer64}
#! \title{(P)ercent (Rank)s}
#! \description{
#! 	Function \code{prank.integer64}  projects the values [min..max] via ranks [1..n] to [0..1]. 
#! 	\code{\link{qtile.integer64}} is the inverse function of 'prank.integer64' and projects [0..1] to [min..max].
#! }
#! \usage{
#! 	prank(x, \dots)
#! 	\method{prank}{integer64}(x, method = NULL, \dots)
#! }
#! \arguments{
#!   \item{x}{a integer64 vector}
#!   \item{method}{
#! 	NULL for automatic method selection or a suitable low-level method, see details
#! }
#!   \item{\dots}{ignored}
#! }
#! \details{
#! 	Function \code{prank.integer64} is based on \code{\link{rank.integer64}}.
#! }
#! \value{
#!   \code{prank} returns a numeric vector of the same length as \code{x}.
#! }
#! \author{
#! 	Jens Oehlschlägel <Jens.Oehlschlaegel@truecluster.com>
#! }
#! \seealso{
#!   \code{\link{rank.integer64}} for simple ranks and \code{\link{qtile}} for the inverse function quantiles.
#! }
#! \examples{
#! x <- as.integer64(sample(c(rep(NA, 9), 1:9), 32, TRUE))
#! prank(x)
#! 
#! x <- x[!is.na(x)]
#! stopifnot(identical(x,  unname(qtile(x, probs=prank(x)))))
#! }
#! \keyword{univar}

prank <- function(x, ...)UseMethod("prank")
prank.integer64 <- function(x
, method = NULL
, ...
)
{	
	n <- nvalid(x)
	if (n<2L)
		return(rep(as.integer64(NA), length(x)))
	(rank.integer64(x, method=method, ...)-1L)/(n-1L)
}

#! \name{qtile}
#! \alias{qtile}
#! \alias{qtile.integer64}
#! \alias{quantile.integer64}
#! \alias{median.integer64}
#! \alias{mean.integer64}
#! \alias{summary.integer64}
#! \title{(Q)uan(Tile)s }
#! \description{
#! 	Function \code{\link{prank.integer64}}  projects the values [min..max] via ranks [1..n] to [0..1]. 
#! 	\code{qtile.ineger64} is the inverse function of 'prank.integer64' and projects [0..1] to [min..max].
#! }
#! \usage{
#! 	qtile(x, probs=seq(0, 1, 0.25), \dots)
#! 	\method{qtile}{integer64}(x, probs = seq(0, 1, 0.25), names = TRUE, method = NULL, \dots)
#! 	\method{quantile}{integer64}(x, probs = seq(0, 1, 0.25), na.rm = FALSE, names = TRUE, type=0L, \dots)
#! 	\method{median}{integer64}(x, na.rm = FALSE, \dots)
#!  \method{mean}{integer64}(x, na.rm = FALSE, \dots)
#! 	\method{summary}{integer64}(object, \dots)
#!  ## mean(x, na.rm = FALSE, ...)
#!  ## or
#!  ## mean(x, na.rm = FALSE)
#! }
#! \arguments{
#!   \item{x}{a integer64 vector}
#!   \item{object}{a integer64 vector}
#!   \item{probs}{
#! 		numeric vector of probabilities with values in [0,1] - possibly containing \code{NA}s
#! }
#!   \item{names}{
#! 	logical; if \code{TRUE}, the result has a \code{names} attribute. Set to \code{FALSE} for speedup with many probs.
#! }
#!   \item{type}{
#! 	an integer selecting the quantile algorithm, currently only 0 is supported, see details
#! }
#!   \item{method}{
#! 	NULL for automatic method selection or a suitable low-level method, see details
#! }
#!   \item{na.rm}{
#! 	logical; if \code{TRUE}, any \code{NA} and \code{NaN}'s are removed from \code{x} before the quantiles are computed.
#! }
#!   \item{\dots}{ignored}
#! }
#! \details{
#!  Functions \code{quantile.integer64} with \code{type=0} and \code{median.integer64} are convenience wrappers to \code{qtile}.
#!  \cr
#!	Function \code{qtile} behaves very similar to \code{quantile.default} with \code{type=1} 
#!  in that it only returns existing values, it is mostly symetric 
#!  but it is using 'round' rather than 'floor'. 
#!  \cr
#!  Note that this implies that \code{median.integer64} does not interpolate for even number of values 
#! (interpolation would create values that could not be represented as 64-bit integers).
#!  \cr
#!   This function automatically chooses from several low-level functions considering the size of \code{x} and the availability of a cache. 
#!   Suitable methods are \code{\link{sortqtl}} (fast sorting) 
#! and \code{\link{orderqtl}} (memory saving ordering).
#! }
#! \value{
#!   \code{prank} returns a numeric vector of the same length as \code{x}.
#!   \cr
#!   \code{qtile} returns a vector with elements from \code{x} 
#!   at the relative positions specified by \code{probs}.
#! }
#! \author{
#! 	Jens Oehlschlägel <Jens.Oehlschlaegel@truecluster.com>
#! }
#! \seealso{
#!   \code{\link{rank.integer64}} for simple ranks and \code{\link{quantile}} for quantiles.
#! }
#! \examples{
#! x <- as.integer64(sample(c(rep(NA, 9), 1:9), 32, TRUE))
#! qtile(x, probs=seq(0, 1, 0.25))
#! quantile(x, probs=seq(0, 1, 0.25), na.rm=TRUE)
#! median(x, na.rm=TRUE)
#! summary(x)
#! 
#! x <- x[!is.na(x)]
#! stopifnot(identical(x,  unname(qtile(x, probs=prank(x)))))
#! }
#! \keyword{univar}

qtile <- function(x, probs = seq(0, 1, 0.25), ...)UseMethod("qtile")
qtile.integer64 <- function(x, probs = seq(0, 1, 0.25), names = TRUE, method = NULL, ...){
	if (any(is.na(probs) | probs<0 | probs>1))
		stop("p outside [0,1]")
  c <- cache(x)
  if (is.null(method)){
    if (is.null(c)){
		method <- "sortqtl"
	}else{
		if (exists("sort", envir=c, inherits=FALSE))
			method <- "sortqtl"
		else if (exists("order", envir=c, inherits=FALSE))
			method <- "orderqtl"
		else
			method <- "sortqtl"
	}
  }
  switch(method
  , sortqtl={
		if (is.null(c) || !exists("sort", c, inherits=FALSE)){
			s <- clone(x)
			na.count <- ramsort(s, na.last=FALSE)
		}else{
			s <- get("sort", c, inherits=FALSE)
			na.count <- get("na.count", c, inherits=FALSE)
		}
		qs <- sortqtl(s, na.count, probs)
    }
  , orderqtl={
		if (is.null(c) || !exists("order", c, inherits=FALSE)){
			o <- seq_along(x)
			na.count <- ramorder(x, o, na.last=FALSE)
		}else{
			o <- get("order", c, inherits=FALSE)
			na.count <- get("na.count", c, inherits=FALSE)
		}
		qs <- orderqtl(x, o, na.count, probs)
    }
  , stop("unknown method", method)
  )
  if (names){
	np <- length(probs)
	dig <- max(2L, getOption("digits"))
	names(qs) <- paste(if (np < 100) 
		formatC(100 * probs, format = "fg", width = 1, digits = dig)
	else format(100 * probs, trim = TRUE, digits = dig), 
		"%", sep = "")
  }
  qs
}


quantile.integer64 <- function(x, probs = seq(0, 1, 0.25), na.rm = FALSE, names = TRUE, type=0L, ...){
	if (type[[1]]!=0L)
		stop("only type==0 ('qtile') supported")
	if (!na.rm && na.count(x)>0)
		stop("missing values not allowed with 'na.rm='==FALSE")
	qtile.integer64(x, probs = probs, na.rm = na.rm, names = names, ...)
}


# adding ... (wish of Kurt Hornik 23.3.2017)
if (is.na(match("...", names(formals(median))))){
	median.integer64 <- function(x, na.rm=FALSE){
		if (!na.rm && na.count(x)>0)
			stop("missing values not allowed with 'na.rm='==FALSE")
		qtile.integer64(x, probs = 0.5, na.rm = na.rm, names = FALSE)
	}
}else{
	median.integer64 <- function(x, na.rm=FALSE, ...){
		if (!na.rm && na.count(x)>0)
			stop("missing values not allowed with 'na.rm='==FALSE")
		qtile.integer64(x, probs = 0.5, na.rm = na.rm, names = FALSE)
	}
}
	
# mean.integer64 <- function(x, na.rm=FALSE){
	# s <- sum(x, na.rm=na.rm)
	# if (!is.na(s)){
		# if (na.rm)
			# s <- s%/%(length(x)-na.count(x))
		# else
			# s <- s%/%length(x)
	# }
	# s
# }
mean.integer64 <- function(x, na.rm=FALSE, ...){
	ret <- .Call(C_mean_integer64, x, as.logical(na.rm), double(1))
	oldClass(ret) <- "integer64"
	ret
}

summary.integer64 <- function (object, ...){
	nas <- na.count(object)
	qq <- quantile(object, na.rm=TRUE)
	qq <- c(qq[1L:3L], mean(object, na.rm=TRUE), qq[4L:5L])
    names(qq) <- c("Min.", "1st Qu.", "Median", "Mean", "3rd Qu.", "Max.")
	if (any(nas)) 
		c(qq, "NA's" = nas)
	else qq
}