Nothing
# Fast methods for integers leveraging bit vectors
# (c) 2016-2017 Jens Oehlschägel
# Licence: GPL2
# Provided 'as is', use at your own risk
#' Get range and number of NAs
#'
#' @param x an integer vector
#'
#' @return an integer vector with three elements \describe{
#' \item{ 1 }{ min integer }
#' \item{ 2 }{ max integer }
#' \item{ 3 }{ number of NAs }
#' }
#' @seealso \code{\link{range_nanozero}} and \code{\link{range_sortna}}
#'
#' @examples
#' range_na(c(0L,1L,2L,NA))
#' @export
range_na <- function(x){
if (!(is.integer(x) || is.factor(x)))
stop("x must be integer (or factor)")
.Call(C_R_range_na, x)
}
#' Remove zeros and get range and number of NAs
#'
#' @param x an integer vector
#'
#' @return an integer vector without zeros and with an attribute \code{\link{range_na}} with three elements \describe{
#' \item{ 1 }{ min integer }
#' \item{ 2 }{ max integer }
#' \item{ 3 }{ number of NAs }
#' }
#' @seealso \code{\link{range_na}} and \code{\link{range_sortna}}
#'
#' @examples
#' range_nanozero(c(0L,1L,2L,NA))
#' @export
range_nanozero <- function(x){
if (!(is.integer(x) || is.factor(x)))
stop("x must be integer (or factor)")
.Call(C_R_range_nanozero, x)
}
#' Prepare for sorting and get range, number of NAs and unsortedness
#'
#' In one pass over the vector \code{NA}s are treated according to parameter
#' \code{na.last} exactly like \code{\link{sort}} does, the \code{\link{range}},
#' number of \code{NA}s and unsortedness is determined.
#'
#' @param x an integer vector
#' @inheritParams bit_sort
#'
#' @return an integer vector with \code{NA}s are treated and an \code{attribute} \code{\link{range_na}} with four elements \describe{
#' \item{ 1 }{ min integer }
#' \item{ 2 }{ max integer }
#' \item{ 3 }{ number of NAs }
#' \item{ 3 }{ 0 for sorted vector and 1 for \code{\link{is.unsorted}} }
#' }
#' @seealso \code{\link{range_na}} and \code{\link{range_nanozero}}
#'
#' @examples
#' range_sortna(c(0L,1L,NA,2L))
#' range_sortna(c(2L,NA,1L,0L))
#' range_sortna(c(0L,1L,NA,2L), na.last=TRUE)
#' range_sortna(c(2L,NA,1L,0L), na.last=TRUE)
#' range_sortna(c(0L,1L,NA,2L), na.last=FALSE)
#' range_sortna(c(2L,NA,1L,0L), na.last=FALSE)
#' @export
range_sortna <- function(x, decreasing = FALSE, na.last=NA){
if (!(is.integer(x) || is.factor(x)))
stop("x must be integer (or factor)")
.Call(C_R_range_sortna, x, as.logical(decreasing), as.logical(na.last))
}
#' Low-level sorting: binary quicksort
#'
#' In one pass over the vector \code{NA}s are handled according to parameter
#' \code{na.last} by \code{\link{range_sortna}}, then, if the vector is unsorted,
#' binary quicksort is invoked.
#'
#' @param x an integer vector
#' @inheritParams bit_sort
#'
#' @return a sorted vector
#'
#' @examples
#' quicksort2(c(2L,0L,1L,NA,2L))
#' quicksort2(c(2L,0L,1L,NA,2L), na.last=TRUE)
#' quicksort2(c(2L,0L,1L,NA,2L), na.last=FALSE)
#' @export
quicksort2 <- function(x, na.last=NA){
if (!(is.integer(x) || is.factor(x)))
stop("x must be integer (or factor)")
if (!length(x))
return(x)
x <- range_sortna(x, na.last=na.last)
range_sortna <- getsetattr(x, "range_sortna", NULL)
if (is.na(range_sortna[1])) # no numbers to sort
return(rep(NA_integer_, range_sortna[3]))
if (range_sortna[4]) # is.unsorted
.Call(C_R_int_quicksort2, x, range_sortna, as.logical(na.last))
else
x
}
#' Low-level sorting: threeway quicksort
#'
#' In one pass over the vector \code{NA}s are handled according to parameter
#' \code{na.last} by \code{\link{range_sortna}}, then, if the vector is unsorted,
#' threeway quicksort is invoked.
#'
#' @param x an integer vector
#' @inheritParams bit_sort
#'
#' @return a sorted vector
#'
#' @examples
#' countsort(c(2L,0L,1L,NA,2L))
#' countsort(c(2L,0L,1L,NA,2L), na.last=TRUE)
#' countsort(c(2L,0L,1L,NA,2L), na.last=FALSE)
#' @export
quicksort3 <- function(x, na.last=NA){
if (!(is.integer(x) || is.factor(x)))
stop("x must be integer (or factor)")
if (!length(x))
return(x)
x <- range_sortna(x, na.last=na.last)
range_sortna <- getsetattr(x, "range_sortna", NULL)
if (is.na(range_sortna[1])) # no numbers to sort
return(rep(NA_integer_, range_sortna[3]))
if (range_sortna[4]) # is.unsorted
.Call(C_R_int_quicksort3, x, range_sortna, as.logical(na.last))
else
x
}
#' Low-level sorting: counting sort
#'
#' In one pass over the vector \code{NA}s are handled according to parameter
#' \code{na.last} by \code{\link{range_sortna}}, then, if the vector is unsorted,
#' counting sort is invoked.
#'
#' @param x an integer vector
#' @inheritParams bit_sort
#'
#' @return a sorted vector
#'
#' @examples
#' countsort(c(2L,0L,1L,NA,2L))
#' countsort(c(2L,0L,1L,NA,2L), na.last=TRUE)
#' countsort(c(2L,0L,1L,NA,2L), na.last=FALSE)
#' @export
countsort <- function(x, na.last=NA){
if (!(is.integer(x) || is.factor(x)))
stop("x must be integer (or factor)")
if (!length(x))
return(x)
x <- range_sortna(x, na.last=na.last)
range_sortna <- getsetattr(x, "range_sortna", NULL)
if (is.na(range_sortna[1])) # no numbers to sort
return(rep(NA_integer_, range_sortna[3]))
if (range_sortna[4]) # is.unsorted
.Call(C_R_int_countsort, x, range_sortna, as.logical(na.last))
else
x
}
#' Low-level sorting: bit sort
#'
#' In one pass over the vector \code{NA}s are handled according to parameter
#' \code{na.last} by \code{\link{range_sortna}}, then, if the vector is unsorted,
#' bit sort is invoked.
#'
#' @param x an integer vector
#' @param depth an integer scalar giving the number of bit-passed before switching to quicksort
#' @inheritParams bit_sort
#'
#' @return a sorted vector
#'
#' @examples
#' bitsort(c(2L,0L,1L,NA,2L))
#' bitsort(c(2L,0L,1L,NA,2L), na.last=TRUE)
#' bitsort(c(2L,0L,1L,NA,2L), na.last=FALSE)
#' @export
bitsort <- function(x, na.last=NA, depth=1){
if (!(is.integer(x) || is.factor(x)))
stop("x must be integer (or factor)")
if (!length(x))
return(x)
x <- range_sortna(x, na.last=na.last)
range_sortna <- getsetattr(x, "range_sortna", NULL)
if (is.na(range_sortna[1])) # no numbers to sort
return(rep(NA_integer_, range_sortna[3]))
if (!range_sortna[4]) # already sorted
return(x)
n <- as.double(range_sortna[2])-as.double(range_sortna[1])+1
.Call(C_R_bit_sort, tmp=bit(n), x, range_sortna, as.logical(na.last), depth=as.integer(depth))
}
# if (FALSE){
# # time measure relative speeds (radixsort is not yet integrated)
# require(microbenchmark)
# require(bit)
# require(ff)
# x <- sample(2,1e8,T)
# system.time(bitsort(x))
# x <- sample(1e8,1e8,T)
# system.time(bitsort(x))
#
# D <- as.integer(2^(1:27))
# N <- as.integer(2^(1:27))
# M <- c("b","c","q") #,"r")
# tim <- array(NA, dim=c(D=length(D), N=length(N), M=length(M)), dimnames=list(D=D, N=N, M=M))
# for (ni in seq_along(N)){
# n <- N[ni]
# for (di in seq_along(D)){
# d <- D[di]
# x <- sample(d,n,T)
# x[1] <- d
# if (di > ni-5 && di < ni+10){
# tim[di,ni,"b"] <- microbenchmark(bitsort(x), times = 1)$time
# tim[di,ni,"c"] <- microbenchmark(countsort(x), times = 1)$time
# tim[di,ni,"q"] <- microbenchmark(quicksort3(x), times = 1)$time
# #tim[di,ni,"r"] <- microbenchmark(radixsort(copy_vector(x)), times = 1)$time
# }
# }
# print(apply(tim[,1:ni,,drop=FALSE], 1:2, function(x){i <- which.min(x); if (length(i))M[i] else "."}), quote=FALSE)
# }
#
# round( tim[,,"b"] / tim[,,"c"], 1 )
# round( tim[,,"b"] / tim[,,"q"], 1 )
# round( tim[,,"c"] / tim[,,"q"], 1 )
# round( tim[,,"c"] / tim[,,"r"], 1 )
# round( tim[,,"q"] / tim[,,"r"], 1 )
#
# M <- dimnames(tim)[[3]]
# r <- as.integer(rownames(tim[,,1]))[row(tim[,,1])]
# n <- as.integer(colnames(tim[,,1]))[col(tim[,,1])]
# d <- n/r
# #x <- ifelse(d>=1, "c", "q")
# x <- ifelse(d>=0.5,"c",ifelse(n<=1024,"q",ifelse(d<0.0625,"q","b")))
# x <- ifelse(n<=1024, ifelse(d<0.5, "q", "c"), ifelse(d<1, ifelse(d<0.0625, "q", "b"), "c"))
# dim(x) <- dim(tim[,,1])
# dimnames(x) <- dimnames(tim[,,1])
# y <- tim[cbind(as.vector(row(x)), as.vector(col(x)), match(as.vector(x), M))]
# attributes(y) <- attributes(x)
# print(apply(tim, 1:2, function(x){i <- which.min(x); if (length(i))M[i] else "."}), quote=FALSE)
# print(x, quote=F)
# round(y / apply(tim[,,-4], 1:2, min, na.rm=T), 1)
# round(y / tim[,,1], 1)
#
# f <- function(n, d=1, k=1){
# x <- dbinom(1:log2(n), n, 1/(d*n))*n
# r <- rev(cumsum(rev(x)))
# rr <- c(r[-1], 0)
# c <- r*log2(r)
# b <- n*k+r+rr*log2(rr)
# w <- as.integer(!is.na(b) & b<c)
# #print(round(data.frame(x=x, r=r, rr=rr, c=c, b=b, w=w)))
# sum(w)
# }
# sapply(2^(1:30), f, d=8, k=1)
# sapply(2^(1:30), f, k=2)
# sapply(2^(1:30), f, k=4)
# sapply(2^(1:30), f, k=8)
# sapply(2^(1:30), f, k=16)
# sapply(2^(1:30), f, k=32)
#
# require(microbenchmark)
# require(bit)
# n <- as.integer(2^27)
# x <- sample(n*4, n, T)
# times <- 1
# microbenchmark(countsort(x), times=times)
# microbenchmark(quicksort3(x), times=times)
# microbenchmark(bitsort(x, depth=1), times=times)
# microbenchmark(bitsort(x, depth=2), times=times)
# microbenchmark(bitsort(x, depth=3), times=times)
# microbenchmark(bitsort(x, depth=4), times=times)
# microbenchmark(bitsort(x, depth=5), times=times)
#
# microbenchmark(quicksort2(x), times=times)
# microbenchmark(bit_sort_unique(x), times=times)
#
# # ergo: ab 2^16 zwischen 1/4 und 1/32
#
# }
#' bit sort
#'
#' fast sorting of integers
#'
#' determines the range of the integers and checks if the density justifies use
#' of a bit vector; if yes, sorts the first occurences of each integer in the
#' range using a bit vector, sorts the rest and merges; if no, falls back to quicksort.
#'
#' @param x an integer vector
#' @param decreasing (currently only \code{FALSE} is supported)
#' @param na.last \code{NA} removes NAs, \code{FALSE} puts NAs at the beginning,
#' \code{TRUE} puts NAs at the end
#' @param has.dup TRUE (the default) assumes that \code{x} might have
#' duplicates, set to \code{FALSE} if duplicates are impossible
#'
#' @return a sorted vector
#' @seealso \code{\link{sort}}, \code{\link{ramsort}},
#' \code{\link{bit_sort_unique}}
#'
#' @examples
#' bit_sort(c(2L,1L,NA,NA,1L,2L))
#' bit_sort(c(2L,1L,NA,NA,1L,2L), na.last=FALSE)
#' bit_sort(c(2L,1L,NA,NA,1L,2L), na.last=TRUE)
#'
#' \dontrun{
#' x <- sample(1e7, replace=TRUE)
#' system.time(bit_sort(x))
#' system.time(sort(x))
#' }
#' @export
bit_sort <- function(x, decreasing = FALSE, na.last=NA, has.dup = TRUE){
if (decreasing)
stop("decreasing=TRUE not implemented")
if (!has.dup)
return(bit_sort_unique(x, decreasing = decreasing, na.last=na.last, has.dup=FALSE))
if (!(is.integer(x) || is.factor(x)))
stop("x must be integer (or factor)")
if (!length(x))
return(x)
x <- range_sortna(x, decreasing = decreasing, na.last=na.last)
range_sortna <- getsetattr(x, "range_sortna", NULL)
if (is.na(range_sortna[1])) # no numbers to sort
return(rep(NA_integer_, range_sortna[3]))
if (!range_sortna[4]) # already sorted
return(x)
nr <- as.double(range_sortna[2])-as.double(range_sortna[1])+1
d <- length(x) / nr
if (nr<=65536){
if (d<0.5)
ret <- .Call(C_R_int_quicksort3, x, range_sortna, as.logical(na.last))
else
ret <- .Call(C_R_int_countsort, x, range_sortna, as.logical(na.last))
}else{
if (d < 0.03125 || nr > .Machine$integer.max ){
ret <- .Call(C_R_int_quicksort3, x, range_sortna, as.logical(na.last))
}else if (d <= 0.25){
ret <- .Call(C_R_bit_sort, tmp=bit(nr), x, range_sortna, as.logical(na.last), depth=1L)
}else{
ret <- .Call(C_R_int_countsort, x, range_sortna, as.logical(na.last))
}
}
ret
}
#' bit sort unique
#'
#' fast combination of \code{\link{sort}} and \code{\link{unique}} for integers
#'
#' determines the range of the integers and checks if the density justifies use
#' of a bit vector; if yes, creates the result using a bit vector; if no, falls back to \code{sort(unique())}
#'
#' @param x an integer vector
#' @param decreasing \code{FALSE} (ascending) or \code{TRUE} (descending)
#' @param na.last \code{NA} removes NAs, \code{FALSE} puts NAs at the beginning, \code{TRUE} puts NAs at the end
#' @param has.dup TRUE (the default) assumes that \code{x} might have duplicates, set to \code{FALSE} if duplicates are impossible
#' @param range_na \code{NULL} calls \code{\link{range_na}}, optionally the result of \code{\link{range_na}} can be given here to avoid calling it again
#'
#' @return a sorted unique integer vector
#' @seealso \code{\link{sort}}, \code{\link{unique}},
#' \code{\link{bit_sort}}, \code{\link{bit_unique}}
#'
#' @examples
#' bit_sort_unique(c(2L,1L,NA,NA,1L,2L))
#' bit_sort_unique(c(2L,1L,NA,NA,1L,2L), na.last=FALSE)
#' bit_sort_unique(c(2L,1L,NA,NA,1L,2L), na.last=TRUE)
#' bit_sort_unique(c(2L,1L,NA,NA,1L,2L), decreasing = TRUE)
#' bit_sort_unique(c(2L,1L,NA,NA,1L,2L), decreasing = TRUE, na.last=FALSE)
#' bit_sort_unique(c(2L,1L,NA,NA,1L,2L), decreasing = TRUE, na.last=TRUE)
#'
#' \dontrun{
#' x <- sample(1e7, replace=TRUE)
#' system.time(bit_sort_unique(x))
#' system.time(sort(unique(x)))
#' x <- sample(1e7)
#' system.time(bit_sort_unique(x))
#' system.time(sort(x))
#' }
#' @export
bit_sort_unique <- function(x, decreasing = FALSE, na.last=NA, has.dup=TRUE, range_na=NULL){
if (!(is.integer(x) || is.factor(x)))
stop("x must be integer (or factor)")
if (is.null(range_na))
range_na <- range_na(x)
else
range_na <- as.integer(range_na)
if (is.na(range_na[1])){
if (is.na(na.last) || length(range_na)<3L || range_na[3]==0)
return(integer())
else
return(NA_integer_)
}
nr <- as.double(range_na[2])-as.double(range_na[1])+1
d <- length(x) / nr
if (nr > .Machine$integer.max || d < 0 ){
if (has.dup){
ret <- quicksort3(x, na.last = xor(na.last, decreasing))
ret <- merge_unique(ret)
}else{
ret <- quicksort2(x, na.last = xor(na.last, decreasing))
}
if (decreasing)
reverse_vector(ret)
else
ret
}else
.Call(C_R_bit_sort_unique, tmp=bit(nr), x, range_na, as.logical(na.last), as.logical(decreasing))
}
#' bit \%in\%
#'
#' fast \code{\link{\%in\%}} for integers
#'
#' determines the range of the integers and checks if the density justifies use
#' of a bit vector; if yes, maps \code{x} or \code{table} -- whatever is smaller
#' -- into a bit vector and searches the other of \code{table} or \code{x} in
#' the it vector; if no, falls back to \code{\link{\%in\%}}
#'
#' @param x an integer vector of values to be looked-up
#' @param table an integer vector used as lookup-table
#' @param retFUN a function that coerces \code{\link{bit}} and \code{\link{logical}} vectors
#'
#' @return a boolean vector coerced to \code{retFUN}
#' @seealso \code{\link{\%in\%}}
#' @examples
#' bit_in(1:2, 2:3)
#' bit_in(1:2, 2:3, retFUN=as.logical)
#' @export
bit_in <- function(x, table, retFUN=as.bit){
if (!(is.integer(x) || is.factor(x)))
stop("x must be integer (or factor)")
if (!(is.integer(table) || is.factor(table)))
stop("table must be integer (or factor)")
nx <- length(x)
nt <- length(table)
reverse <- nx<nt
if (reverse){
range_na <- range_na(x)
n <- nx
}else{
range_na <- range_na(table)
n <- nt
}
if (is.na(range_na[1]))
nr <- 0L # no numbers, no temp bit vector
else
nr <- as.double(range_na[2])-as.double(range_na[1])+1
d <- (nx+nt) / nr
if (nr==0L || nr > .Machine$integer.max || d < 1/64 ){
ret <- !is.na(match(x, table))
}else{
if (reverse){
ret <- .Call(C_R_bit_table_in, tmp=bit(nr), x, table, range_na, ret=bit(length(x)))
}else{
ret <- .Call(C_R_bit_in_table, tmp=bit(nr), x, table, range_na, ret=bit(length(x)))
}
}
retFUN(ret)
}
#' bit unique and duplicated
#'
#' Fast versions of \code{\link{unique}}, \code{\link{duplicated}} ,
#' \code{\link{anyDuplicated}} and \code{sum(duplicated(x))} for integers.
#'
#' determines the range of the integers and checks if the density justifies use
#' of a bit vector; if yes, uses a bit vector for finding duplicates; if no,
#' falls back to \code{\link{unique}}, \code{\link{duplicated}}, \code{\link{anyDuplicated}} and \code{sum(duplicated(x))}
#'
#' @name bit_unidup
#' @param x an integer vector
#' @param na.rm \code{NA} treats NAs like other integers, \code{TRUE} treats
#' \emph{all} NAs as duplicates, \code{FALSE} treats \emph{no} NAs as
#' duplicates
#' @inheritParams bit_in
#' @inheritParams bit_sort_unique
#'
#' @return
#' \code{bit_unique} returns a vector of unique integers, \cr
#' \code{bit_duplicated} returns a boolean vector coerced to \code{retFUN}, \cr
#' \code{bit_anyDuplicated} returns the position of the first duplicate (or zero if no duplicates) \cr
#' \code{bit_sumDuplicated} returns the number of duplicated values (as.integer)
#' @seealso \code{\link{bit_sort_unique}}
#'
#' @examples
#' bit_unique(c(2L,1L,NA,NA,1L,2L))
#' bit_unique(c(2L,1L,NA,NA,1L,2L), na.rm=FALSE)
#' bit_unique(c(2L,1L,NA,NA,1L,2L), na.rm=TRUE)
#'
#' bit_duplicated(c(2L,1L,NA,NA,1L,2L))
#' bit_duplicated(c(2L,1L,NA,NA,1L,2L), na.rm=FALSE)
#' bit_duplicated(c(2L,1L,NA,NA,1L,2L), na.rm=TRUE)
#'
#' bit_anyDuplicated(c(2L,1L,NA,NA,1L,2L))
#' bit_anyDuplicated(c(2L,1L,NA,NA,1L,2L), na.rm=FALSE)
#' bit_anyDuplicated(c(2L,1L,NA,NA,1L,2L), na.rm=TRUE)
#'
#' bit_sumDuplicated(c(2L,1L,NA,NA,1L,2L))
#' bit_sumDuplicated(c(2L,1L,NA,NA,1L,2L), na.rm=FALSE)
#' bit_sumDuplicated(c(2L,1L,NA,NA,1L,2L), na.rm=TRUE)
NULL
#' @describeIn bit_unidup extracts unique elements
#' @export
bit_unique <- function(x, na.rm = NA, range_na=NULL){
na.rm <- as.logical(na.rm)
if (!(is.integer(x) || is.factor(x)))
stop("x must be integer (or factor)")
if (!length(x))
return(x)
if (is.null(range_na))
range_na <- range_na(x)
else
range_na <- as.integer(range_na)
if (is.na(range_na[1])){
if (is.na(na.rm)){
return(NA_integer_)
}else if (na.rm){
return(integer())
}else{
return(rep(NA_integer_, range_na[3]))
}
}
nr <- as.double(range_na[2])-as.double(range_na[1])+1
d <- length(x) / nr
if (nr > .Machine$integer.max || d < 1/64 ){
if (is.na(na.rm)){
unique(x, incomparables = FALSE)
}else if (na.rm){
x <- unique(x, incomparables = FALSE)
x <- x[!is.na(x)]
x
}else{
unique(x, incomparables = NA)
}
}else{
.Call(C_R_bit_unique, tmp=bit(nr), x, range_na, na.rm)
}
}
#' @describeIn bit_unidup determines duplicate elements
#' @export
bit_duplicated <- function(x, na.rm = NA, range_na=NULL, retFUN=as.bit){
na.rm <- as.logical(na.rm)
if (!(is.integer(x) || is.factor(x)))
stop("x must be integer (or factor)")
if (!length(x))
return(retFUN(NULL))
if (is.null(range_na))
range_na <- range_na(x)
else
range_na <- as.integer(range_na)
if (is.na(range_na[1]))
nr <- 0L # no numbers, no temp bit vector
else
nr <- as.double(range_na[2])-as.double(range_na[1])+1
d <- length(x) / nr
if (nr > .Machine$integer.max || d < 1/64 ){
if (is.na(na.rm)){
ret <- duplicated(x, incomparables = FALSE)
}else if (na.rm){
ret <- duplicated(x, incomparables = FALSE) | is.na(x)
}else{
ret <- duplicated(x, incomparables = NA)
}
}else{
ret <- .Call(C_R_bit_duplicated, tmp=bit(nr), x, range_na, ret=bit(length(x)), as.logical(na.rm))
}
retFUN(ret)
}
#' @describeIn bit_unidup checks for existence of duplicate elements
#' @export
bit_anyDuplicated <- function(x, na.rm = NA, range_na=NULL){
na.rm <- as.logical(na.rm)
if (!(is.integer(x) || is.factor(x)))
stop("x must be integer (or factor)")
if (!length(x))
return(0L)
if (is.null(range_na))
range_na <- range_na(x)
else
range_na <- as.integer(range_na)
if (is.na(range_na[1]))
nr <- 0L # no numbers, no temp bit vector
else
nr <- as.double(range_na[2])-as.double(range_na[1])+1
d <- length(x) / nr
if (nr > .Machine$integer.max || d < 1/64 ){
if (is.na(na.rm)){
anyDuplicated(x, incomparables = FALSE)
}else if (na.rm){
max(0L, min(anyDuplicated(x, incomparables = NA), firstNA(x)))
}else{
anyDuplicated(x, incomparables = NA)
}
}else{
.Call(C_R_bit_anyDuplicated, bit(nr), x, range_na=range_na, as.logical(na.rm))
}
}
#' @describeIn bit_unidup counts duplicate elements
#' @export
bit_sumDuplicated <- function(x, na.rm = NA, range_na=NULL){
if (!(is.integer(x) || is.factor(x)))
stop("x must be integer (or factor)")
if (!length(x))
return(0L)
if (is.null(range_na))
range_na <- range_na(x)
else
range_na <- as.integer(range_na)
if (is.na(range_na[1]))
nr <- 0L # no numbers, no temp bit vector
else
nr <- as.double(range_na[2])-as.double(range_na[1])+1
d <- length(x) / nr
if (nr > .Machine$integer.max || d < 1/64 ){
if (is.na(na.rm)){
sum(duplicated(x, incomparables = FALSE))
}else if (na.rm){
sum(duplicated(x, incomparables = NA)) + sum(is.na(x))
}else{
sum(duplicated(x, incomparables = NA))
}
}else{
.Call(C_R_bit_sumDuplicated, bit(nr), x, range_na=range_na, as.logical(na.rm))
}
}
#' bit set operations
#'
#' Fast versions of \code{\link{union}}, \code{\link{intersect}},
#' \code{\link{setdiff}}, symmetric difference and \code{\link{setequal}}
#' for integers.
#'
#' determines the range of the integers and checks if the density justifies use
#' of a bit vector; if yes, uses a bit vector for finding duplicates; if no,
#' falls back to \code{\link{union}}, \code{\link{intersect}},
#' \code{\link{setdiff}}, \code{union(setdiff(x,y),setdiff(y,x))} and \code{\link{setequal}}
#'
#' @name bit_setops
#' @param x an integer vector
#' @param y an integer vector
#'
#' @return an integer vector
#' @seealso \code{\link{bit_in}}, \code{\link{bit_rangediff}}
#'
#' @examples
#' bit_union(1:2, 2:3)
#' bit_intersect(1:2, 2:3)
#' bit_setdiff(1:2, 2:3)
#' bit_symdiff(1:2, 2:3)
#' bit_setequal(1:2, 2:3)
#' bit_setequal(1:2, 2:1)
NULL
#' @describeIn bit_setops union
#' @export
bit_union <- function(x, y){
if (!(is.integer(x) || is.factor(x)))
stop("x must be integer (or factor)")
if (!(is.integer(y) || is.factor(y)))
stop("y must be integer (or factor)")
nx <- length(x)
ny <- length(y)
rx <- range_na(x)
ry <- range_na(y)
range_na <- c(min(rx[1],ry[1]), max(rx[2],ry[2]), rx[3]+ry[3])
if (max(rx[1],ry[1])>min(rx[2],ry[2])){
if ( range_na[3]>0 && (rx[3]>0) == (rx[3]>0) )
return(bit_unique(c(x,y), range_na=range_na))
else
return(c(bit_unique(x, range_na=rx), bit_unique(y, range_na=ry)))
}
if (is.na(range_na[1]))
nr <- 0L # no numbers, no temp bit vector
else
nr <- as.double(range_na[2])-as.double(range_na[1])+1
d <- (nx+ny) / nr
if (nr==0L || nr > .Machine$integer.max || d < 1/64 ){
ret <- union(x,y)
}else{
ret <- .Call(C_R_bit_union, tmp=bit(nr), x, y, range_na)
}
ret
}
#' @describeIn bit_setops intersection
#' @export
bit_intersect <- function(x, y){
if (!(is.integer(x) || is.factor(x)))
stop("x must be integer (or factor)")
if (!(is.integer(y) || is.factor(y)))
stop("y must be integer (or factor)")
nx <- length(x)
ny <- length(y)
rx <- range_na(x)
ry <- range_na(y)
range_na <- c(max(rx[1],ry[1]), min(rx[2],ry[2]), rx[3]+ry[3])
if (range_na[1]>range_na[2])
return(integer())
if (is.na(range_na[1]))
nr <- 0L # no numbers, no temp bit vector
else
nr <- as.double(range_na[2])-as.double(range_na[1])+1
d <- (nx+ny) / nr
if (nr==0L || nr > .Machine$integer.max || d < 1/64 ){
ret <- intersect(x,y)
}else{
ret <- .Call(C_R_bit_intersect, tmp=bit(nr), x, y, range_na)
}
ret
}
#' @describeIn bit_setops asymmetric difference
#' @export
bit_setdiff <- function(x, y){
if (!(is.integer(x) || is.factor(x)))
stop("x must be integer (or factor)")
if (!(is.integer(y) || is.factor(y)))
stop("y must be integer (or factor)")
nx <- length(x)
ny <- length(y)
range_na <- range_na(x)
if (is.na(range_na[1]))
nr <- 0L # no numbers, no temp bit vector
else
nr <- as.double(range_na[2])-as.double(range_na[1])+1
d <- (nx+ny) / nr
if (nr==0L || nr > .Machine$integer.max || d < 1/64 ){
ret <- setdiff(x,y)
}else{
ret <- .Call(C_R_bit_setdiff, tmp=bit(nr), x, y, range_na)
}
ret
}
#' @describeIn bit_setops symmetricx difference
#' @export
bit_symdiff <- function(x, y){
if (!(is.integer(x) || is.factor(x)))
stop("x must be integer (or factor)")
if (!(is.integer(y) || is.factor(y)))
stop("y must be integer (or factor)")
nx <- length(x)
ny <- length(y)
rx <- range_na(x)
ry <- range_na(y)
range_na <- c(min(rx[1],ry[1]), max(rx[2],ry[2]), rx[3]+ry[3])
if (max(rx[1],ry[1])>min(rx[2],ry[2])){
if ( range_na[3]>0 && (rx[3]>0) == (rx[3]>0) )
return(bit_unique(c(x,y), range_na=range_na))
else
return(c(bit_unique(x, range_na=rx), bit_unique(y, range_na=ry)))
}
if (is.na(range_na[1]))
nr <- 0L # no numbers, no temp bit vector
else
nr <- as.double(range_na[2])-as.double(range_na[1])+1
d <- (nx+ny) / nr
if (nr==0L || nr > .Machine$integer.max || d < 1/64 ){
ret <- union(setdiff(x,y),setdiff(y,x))
}else{
ret <- .Call(C_R_bit_symdiff, bit(nr), bit(nr), x, y, range_na, rx[3]>0, ry[3]>0)
}
ret
}
#' @describeIn bit_setops equality
#' @export
bit_setequal <- function(x, y){
if (!(is.integer(x) || is.factor(x)))
stop("x must be integer (or factor)")
if (!(is.integer(y) || is.factor(y)))
stop("y must be integer (or factor)")
nx <- length(x)
ny <- length(y)
rx <- range_na(x)
ry <- range_na(y)
rx[3] <- as.logical(rx[3]) # map NA count to 0/1
ry[3] <- as.logical(ry[3]) # map NA count to 0/1
if (!identical(rx,ry))
return(FALSE)
range_na <- rx
if (is.na(range_na[1]))
nr <- 0L # no numbers, no temp bit vector
else
nr <- as.double(range_na[2])-as.double(range_na[1])+1
d <- (nx+ny) / nr
if (nr==0L || nr > .Machine$integer.max || d < 1/256 ){
ret <- setequal(x,y)
}else{
ret <- .Call(C_R_bit_setequal, bit(nr), bit(nr), x, y, range_na)
}
ret
}
#' bit range difference
#'
#' Fast version of \code{setdiff(rx[1]:rx[2], y)}.
#'
#' determines the range of the integers \code{y} and checks if the density justifies use
#' of a bit vector; if yes, uses a bit vector for the set operation; if no,
#' falls back to a quicksort and \code{\link{merge_rangediff}}
#'
#' @param y an integer vector of elements to exclude
#' @param revx \code{FALSE} as is, \code{TRUE} to reverse the direction and sign of \code{rx}
#' @param revy \code{FALSE} as is, \code{TRUE} to reverse the direction and sign of \code{y}
#' @inheritParams merge_rev
#'
#' @return an integer vector
#' @seealso \code{\link{bit_setdiff}}, \code{\link{merge_rangediff}}
#'
#' @examples
#' bit_rangediff(c(1L,6L), c(3L,4L))
#' bit_rangediff(c(6L,1L), c(3L,4L))
#' bit_rangediff(c(6L,1L), c(3L,4L), revx=TRUE)
#' bit_rangediff(c(6L,1L), c(3L,4L), revx=TRUE, revy=TRUE)
#' @export
bit_rangediff <- function(rx, y, revx=FALSE, revy=FALSE){
if (!is.integer(y))
stop("y must be integer")
if (!is.ri(rx)){
stopifnot(length(rx)==2)
rx <- as.integer(rx)
}
if (rx[1]>rx[2])
ox <- as.double(rev(rx))
else
ox <- as.double(rx)
n <- ox[2]-ox[1]+1L
d <- length(y) / n
if (n > .Machine$integer.max || d < 1/64 ){
ox <- as.integer(ox)
y <- .Call(C_R_int_quicksort2, copy_vector(y), c(ox, 0L), na.last=NA)
ret <- merge_rangediff(ox, y, revx=revx, revy=revy)
if (rx[1]>rx[2])
ret <- rev(ret)
ret
}else{
.Call(C_R_bit_rangediff, tmp=bit(n), as.integer(rx), y, as.logical(revx), as.logical(revy))
}
}
# /* some experiments - just ignore
# bit_sample1 <- function (x, size, replace = FALSE, prob = NULL){
# if (length(x)==1L){
# x <- seq_len(x)
# } else{
# x <- as.integer(x)
# }
# size <- as.integer(size)
# if (replace)
# stop("replace not implemented")
# else{
# stopifnot(size <= length(x))
# .Call(C_R_bit_sample, x, bit(length(x)), size)
# }
# }
# bit_sample <- function (x, size=length(x), replace = FALSE, prob = NULL){
# if (length(x)==1L){
# x <- seq_len(x)
# } else{
# x <- as.integer(x)
# }
# size <- as.integer(size)
# replace <- as.logical(replace)
# if (!replace && size>length(x))
# stop("size > length(x) without replace")
# .Call(C_R_bit_sample, x, size, replace)
# }
# */
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.