# hybrid index packing with run length encoding
# (c) 2007 Jens Oehlschägel
# Licence: GPL2
# Provided 'as is', use at your own risk
# Created: 2007-08-24
# Last changed: 2007-10-25
# source("d:/mwp/eanalysis/ff/R/hi.R")
# -- creating a hi object directly ----------------------------------------------------------------------
#! \name{hi}
#! \alias{hi}
#! \alias{print.hi}
#! \alias{str.hi}
#! \title{ Hybrid index class }
#! \description{
#! Class for hybrid index representation, plain and rle-packed
#! }
#! \usage{
#! hi(from, to, by = 1L, maxindex = NA, vw = NULL, pack = TRUE, NAs = NULL)
#! \method{print}{hi}(x, \dots)
#! \method{str}{hi}(object, nest.lev=0, \dots)
#! }
#! \arguments{
#! \item{from}{ integer vector of lower sequence bounds }
#! \item{to}{ integer vector of upper sequence bounds }
#! \item{by}{ integer of stepsizes }
#! \item{maxindex}{ maximum indep position (needed for negative indices) }
#! \item{vw}{ virtual window information, see \code{\link{vw}} }
#! \item{pack}{ FALSE to suppress rle-packing }
#! \item{NAs}{ a vector of NA positions (not yet used) }
#! \item{x}{ an object of class 'hi' to be printed }
#! \item{object}{ an object of class 'hi' to be str'ed }
#! \item{nest.lev}{ current nesting level in the recursive calls to str }
#! \item{\dots}{ further arguments passed to the next method }
#! }
#! \details{
#! Class \code{hi} will represent index data either as a plain positive or negative index vector or as an rle-packed version thereof.
#! The current implementation switches from plain index positions \code{i} to rle-packed storage of \code{diff(i)} as soon as the compression ratio is 3 or higher.
#! Note that sequences shorter than 2 must never be packed (could cause C-side crash).
#! Furthermore hybrid indices are guaranteed to be sorted ascending, which helps \code{\link{ff}s} access method avoiding to swap repeatedly over the same memory pages (or file positions).
#! }
#! \value{
#! A list of class 'hi' with components
#! \item{ x }{ directly accessed by the C-code: the sorted index of class 'rlepack' as returned by \code{\link[bit]{rlepack}} }
#! \item{ ix }{ NULL or positions to restore original order }
#! \item{ re }{ logical scalar indicating if sequence was reversed from descending to ascending (in this case \code{is.null(ix)}) }
#! \item{ minindex }{ directly accessed by the C-code: represents the lowest positive subscript to be enumerated in case of negative subscripts }
#! \item{ maxindex }{ directly accessed by the C-code: represents the highest positive subscript to be enumerated in case of negative subscripts }
#! \item{ length }{ number of subscripts, whether negative or positive, not the number of selected elements }
#! \item{ dim }{ NULL or dim -- used by \code{\link{as.matrix.hi}} }
#! \item{ dimorder }{ NULL or \code{\link{dimorder}} }
#! \item{ symmetric }{ logical scalar indicating whether we have a symmetric matrix }
#! \item{ fixdiag }{ logical scalar indicating whether we have a fixed diagonal (can only be true for symmetric matrices) }
#! \item{ vw }{ virtual window information \code{\link{vw}} }
#! \item{ NAs }{ NULL or NA positions as returned by \code{\link[bit]{rlepack}} }
#! }
#! \author{ Jens Oehlschlägel }
#! \note{ \command{hi} defines the class structure, however usually \code{\link{as.hi}} is used to acturally Hybrid Index Preprocessing for \code{\link{ff}} }
#! \seealso{ \code{\link{as.hi}} for coercion, \code{\link[bit]{rlepack}}, \code{\link[bit]{intrle}}, \code{\link[bit:maxindex.default]{maxindex}}, \code{\link[bit:maxindex.default]{poslength}} }
#! \examples{
#! hi(c(1, 11, 29), c(9, 19, 21), c(1,1,-2))
#! as.integer(hi(c(1, 11, 29), c(9, 19, 21), c(1,1,-2)))
#! }
#! \keyword{ IO }
#! \keyword{ data }
hi <- function (from, to, by = 1L, maxindex = NA, vw=NULL, pack = TRUE, NAs = NULL)
{
minindex <- 1L
maxindex <- as.integer(maxindex)
if (is.null(vw)){
vw.convert <- FALSE
}else{
if (is.matrix(vw))
stop("matrix vw not allowed in hi, use as.hi")
storage.mode(vw) <- "integer"
vw.convert <- TRUE
}
nspec <- length(from)
if (nspec > 0) {
from <- as.integer(from)
to <- rep(as.integer(to), length.out = nspec)
by <- rep(as.integer(by), length.out = nspec)
d <- to - from
N <- d%/%by
if (any(d != 0 & sign(d) != sign(by)) || any(N * by != d))
stop("illegal input to hi")
if (nspec > 1 || d[1] != 0){
l <- as.vector(rbind(rep(1L, nspec), N))[-1]
v <- as.vector(rbind(c(0L, from[-1] - to[-nspec]), by))[-1]
v <- v[l > 0]
l <- l[l > 0]
from <- from[1]
to <- to[nspec]
nl <- length(l)
r <- list(lengths = l, values = v)
n <- sum(r$lengths) + 1L
tab <- tabulate(sign(r$values) + 2, 3)
s <- !tab[1] || !tab[3]
if (s) { # sorted
#if (nl) {
# if (pack)
# pack <- 2 * nl < n
#}else
# pack <- FALSE
#if (pack){
class(r) <- "rle"
#}else{
# r <- as.integer(cumsum(c(from, rep(r$values, r$lengths))))
#}
x <- list(first = from, dat = r, last = to)
class(x) <- "rlepack"
ix <- NULL
re <- tab[1] > 0
if (re)
x <- rev(x) # rev.rlepack
}else{
re <- FALSE
x <- as.integer(cumsum(c(from, rep(r$values, r$lengths))))
x <- sort.int(x, index.return = TRUE, method = "quick")
ix <- x$ix
x <- rlepack(x$x, pack = pack)
#ix <- seq_len(n)
#radixorder(x, ix)
#x <- rlepack(x[ix], pack = pack)
}
}else{
# nspec==1 && d[1] != 0
re <- FALSE
ix <- NULL
x <- list(first = from[1], dat = from[1], last = from[1])
class(x) <- "rlepack"
}
x <- unique(x) # unique.rlepack
# this ifelse section copied 1L to as.hi.call
if (x$last < 0) {
if (is.na(maxindex))
stop("maxindex is required with negative subscripts")
if ( -x$first > maxindex )
stop("negative subscripts out of range")
re <- FALSE
ix <- NULL
if (vw.convert){
x$first <- x$first - vw[1]
x$last <- x$last - vw[1]
if (inherits(x$dat, "rle")){
n <- sum(x$dat$lengths) + 1L
}else{
x$dat <- x$dat - vw[1]
n <- length(x$dat)
}
}else{
if (inherits(x$dat, "rle")){
n <- sum(x$dat$lengths) + 1L
}else{
n <- length(x$dat)
}
}
}else if (x$first > 0){
if (!is.na(maxindex) && x$last > maxindex )
stop("positive subscripts out of range")
if (vw.convert){
x$first <- vw[1] + x$first
x$last <- vw[1] + x$last
if (inherits(x$dat, "rle")){
n <- sum(x$dat$lengths) + 1L
}else{
x$dat <- vw[1] + x$dat
n <- length(x$dat)
}
}else{
if (inherits(x$dat, "rle")){
n <- sum(x$dat$lengths) + 1L
}else{
n <- length(x$dat)
}
}
}else{
stop("0s and mixed positive/negative subscripts not allowed")
}
}else{
x <- list(first = NA_integer_, dat = integer(), last = NA_integer_)
re <- FALSE
ix <- NULL
n <- 0L
minindex <- 1L
maxindex <- as.integer(maxindex)
}
if (!is.null(NAs))
NAs <- rlepack(as.integer(NAs), pack = pack)
if (!is.null(vw)){
minindex <- vw[1] + 1L
maxindex <- vw[1] + vw[2]
}
ret <- list(
x = x # directly accessed by the C-code: hybrid index, i.e. either raw or rle
, ix = ix # NULL or positions for re-ordering
, re = re # logical indicating whether sequence was reversed from descending to ascending
, minindex = minindex # directly accessed by the C-code: represents the lowest positive subscript to be enumerated in case of negative subscripts
, maxindex = maxindex # directly accessed by the C-code: represents the highest positive subscript to be enumerated in case of negative subscripts
, length = n # number of subscripts, whether negative or positive
, dim = NULL # NULL or dim
, dimorder = NULL # NULL or dimorder
, symmetric = FALSE # logical indicating whether we have a symmetric matrix
, fixdiag = NULL # logical indicating whether we have a fixed diagonal (can only be true for symmetric matrices)
, vw = vw # NULL or OffsetWindowRest definition
, NAs = NAs # NULL or positive positions of NAs
)
class(ret) <- "hi"
ret
}
print.hi <- function(x, ...){
cat("hybrid index (hi) from ", x$x$first, " to ", x$x$last, " over ", if (inherits(x$x$dat, "rle")) "<rle position diffs>" else "<plain positions>", " re=", x$re, " ix=", if(is.null(x$ix)) "NULL" else "<reverse sort info>", "\n", sep="")
cat("minindex=", x$minindex, " maxindex=", x$maxindex, " length=", x$length, " poslength=", poslength(x), "\n", sep="")
if (!is.null(x$dim)){
cat("dim=c(", paste(x$dim, collapse=","), "), dimorder=c(", paste(x$dimorder, collapse=","), ")\n", sep="")
}
if (!is.null(x$vw)){
cat("vw=")
print(x$vw, ...)
}
invisible()
}
str.hi <- function(object, nest.lev=0, ...){
nest.str <- paste(rep(" ..", nest.lev), collapse="")
str(unclass(object), nest.lev=nest.lev, ...)
cat(nest.str, ' - attr(*, "class") = ', sep="")
str(class(object), nest.lev=nest.lev, ...)
}
# -- coerce to hi object ----------------------------------------------------------------------
#! \name{hiparse}
#! \alias{hiparse}
#! \title{ Hybrid Index, parsing }
#! \description{
#! \command{hiparse} implements the parsing done in Hybrid Index Preprocessing in order to avoid RAM for expanding index expressions.
#! \emph{Not to be called directly}
#! }
#! \usage{
#! hiparse(x, envir, first = NA_integer_, last = NA_integer_)
#! }
#! \arguments{
#! \item{x}{ an index expression, precisely: \code{\link{call}} }
#! \item{envir}{ the environemtn in which to evaluate components of the index expression }
#! \item{first}{ first index position found so far }
#! \item{last}{ last index position found so far }
#! }
#! \details{
#! This primitive parser recognises the following tokens: numbers like 1, symbols like x, the colon sequence operator \code{\link{:}} and the concat operator \code{\link{c}}.
#! \code{hiparse} will \code{\link{Recall}} until the index expression is parsed or an unknown token is found.
#! If an unknown token is found, \code{hiparse} evluates it, inspects it and either accepts it or throws an error, catched by \code{\link{as.hi.call}},
#! which falls back to evaluating the index expression and dispatching (again) an appropriate \code{\link{as.hi}} method.
#! Reasons for suspending the parsing: if the inspected token is of class 'hi', 'ri', 'bit', 'bitwhich', 'is.logical', 'is.character', 'is.matrix' or has length>16.
#! }
#! \value{
#! undefined (and redefined as needed by \code{\link{as.hi.call}})
#! }
#! \author{ Jens Oehlschlägel }
#! \seealso{ \code{\link{hi}}, \code{\link{as.hi.call}} }
#! \keyword{ IO }
#! \keyword{ data }
hiparse <- function(x, envir, first=NA_integer_, last=NA_integer_){
if (length(x)>1){
if (x[[1]]=='c'){
values <- integer()
lengths <- integer()
n <- length(x)
i <- 1
while(i<n){
i <- i + 1
r <- Recall(x[[i]], envir, first=first, last=last)
first <- r$first
last <- r$last
values <- c(values, r$values)
lengths <- c(lengths, r$lengths)
}
return(list(first=first, lengths=lengths, values=values, last=last))
}else if (x[[1]]==':'){
from <- eval(x[[2]], envir=envir)
to <- eval(x[[3]], envir=envir)
if (is.logical(from) || is.logical(to))
stop("as.hi.default:hiparse logicals encountered")
if (length(from)!=1 || length(to)!=1)
stop("as.hi.default:hiparse: arguments of : have length!=1")
from <- as.integer(from)
to <- as.integer(to)
if ( is.na(from) || is.na(to) || from==0 || to==0 )
stop("as.hi.default:hiparse NAs or 0s encountered")
if (is.na(first))
first <- from
if (is.na(last)){
if (from>to)
return(list(first=first, lengths=from-to, values=as.integer(-1), last=to))
else
return(list(first=first, lengths=to-from, values=as.integer(1), last=to))
}else{
if (from>to)
return(list(first=first, lengths=c(as.integer(1), from-to), values=c(from-last, as.integer(-1)), last=to))
else
return(list(first=first, lengths=c(as.integer(1), to-from), values=c(from-last, as.integer(1)), last=to))
}
}
}
x <- eval(x, envir=envir)
if (inherits(x,"hi"))
stop("DEBUGINFO visible when try(..., silent=FALSE) in as.hi.call: as.hi.default:hiparse found hi")
if (inherits(x,"ri"))
stop("DEBUGINFO visible when try(..., silent=FALSE) in as.hi.call: as.hi.default:hiparse found ri")
if (inherits(x,"bit"))
stop("DEBUGINFO visible when try(..., silent=FALSE) in as.hi.call: as.hi.default:hiparse found bit")
if (inherits(x,"bitwhich"))
stop("DEBUGINFO visible when try(..., silent=FALSE) in as.hi.call: as.hi.default:hiparse found bitwhich")
if (is.logical(x))
stop("DEBUGINFO visible when try(..., silent=FALSE) in as.hi.call: as.hi.default:hiparse found logical")
if (is.character(x))
stop("DEBUGINFO visible when try(..., silent=FALSE) in as.hi.call: as.hi.default:hiparse found character")
if (is.matrix(x))
stop("DEBUGINFO visible when try(..., silent=FALSE) in as.hi.call: as.hi.default:hiparse found matrix")
n <- length(x)
if (n>16)
stop("DEBUGINFO visible when try(..., silent=FALSE) in as.hi.call: as.hi.default:hiparse found length>16")
if (n){
x <- as.integer(x)
if (is.na(first))
first <- x[1]
if (is.na(last)){
r <- rle(diff(x)) # using standard rle here because x is small and we always need a result
}else{
r <- rle(diff(c(last, x))) # using standard rle here because x is small and we always need a result
}
if (is.na(intisasc(r$values)))
stop("as.hi.default:hiparse found NAs")
last <- x[n]
return(list(first=first, lengths=r$lengths, values=r$values, last=last))
}else{
return(list(first=first, lengths=integer(), values=integer(), last=last))
}
}
#! \name{as.hi}
#! \alias{as.hi}
#! \alias{as.hi.NULL}
#! \alias{as.hi.hi}
#! \alias{as.hi.ri}
#! \alias{as.hi.bit}
#! \alias{as.hi.bitwhich}
#! \alias{as.hi.call}
#! \alias{as.hi.name}
#! \alias{as.hi.(}
#! \alias{as.hi.integer}
#! \alias{as.hi.which}
#! \alias{as.hi.double}
#! \alias{as.hi.logical}
#! \alias{as.hi.character}
#! \alias{as.hi.matrix}
#! \title{ Hybrid Index, coercion to }
#! \description{
#! The generic \command{as.hi} and its methods are the main (internal) means for preprocessing index information into the hybrid index class \code{\link{hi}}.
#! Usually \command{as.hi} is called transparently from \code{\link{[.ff}}. However, you can explicitely do the index-preprocessing,
#! store the Hybrid Index \code{\link{hi}}, and use the \code{hi} for subscripting.
#! }
#! \usage{
#! as.hi(x, \dots)
#! \method{as.hi}{NULL}(x, \dots)
#! \method{as.hi}{hi}(x, \dots)
#! \method{as.hi}{ri}(x, maxindex = length(x), \dots)
#! \method{as.hi}{bit}(x, range = NULL, maxindex = length(x), vw = NULL
#! , dim = NULL, dimorder = NULL, pack = TRUE, \dots)
#! \method{as.hi}{bitwhich}(x, maxindex = length(x), pack = FALSE, \dots)
#! \method{as.hi}{call}(x, maxindex = NA, dim = NULL, dimorder = NULL, vw = NULL
#! , vw.convert = TRUE, pack = TRUE, envir = parent.frame(), \dots)
#! \method{as.hi}{name}(x, envir = parent.frame(), \dots)
#! %\method{as.hi}{(}(x, envir = parent.frame(), \dots)
#! \method{as.hi}{integer}(x, maxindex = NA, dim = NULL, dimorder = NULL
#! , symmetric = FALSE, fixdiag = NULL, vw = NULL, vw.convert = TRUE
#! , dimorder.convert = TRUE, pack = TRUE, NAs = NULL, \dots)
#! \method{as.hi}{which}(x, \dots)
#! \method{as.hi}{double}(x, \dots)
#! \method{as.hi}{logical}(x, maxindex = NA, dim = NULL, vw = NULL, pack = TRUE, \dots)
#! \method{as.hi}{character}(x, names, vw = NULL, vw.convert = TRUE, \dots)
#! \method{as.hi}{matrix}(x, dim, dimorder = NULL, symmetric = FALSE, fixdiag = NULL
#! , vw = NULL, pack = TRUE, \dots)
#! }
#! \arguments{
#! \item{x}{ an appropriate object of the class for which we dispatched }
#! \item{envir}{ the environment in which to evaluate components of the index expression }
#! \item{maxindex}{ maximum positive indexposition \code{maxindex}, is needed with negative indices, if vw or dim is given, maxindex is calculated automatically }
#! \item{names}{ the \code{\link[=names.ff]{names}} of the indexed vector for character indexing }
#! \item{dim}{ the \code{\link[=dim.ff]{dim}} of the indexed matrix to be stored within the \code{\link{hi}} object }
#! \item{dimorder}{ the \code{\link{dimorder}} of the indexed matrix to be stored within the \code{\link{hi}} object, may convert interpretation of \code{x} }
#! \item{symmetric}{ the \code{\link{symmetric}} of the indexed matrix to be stored within the \code{\link{hi}} object }
#! \item{fixdiag}{ the \code{\link{fixdiag}} of the indexed matrix to be stored within the \code{\link{hi}} object }
#! \item{vw}{ the virtual window \code{\link{vw}} of the indexed vector or matrix to be stored within the \code{\link{hi}} object, see details }
#! \item{vw.convert}{ FALSE to prevent doubly virtual window conversion, this is needed for some internal calls that have done the virtual window conversion already, see details }
#! \item{dimorder.convert}{ FALSE to prevent doubly dimorder conversion, this is needed for some internal calls that have done the dimorder conversion already, see details }
#! \item{NAs}{ a vector of NA positions to be stored \code{\link[bit]{rlepack}ed}, not fully supported yet }
#! \item{pack}{ FALSE to prevent \code{\link[bit]{rlepack}ing}, note that this is a hint rather than a guarantee, \code{\link[=as.hi]{as.hi.bit}} might ignore this }
#! \item{range}{ NULL or a vector with two elements indicating first and last position to be converted from 'bit' to 'hi' }
#! \item{\dots}{ further argument passed from generic to method or from wrapper method to \code{\link[=as.hi]{as.hi.integer}} }
#! }
#! \details{
#! The generic dispatches appropriately, \code{\link[=as.hi]{as.hi.hi}} returns an \code{\link{hi}} object unchanged,
#! \code{\link[=as.hi]{as.hi.call}} tries to \code{\link{hiparse}} instead of evaluate its input in order to save RAM.
#! If parsing is successfull \code{\link[=as.hi]{as.hi.call}} will ignore its argument \code{pack} and always pack unless the subscript is too small to do so.
#! If parsing fails it evaluates the index expression and dispatches again to one of the other methods.
#! \code{\link[=as.hi]{as.hi.name}} and \code{\link[=as.hi]{as.hi.(}} are wrappers to \code{\link[=as.hi]{as.hi.call}}.
#! \code{\link[=as.hi]{as.hi.integer}} is the workhorse for coercing evaluated expressions
#! and \code{\link[=as.hi]{as.hi.which}} is a wrapper removing the \code{which} class attribute.
#! \code{\link[=as.hi]{as.hi.double}}, \code{\link[=as.hi]{as.hi.logical}} and \code{\link[=as.hi]{as.hi.character}} are also wrappers to \code{\link[=as.hi]{as.hi.integer}},
#! but note that \code{\link[=as.hi]{as.hi.logical}} is not memory efficient because it expands \emph{all} positions and then applies logical subscripting.
#! \cr
#! \code{\link[=as.hi]{as.hi.matrix}} calls \code{\link{arrayIndex2vectorIndex}} and then \code{\link[=as.hi]{as.hi.integer}} to interpret and preprocess matrix indices.
#! \cr
#! If the \code{dim} and \code{dimorder} parameter indicate a non-standard dimorder (\code{\link{dimorderStandard}}), the index information in \code{x} is converted from a standard dimorder interpretation to the requested \code{\link{dimorder}}.
#! \cr
#! If the \code{vw} parameter is used, the index information in \code{x} is interpreted relative to the virtual window but stored relative to the abolute origin.
#! Back-coercion via \code{\link{as.integer.hi}} and friends will again return the index information relative to the virtual window, thus retaining symmetry and transparency of the viurtual window to the user.
#! \cr
#! You can use \code{\link[=length.hi]{length}} to query the index length (possibly length of negative subscripts),
#! \code{\link[=length.hi]{poslength}} to query the number of selected elements (even with negative subscripts),
#! and \code{\link[=length.hi]{maxindex}} to query the largest possible index position (within virtual window, if present)
#! \cr
#! Duplicated negative indices are removed and will not be recovered by \code{\link{as.integer.hi}}.
#! }
#! \value{
#! an object of class \code{\link{hi}}
#! }
#! \author{ Jens Oehlschlägel }
#! \note{ Avoid changing the Hybrid Index representation, this might crash the \code{\link{[.ff}} subscripting. }
#! \seealso{ \code{\link{hi}} for the Hybrid Index class, \code{\link{hiparse}} for parsing details, \code{\link{as.integer.hi}} for back-coercion, \code{\link{[.ff}} for ff subscripting }
#! \examples{
#! message("integer indexing with and without rel-packing")
#! as.hi(1:12)
#! as.hi(1:12, pack=FALSE)
#! message("if index is double, the wrapper method just converts to integer")
#! as.hi(as.double(1:12))
#! message("if index is character, the wrapper method just converts to integer")
#! as.hi(c("a","b","c"), names=letters)
#! message("negative index must use maxindex (or vw)")
#! as.hi(-(1:3), maxindex=12)
#! message("logical index can use maxindex")
#! as.hi(c(FALSE, FALSE, TRUE, TRUE))
#! as.hi(c(FALSE, FALSE, TRUE, TRUE), maxindex=12)
#!
#! message("matrix index")
#! x <- matrix(1:12, 6)
#! as.hi(rbind(c(1,1), c(1,2), c(2,1)), dim=dim(x))
#!
#! message("first ten positions within virtual window")
#! i <- as.hi(1:10, vw=c(10, 80, 10))
#! i
#! message("back-coerce relativ to virtual window")
#! as.integer(i)
#! message("back-coerce relativ to absolute origin")
#! as.integer(i, vw.convert=FALSE)
#!
#! message("parsed index expressions save index RAM")
#! as.hi(quote(1:1000000000))
#! \dontrun{
#! message("compare to RAM requirement when the index experssion is evaluated")
#! as.hi(1:1000000000)
#! }
#!
#! message("example of parsable index expression")
#! a <- seq(100, 200, 20)
#! as.hi(substitute(c(1:5, 4:9, a)))
#! hi(c(1,4, 100),c(5,9, 200), by=c(1,1,20))
#!
#! message("two examples of index expression temporarily expanded to full length due to
#! non-supported use of brackets '(' and mathematical operators '+' accepting token")
#! message("example1: accepted token but aborted parsing because length>16")
#! as.hi(quote(1+(1:16)))
#! message("example1: rejected token and aborted parsing because length>16")
#! as.hi(quote(1+(1:17)))
#! }
#! \keyword{ IO }
#! \keyword{ data }
as.hi.NULL <- function(x, ...){
structure(list(x = structure(list(first = NA_integer_, dat = integer(0),
last = NA_integer_), .Names = c("first", "dat", "last"), class = "rlepack"),
ix = NULL, re = FALSE, minindex = 1L, maxindex = 0L,
length = 0L, dim = NULL, dimorder = NULL, symmetric = FALSE,
fixdiag = NULL, vw = NULL, NAs = NULL), .Names = c("x", "ix",
"re", "minindex", "maxindex", "length", "dim", "dimorder", "symmetric",
"fixdiag", "vw", "NAs"), class = "hi")
}
# xx temporary compatibility function setting the rlepack class
as.hi.hi <- function(x, ...){
if (!inherits(x$x, "rlepack"))
class(x$x) <- "rlepack"
x
}
as.hi.name <- function(x, envir=parent.frame(), ...){
as.hi(eval(x, envir=envir), ...)
}
"as.hi.(" <- function(x, envir=parent.frame(), ...){
as.hi.call(x[[2]], envir=envir, ...)
}
as.hi.call <- function(
x
, maxindex = NA_integer_
, dim = NULL
, dimorder = NULL
, vw = NULL
, vw.convert = TRUE
, pack = TRUE
, envir = parent.frame()
, ...
){
if ((!is.null(dim) && !dimorderStandard(dimorder)) || !is.null(dim(vw)))
return(as.hi(eval(x, envir=envir), maxindex=maxindex, dim=dim, dimorder=dimorder, vw=vw, vw.convert=vw.convert, pack=pack, ...))
#message("DEBUGINFO: trying hiparse")
r <- try(hiparse(x, envir=envir), silent=TRUE)
if (inherits(r,"try-error")){
#message("DEBUGINFO: hiparse failed, evaluating the index expression and dispatching again")
return(as.hi(eval(x, envir=envir), maxindex=maxindex, dim=dim, dimorder=dimorder, vw=vw, vw.convert=vw.convert, pack=pack, ...))
}
if (is.null(vw))
vw.convert <- FALSE
else{
storage.mode(vw) <- "integer"
}
minindex <- 1L
if (is.na(maxindex)){
if(is.null(dim))
maxindex <- maxindex(x)
else
maxindex <- as.integer(prod(dim))
}else{
maxindex <- as.integer(maxindex)
}
if (is.na(r$first)){
x <- rlepack(integer())
ix <- NULL
re <- FALSE
n <- 0L
}else{
nl <- length(r$lengths)
n <- sum(r$lengths) + 1L
# test for sorted
tab <- tabulate(sign(r$values)+2, 3)
if (tab[1] && tab[3]){ # unsorted in both directions
re <- FALSE
x <- as.integer(cumsum(c(r$first, rep(r$values, r$lengths))))
x <- sort.int(x, index.return=TRUE, method="quick")
ix <- x$ix
x <- rlepack(x$x, pack=pack)
#ix <- seq_len(n)
#radixorder(x, ix)
#x <- rlepack(x[ix], pack = pack)
}else{
if (nl){
pack <- 2*length(r$lengths)<n
}else
pack <- FALSE
if (pack){
dat <- list(lengths=r$lengths, values=r$values)
class(dat) <- "rle"
}else{
dat <- as.integer(cumsum(c(r$first, rep(r$values, r$lengths))))
}
x <- list(first=r$first, dat=dat, last=r$last)
class(x) <- "rlepack"
ix <- NULL
if (tab[1]){ # sorted descending
re <- TRUE
x <- rev(x) # rev.rlepack
}else{ # sorted ascending
re <- FALSE
}
}
# xx this ifelse section copied 1L from hi
if (x$last < 0) {
if (is.na(maxindex))
stop("maxindex is required with negative subscripts")
if ( -x$first > maxindex )
stop("negative subscripts out of range")
re <- FALSE
ix <- NULL
if (vw.convert){
x$first <- x$first - vw[1]
x$last <- x$last - vw[1]
if (inherits(x$dat, "rle")){
n <- sum(x$dat$lengths) + 1L
}else{
x$dat <- x$dat - vw[1]
n <- length(x$dat)
}
}else{
if (inherits(x$dat, "rle")){
n <- sum(x$dat$lengths) + 1L
}else{
n <- length(x$dat)
}
}
}else if (x$first > 0){
if (!is.na(maxindex) && x$last > maxindex )
stop("positive subscripts out of range")
if (vw.convert){
x$first <- vw[1] + x$first
x$last <- vw[1] + x$last
if (inherits(x$dat, "rle")){
n <- sum(x$dat$lengths) + 1L
}else{
x$dat <- vw[1] + x$dat
n <- length(x$dat)
}
}else{
if (inherits(x$dat, "rle")){
n <- sum(x$dat$lengths) + 1L
}else{
n <- length(x$dat)
}
}
}else{
stop("0s and mixed positive/negative subscripts not allowed")
}
}
if (!is.null(vw)){
if (is.null(dim)){
# minindex..maxindex represents the window of allowed values (used by the C-code in case of negative subscripts for enumerating all positive subscripts)
minindex <- vw[1] + 1L
maxindex <- vw[1] + vw[2]
}else{
# NOTE that negative subscripts cannot be handled in a (vw && dim)-context (enumerating all positive subscripts is not simply minindex..maxindex)
minindex <- 1L
maxindex <- as.integer(prod(colSums(vw)))
}
}
ret <- list(
x = x
, ix = ix
, re = re
, minindex = minindex
, maxindex = maxindex
, length = n
, dim = NULL
, dimorder = NULL
, symmetric = FALSE
, fixdiag = NULL
, vw = vw
, NAs = NULL
)
class(ret) <- "hi"
return(ret)
}
as.hi.integer <- function(
x
, maxindex = NA_integer_
, dim = NULL
, dimorder = NULL
, symmetric = FALSE
, fixdiag = NULL
, vw = NULL
, vw.convert = TRUE # as.hi.matrix sets this to false in order to avoid applying vw twice
, dimorder.convert = TRUE # as.hi.matrix sets this to false in order to avoid dimorder conversion twice
, pack = TRUE
, NAs = NULL
, ... # dummy to keep R CMD check quiet
){
n <- length(x)
if (is.null(vw))
vw.convert <- FALSE
else{
storage.mode(vw) <- "integer"
if (is.null(dim) && !is.null(dim(vw)))
dim <- vw[2,]
}
# these are still the limits to be checked from user perspective
minindex <- 1L
if (is.na(maxindex)){
if(is.null(dim))
maxindex <- maxindex(x)
else
maxindex <- as.integer(prod(dim))
}else{
maxindex <- as.integer(maxindex)
}
if (n){
if (is.null(dim) || dimorderStandard(dimorder))
dimorder.convert <- FALSE
prechecked <- dimorder.convert || (vw.convert && !( is.null(dim) || dimorderStandard(dimorder) ))
if (prechecked){
# need dimorder conversion, i.e., sorting pre-conversion != sorting post-conversion
# since we have no sorting pre-conversion, we cannot simply check the most extreme value, we have to check all values
# therefor we check all values pre-conversion
if (all(x<0, na.rm=TRUE)){
if (any(x < -maxindex, na.rm=TRUE))
stop("negative subscripts out of range")
x <- seq_len(maxindex)[x] # convert to positive indexes because we cannot enumerate
}else if (all(x>0, na.rm=TRUE)){
if (any(x > maxindex, na.rm=TRUE))
stop("positive subscripts out of range")
}else
stop("0s and mixed positive/negative subscripts not allowed")
x <- arrayIndex2vectorIndex(vectorIndex2arrayIndex(x, dim=dim), dim=dim, dimorder=dimorder, vw=vw)
vw.convert <- FALSE
# these are already the final limits from file perspective (make sure the converted values pass the (redundant) test further down)
if (is.null(vw))
maxindex <- prod(dim)
else
maxindex <- prod(colSums(vw))
}
isasc <- intisasc(x)
if (is.na(isasc))
stop("NAs in as.hi.integer")
if (isasc){
ix <- NULL
re <- FALSE
}else{
if (intisdesc(x)){
x <- rev(x)
ix <- NULL
re <- TRUE
}else{
x <- sort.int(x, index.return=TRUE, method="quick")
ix <- x$ix
x <- x$x
#ix <- seq_len(n)
#radixorder(x, ix)
#x <- x[ix]
re <- FALSE
}
}
# after sorting the range-checks can be done on the extremes only
if (x[n]<0){ # not possible after prechecked
if (is.na(maxindex)){
if (vw.convert && is.null(dim))
maxindex <- vw[[2]]
else
stop("maxindex is required with negative subscripts")
}
if ( -x[1] > maxindex )
stop("negative subscripts out of range")
ix <- NULL
re <- FALSE
x <- unique(x)
n <- length(x)
if (vw.convert){
# convert window positions to absolute positions
if (is.null(dim)){
x <- x - vw[1]
}else{
# (vw && dim)-context: convert negative indices to positive ones
x <- seq_len(maxindex)[x]
n <- length(x)
if (n)
x <- arrayIndex2vectorIndex(vectorIndex2arrayIndex(x, dim=dim, dimorder=dimorder), dimorder=dimorder, vw=vw)
}
}
}else if (x[1]>0){
if ( !is.na(maxindex) && x[n] > maxindex )
stop("positive subscripts out of range")
if (vw.convert){
# convert window positions to absolute positions
if (is.null(dim)){
x <- vw[1] + x
}else{
x <- arrayIndex2vectorIndex(vectorIndex2arrayIndex(x, dim=dim, dimorder=dimorder), dimorder=dimorder, vw=vw)
}
}
}else{
stop("0s and mixed positive/negative subscripts not allowed")
}
x <- rlepack(x, pack=pack)
#We could restrict compression to the case where we spent time and RAM on sorting anyhow
#x <- rlepack(x, pack=if (is.null(ix)) FALSE else pack)
}else{ # no data
x <- rlepack(integer())
ix <- NULL
re <- FALSE
}
# fix the final limits
if (!is.null(vw)){
if (is.null(dim)){
# minindex..maxindex represents the window of allowed values (used by the C-code in case of negative subscripts for enumerating all positive subscripts)
minindex <- vw[1] + 1L
maxindex <- vw[1] + vw[2]
}else{
# NOTE that negative subscripts cannot be handled in a (vw && dim)-context or in a non-standard dimorder-context (enumerating all positive subscripts is not simply minindex..maxindex)
maxindex <- as.integer(prod(colSums(vw)))
}
}
r <- list(
x = x
, ix = ix
, re = re
, minindex = minindex
, maxindex = maxindex
, length = n
, dim = dim
, dimorder = dimorder
, symmetric = symmetric
, fixdiag = fixdiag
, vw = vw
, NAs = NAs
)
class(r) <- "hi"
r
}
# xx TODO as.hi.which should be leading, not as.hi.integer
# and as.hi.integer is too complicated, remove the vw stuff
as.hi.which <- function(x, ...){
ret <- as.hi.integer(unclass(x), ...)
ret$maxindex <- maxindex(x)
ret
}
if (FALSE){
dim <- 3:4
dimorder <- 1:2
vw <- rbind(c(1,1), dim, c(1,1))
i <- seq_len(prod(dim))
m <- vectorIndex2arrayIndex(i, dim=dim)
p <- arrayIndex2vectorIndex(m, dim=dim, dimorder=dimorder, vw=vw)
m
vectorIndex2arrayIndex(p, dim=dim, dimorder=dimorder, vw=vw)
h <- as.hi(m, dim=dim, dimorder=dimorder, vw=vw)
str(h)
p
as.integer(h)
i
}
as.hi.matrix <- function(x, dim, dimorder=NULL, symmetric=FALSE, fixdiag=NULL, vw=NULL, pack=TRUE
, ... # dummy to keep R CMD check quiet
){
if (is.null(vw)){
maxindex <- as.integer(prod(dim))
}else{
maxindex <- as.integer(prod(colSums(vw)))
}
if (nrow(x)){
if (x[1]<0)
stop("matrix subscripts must be positive")
if (symmetric){
i <- symmIndex2vectorIndex(x, dim=dim, fixdiag=fixdiag)
if (is.null(fixdiag)){
ret <- as.hi.integer(i, maxindex=maxindex, dim=dim, symmetric=symmetric, fixdiag=fixdiag, vw=vw, pack=pack)
}else{
isna <- is.na(i)
NAs <- (seq_along(i))[isna]
if (length(NAs))
ret <- as.hi.integer(i[!isna], maxindex=maxindex, dim=dim, symmetric=symmetric, fixdiag=fixdiag, vw=vw, pack=pack, NAs=rlepack(NAs))
else
ret <- as.hi.integer(i, maxindex=maxindex, dim=dim, symmetric=symmetric, fixdiag=fixdiag, vw=vw, pack=pack)
}
}else{
ret <- as.hi.integer(
arrayIndex2vectorIndex(x, dim=dim, dimorder=dimorder, vw=vw)
, maxindex=maxindex
, dim=dim
, dimorder=dimorder
, symmetric=symmetric
, fixdiag=fixdiag
, vw=vw
, vw.convert=FALSE
, dimorder.convert=FALSE
, pack=pack
)
}
}else{
ret <- as.hi.integer(integer(), maxindex=maxindex, dim=dim, dimorder=dimorder, symmetric=symmetric, fixdiag=fixdiag, vw=vw, pack=pack)
}
ret
}
as.hi.logical <- function(
x
, maxindex = NA
, dim = NULL
, vw = NULL
, pack = TRUE
, ... # dummy to keep R CMD check quiet
){
if(is.null(dim)){
if (is.na(maxindex))
maxindex <- length(x)
else
maxindex <- as.integer(maxindex)
}else{
maxindex <- as.integer(prod(dim))
}
if (length(x)>maxindex)
stop("as.hi.logical longer than maxindex")
if (maxindex>0){
x <- seq_len(maxindex)[rep(x, length=maxindex)]
}else{
x <- integer()
}
return(as.hi.integer(
x
, maxindex = maxindex # if !is.null(vw) maxindex is wrong, thus we rely on as.hi.integer ignoring maxindex in this case !!
, dim = dim
, vw = vw
, pack = pack
))
}
as.hi.double <- function(x, ...){
#silent as usually, thus not: warning("converting doubles to integer in as.hi")
as.hi.integer(as.integer(x), ...)
}
# used for character subsetting
# needs names:
as.hi.character <- function(x
, names # either character vector or some object of class that has "[" defined and returns integer positions (such as a named integer vector or class index)
, vw = NULL
, vw.convert=TRUE # if names refers to the vw-window, the default vw.convert=TRUE is fine, if names refers to the total object, set vw.convert = FALSE
, ...
){
#if (inherits(names, "fffc"))
# as.hi.integer(match.fffc(x, names), vw=vw, vw.convert=vw.convert, ...)
#else
if (is.atomic(names) && is.character(names))
as.hi.integer(match(x, names), vw=vw, vw.convert=vw.convert, ...)
else
as.hi.integer(names[x], vw=vw, vw.convert=vw.convert, ...)
}
# -- reverting hi to original (as far as possible) -----------------------------------------------------------
#! \name{as.integer.hi}
#! \alias{as.which.hi}
#! \alias{as.bitwhich.hi}
#! \alias{as.bit.hi}
#! \alias{as.integer.hi}
#! \alias{as.logical.hi}
#! \alias{as.character.hi}
#! \alias{as.matrix.hi}
#! \title{ Hybrid Index, coercing from }
#! \description{
#! Functions that (back-)convert an \code{\link{hi}} object to the respective subscripting information.
#! }
#! \usage{
#! \method{as.which}{hi}(x, \dots)
#! \method{as.bitwhich}{hi}(x, \dots)
#! \method{as.bit}{hi}(x, \dots)
#! \method{as.integer}{hi}(x, vw.convert = TRUE, \dots)
#! \method{as.logical}{hi}(x, maxindex = NULL, \dots)
#! \method{as.character}{hi}(x, names, vw.convert = TRUE, \dots)
#! \method{as.matrix}{hi}(x, dim = x$dim, dimorder = x$dimorder
#! , vw = x$vw, symmetric = x$symmetric, fixdiag = x$fixdiag, \dots)
#! }
#! \arguments{
#! \item{x}{ an object of class \code{\link{hi}} }
#! \item{maxindex}{ the \code{\link{length}} of the subscripted object (needed for logical output) }
#! \item{names}{ the \code{\link{names}} vector of the subscripted object }
#! \item{dim}{ the \code{\link{dim}} of the subscripted object }
#! \item{dimorder}{ the \code{\link{dimorder}} of the subscripted object }
#! \item{vw}{ the virtual window \code{\link{vw}} of the subscripted object }
#! \item{vw.convert}{ \code{vw.convert} }
#! \item{symmetric}{ TRUE if the subscripted matrix is \code{\link{symmetric}} }
#! \item{fixdiag}{ TRUE if the subscripted matrix has \code{\link{fixdiag}} }
#! \item{\dots}{ further arguments passed }
#! }
#! \value{
#! \command{as.integer.hi} returns an integer vector, see \code{\link{as.hi.integer}}.
#! \command{as.logical.hi} returns an logical vector, see \code{\link{as.hi.logical}}.
#! \command{as.character.hi} returns a character vector, see \code{\link{as.hi.character}}.
#! \command{as.matrix.hi} returns a matrix index, see \code{\link{as.hi.matrix}}.
#! }
#! \author{ Jens Oehlschlägel }
#! \seealso{ \code{\link{hi}}, \code{\link{as.hi}} }
#! \examples{
#! x <- 1:6
#! names(x) <- letters[1:6]
#! as.integer(as.hi(c(1:3)))
#! as.logical(as.hi(c(TRUE,TRUE,TRUE,FALSE,FALSE,FALSE)))
#! as.character(as.hi(letters[1:3], names=names(x)), names=names(x))
#! x <- matrix(1:12, 6)
#! as.matrix(as.hi(rbind(c(1,1), c(1,2), c(2,1)), dim=dim(x)), dim=dim(x))
#! }
#! \keyword{ IO }
#! \keyword{ data }
# note that the result may be changed for negative subscripts (sorted ascending, duplicates removed)
# original order of subscripts NEED NOT be restored and MUST NOT be restored in order to not create long hi$ix component
as.integer.hi <- function(
x
, vw.convert=TRUE # set to FALSE when called from as.matrix.hi in order to avoid double conversion
, ... # dummy to keep R CMD check quiet
){
if (x$length){
ret <- unsort.hi(rleunpack(x$x), x)
if (is.null(x$dim)){
if (!is.null(x$vw) && vw.convert){
if (ret[1]<0){
ret <- ret + x$vw[1]
}else{
ret <- ret - x$vw[1]
}
}
}else{
if (!is.null(x$vw) && vw.convert){
# we know that subscripts must be positive in this case
ret <- arrayIndex2vectorIndex(vectorIndex2arrayIndex(ret, dimorder=x$dimorder, vw=x$vw), dim=x$vw[2,])
}else{
if (!dimorderStandard(x$dimorder))
ret <- arrayIndex2vectorIndex(vectorIndex2arrayIndex(ret, dim=x$dim, dimorder=x$dimorder), dim=x$dim)
}
}
}else{
ret <- integer()
}
ret
}
as.which.hi <- function(x, ...){
i <- as.integer(x, ...)
if (length(i) && i[[1]]<0){
i <- seq_len(maxindex(x))[i]
setattributes(i, list(maxindex = maxindex(x), class = c("booltype", "which")))
}else{
attributes(i) <- list(maxindex = maxindex(x), class = c("booltype", "which"))
}
i
}
as.matrix.hi <- function(
x
, dim = x$dim
, dimorder = x$dimorder
, vw = x$vw
, symmetric = x$symmetric
, fixdiag = x$fixdiag
, ... # dummy to keep R CMD check quiet
){
if (x$length){
if (is.null(dim))
stop("need dim to return matrix subscripts")
if (x$x$first<0)
stop("matrix subscripts must be positive")
if (symmetric){
if (is.null(fixdiag)){
stop("not yet implemented for symmetric matices with fixdiag")
}else{
stop("not yet implemented for symmetric matices without fixdiag (redundant diagonal)")
}
}else{
ret <- unsort.hi(rleunpack(x$x), x)
ret <- vectorIndex2arrayIndex(ret, dim=dim, dimorder=dimorder, vw=vw)
}
ret
}else{
matrix(integer(), 0, length(x$dim))
}
}
# note that result may be longer due to recycling
as.logical.hi <- function(
x
, maxindex=NULL
, ... # dummy to keep R CMD check quiet
){
if (is.null(maxindex))
maxindex <- maxindex(x)
if (is.na(maxindex))
stop("can't make logical without knowing vector length")
ret <- rep(FALSE, maxindex)
ret[seq_len(maxindex)[as.integer.hi(x)]] <- TRUE
ret
}
as.character.hi <- function(
x
, names
, vw.convert=TRUE # set to FALSE if the names do not correspond to window but to total object
, ... # dummy to keep R CMD check quiet
){
names[as.integer.hi(x, vw.convert=vw.convert)]
}
# -- querying the 'length' of a hi object ----------------------------------------------
#! \name{length.hi}
#! \alias{length.hi}
#! \alias{poslength.hi}
#! \alias{maxindex.hi}
#! \title{ Hybrid Index, querying }
#! \description{
#! Functions to query some index attributes
#! }
#! \usage{
#! \method{length}{hi}(x)
#! \method{maxindex}{hi}(x, \dots)
#! \method{poslength}{hi}(x, \dots)
#! }
#! \arguments{
#! \item{x}{ an object of class \code{\link{hi}} }
#! \item{\dots}{ further arguments (not used) }
#! }
#! \details{
#! \command{length.hi} returns the number of the subsript elements in the index (even if they are negative).
#! By contrast \code{\link[bit:maxindex.default]{poslength}} returns the number of selected elements (which for negative indices is \code{maxindex(x) - length(unique(x))}).
#! \code{\link[bit:maxindex.default]{maxindex}} returns the highest possible index position.
#! }
#! \value{
#! an integer scalar
#! }
#! \author{ Jens Oehlschlägel }
#! \note{ duplicated negative indices are removed }
#! \seealso{ \code{\link{hi}}, \code{\link{as.hi}}, \code{\link{length.ff}}, \code{\link{length}}, \code{\link[bit:maxindex.default]{poslength}}, \code{\link[bit:maxindex.default]{maxindex}} }
#! \examples{
#! length(as.hi(-1, maxindex=12))
#! poslength(as.hi(-1, maxindex=12))
#! maxindex(as.hi(-1, maxindex=12))
#! message("note that")
#! length(as.hi(c(-1, -1), maxindex=12))
#! length(as.hi(c(1,1), maxindex=12))
#! }
#! \keyword{ IO }
#! \keyword{ data }
# be aware that this is is the length of the pos/neg integer index, not necessarily the length of an original logical (no FALSEs, recycled) or negative (no duplicates)
length.hi <- function(x){
x$length
}
# this give the length of the object to be subscripted (if known)
# must always be known after as.hi.logical
maxindex.hi <- function(
x
, ... # dummy to keep R CMD check quiet
)
{
if (is.null(x$vw))
x$maxindex
else{
if (is.null(x$dim))
x$vw[2]
else
as.integer(prod(x$vw[2,]))
}
}
poslength.hi <- function(
x
, ... # dummy to keep R CMD check quiet
){
if (is.na(x$x$first))
0L
else if (x$x$first<0){
if (is.na(x$maxindex))
stop("poslength.hi requires maxindex")
maxindex.hi(x) - x$length
}else
x$length
}
#! \name{unsort}
#! \alias{unsort}
#! \alias{unsort.hi}
#! \alias{unsort.ahi}
#! \alias{subscript2integer}
#! \title{ Hybrid Index, internal utilities }
#! \description{
#! Non-documented internal utilities that might change
#! }
#! \usage{
#! unsort(x, ix)
#! unsort.hi(x, index)
#! unsort.ahi(x, index, ixre = any(sapply(index, function(i) {
#! if (is.null(i$ix)) {
#! if (i$re) TRUE else FALSE
#! } else {
#! TRUE
#! }
#! })), ix = lapply(index, function(i) {
#! if (is.null(i$ix)) {
#! if (i$re)
#! orig <- rev(seq_len(poslength(i)))
#! else orig <- seq_len(poslength(i))
#! }
#! else {
#! orig <- i$ix
#! }
#! orig
#! }))
#! subscript2integer(x, maxindex = NULL, names = NULL)
#! }
#! \arguments{
#! \item{x}{ \code{x} }
#! \item{ix}{ \code{ix} }
#! \item{ixre}{ \code{ixre} }
#! \item{index}{ \code{index} }
#! \item{maxindex}{ \code{maxindex} }
#! \item{names}{ \code{names} }
#! }
#! \details{
#! These are utility functions for restoring original order after sorting.
#! For now we 'mimic' the intuitive but wrong argument order of match()
#! which should rather have the 'table' argument as its first argument,
#! then one could properly method-dispatch on the type of table.
#! xx We might change to proper 'unsort' generic, but then we have to change argument order.
#! }
#! \value{
#! undefined
#! }
#! \author{ Jens Oehlschlägel }
#! \seealso{ \code{\link{hi}}, \code{\link{as.hi}} }
#! \keyword{ IO }
#! \keyword{ data }
# not actually used
unsort <- function(
x # sorted values
, ix # needed to restore orig order
){
orig <- vector(mode=storage.mode(x), length=length(x))
orig[ix] <- x
orig
}
unsort.hi <- function(
x # a vector of values in hi sorting
, index # a hi object
){
if (is.null(index$ix)){
if (index$re)
orig <- rev(x)
else
orig <- x
}else{
orig <- vector(mode=storage.mode(x), length=length(x))
orig[index$ix] <- x
}
orig
}
unsort.ahi <- function(
x # an m-array of values in multi-hi sorting
, index # a list of m hi indices
, ixre = any(sapply(index, function(i){
if (is.null(i$ix)){
if (i$re)
TRUE
else
FALSE
}else{
TRUE
}
}))
, ix = lapply(index, function(i){
if (is.null(i$ix)){
if (i$re)
orig <- rev(seq_len(poslength(i)))
else
orig <- seq_len(poslength(i))
}else{
orig <- i$ix
}
orig
})
){
if (ixre){
x <- do.call("[<-", c(list(x=x), ix, list(value=x)))
}
x
}
# used in subset.ff_dist
# does not handle matrix subscripts
subscript2integer <- function(
x
, maxindex=NULL
, names=NULL
){
if(any(is.na(x)))
stop("NAs not allowed in ff subscripting")
if (is.character(x)){
if (is.null(names))
stop("need names")
match(x, names)
}else if(is.logical(x)){
if (is.null(maxindex))
stop("need maxindex with logical subscripts")
seq_len(maxindex)[x]
}else{
if (is.double(x))
x <- as.integer(x)
tab <- tabulate(sign(x)+2, 3)
if (tab[[2]])
stop("no zeros allowed in ff subscripts")
if (tab[[1]] && tab[[2]])
stop("mixing negative and positive subscripts is not alllowed")
if (tab[[1]]){
if (is.null(maxindex))
stop("need maxindex with negative subscripts")
seq_len(maxindex)[x]
}else{
x
}
}
}
# Example
if (FALSE){
a <- seq(100, 200, 20)
as.hi(substitute(c(1:5, 4:9, a)))
hi(c(1,4, 100),c(5,9, 200), by=c(1,1,20))
as.hi(c(1:5, 4:9, a))
x <- c(1:5, 4:9, a)
as.hi(x)
as.hi(substitute(x))
as.integer(as.hi(x))
as.logical(as.hi(x))
as.logical(as.hi(x, maxindex=200))
length(as.hi.integer(x))
maxindex(as.hi(x))
poslength(as.hi(x, maxindex=200))
library(regtest)
# parsing has some overhead ...
timefactor(as.hi(substitute(c(1:4, 5:9, a))), hi(c(1,5,100),c(4,9, 200), by=c(1,1,20)), 1000, 1000)
# ... but with long sequences
timefactor(as.hi(substitute(c(1:4, a, 500:999999))), as.hi(c(1:4, a, 500:999999)), 100, 1)
s1 <- hi(c(1,4, 200),c(5,9, 100), by=c(1,1,-20))
s2 <- as.hi(substitute(c(1:5, 4:9, a)))
s3 <- as.hi(c(1:5, 4:9, a))
identical(s1, s2)
identical(s3, s2)
identical(as.integer(c(1:5, 4:9, a)), as.integer(s1))
identical(as.integer(c(1:5, 4:9, a)), as.integer(s2))
identical(as.integer(c(1:5, 4:9, a)), as.integer(s3))
library(ff)
n <- 10000000
a <- ff(0L, length=n)
#i <- c(1,n:2)
#i <- sample(n)
#save(i, file="c:/tmp/i.RData")
load(file="c:/tmp/i.RData")
memory.size(max=T)
j <- rlepack(i) # x10
memory.size(max=T)
debug(as.hi.integer)
j <- as.hi.integer(i)
memory.size(max=T)
system.time(j <- as.hi(quote(i)))
x <- 20:29
as.hi(quote((c(1, 3:10, x))))
# C-coded with 33% trick
load(file="c:/tmp/i.RData")
memory.size(max=T)
gc()
system.time(j <- intrle(i))
memory.size(max=T)
# minus structure() = 7x RAM incl. input/output
load(file="c:/tmp/i.RData")
rle <-
function (x)
{
if (!is.vector(x) && !is.list(x))
stop("'x' must be an atomic vector")
n <- length(x)
if (n == 0)
return(list(lengths = integer(0), values = x))
y <- x[-1] != x[-n]
i <- c(which(y | is.na(y)), n)
ret <- list(lengths = diff(c(0L, i)), values = x[i])
class(ret) = "rle"
ret
}
gc()
j <- rle(i)
memory.size(max=T)
# original = 9x RAM incl. input/output
load(file="c:/tmp/i.RData")
gc()
j <- rle(i)
memory.size(max=T)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.