ff: Memory-Efficient Storage of Large Data on Disk and Fast Access Functions

Documented in add.default add.ff clone.ff close.ff close.ff_pointer delete.default delete.ff delete.ff_pointer deleteIfOpen.ff deleteIfOpen.ff_pointer dim.ff dimnames.ff_array dimorder.default dimorder.ff_array ff ffsymmxtensions ffxtensions filename.default filename.ffdf filename.ff_pointer finalize.ff finalize.ffdf finalize.ff_pointer finalizer.ff fixdiag.default fixdiag.dist fixdiag.ff geterror.ff geterrstr.ff get.ff getset.ff is.factor.ff is.ff is.open.ff is.open.ff_pointer is.ordered.ff is.readonly.ff is.sorted.default length.ff levels.ff maxlength.default maxlength.ff na.count.default na.count.ff names.ff open.ff pagesize.ff pattern.ff physical.ff print.ff print.ff_matrix print.ff_vector ramattribs.default ramattribs.ff ramclass.default ramclass.ff read.ff readwrite.ff set.ff str.ff swap.default swap.ff swap.ff_array symmetric.default symmetric.dist symmetric.ff update.ff virtual.ff vw.default vw.ff write.ff

# R layer of ff
# (c) 2007 Jens Oehlschägel
# Licence: GPL2
# Provided 'as is', use at your own risk
# Created: 2007-09-03
# Last changed: 2007-10-25

# source("c:/mwp/eanalysis/ff/R/ff.R")
# package.skeleton("fftest", path="c:/tmp", list=ls("package:ff"))


# Compatibility shim: only on R older than 2.11.0 define a local .POSIXct()
# constructor that stamps the POSIXt/POSIXct classes and the time zone on `xx`
# (presumably because newer R versions provide .POSIXct themselves).
if (getRversion() < "2.11.0") {
  .POSIXct <- function(xx, tz = NULL) {
    structure(xx, class = c("POSIXt", "POSIXct"), tzone = tz)
  }
}


# --- ff info -----------------------------------------------------------

#! \name{ffxtensions}
#! \alias{ffxtensions}
#! \alias{ffsymmxtensions}
#! \title{ Test for availability of ff extensions }
#! \description{
#!   checks if this version of package ff supports ff extensions.
#! }
#! \usage{
#!  ffxtensions()
#!  ffsymmxtensions()
#! }
#! \value{
#!   logical scalar
#! }
#! \details{
#!   ff extensions are needed for certain bitcompressed vmodes and ff symm extensions for symmetric matrices.
#! }
#! \author{ Jens Oehlschlägel }
#! \seealso{  \code{\link{vmode}}%, \code{\link{symm}}
#! }
#! \examples{
#!   ffxtensions()
#!   ffsymmxtensions()
#! }
#! \keyword{ IO }
#! \keyword{ data }

# Ask the compiled back-end (native routine C_ffxtensions) whether ff
# extensions are available; the result of the native call is returned as is.
ffxtensions <- function() {
  .Call(C_ffxtensions, PACKAGE="ff")
}

# Ask the compiled back-end (native routine C_ffsymmxtensions) whether ff symm
# extensions are available; the result of the native call is returned as is.
ffsymmxtensions <- function() {
  .Call(C_ffsymmxtensions, PACKAGE="ff")
}


# Names of the physical ff attributes that are kept in the ram version
ramphysical_includes <- c(
  "pattern", "filename", "pagesize", "caching", "finalizer", "finonexit"
)

# Names of the virtual ff attributes that are kept in the ram version
ramvirtual_includes <- "Dimorder"


# Attribute names that are not handled by the ramattribs mechanism
ramattribs_excludes <- c(
  "length", "dim", "dimnames", "names", "levels",
  "class", "vmode", "physical", "virtual"
)

# Class names that are not handled by the ramclass mechanism
ramclass_excludes <- c(
  "matrix", "array", "symm", "logical", "integer", "numeric"
)

# Known caching scheme names
caching_schemes <- c("mmnoflush", "mmeachflush")

#! \name{is.ff}
#! \alias{is.ff}
#! \title{ Test for class ff }
#! \description{
#!   checks if x inherits from class "ff"
#! }
#! \usage{
#! is.ff(x)
#! }
#! \arguments{
#!   \item{x}{ any object }
#! }
#! \value{
#!   logical scalar
#! }
#! \author{ Jens Oehlschlägel }
#! \seealso{  \code{\link{inherits}}, \code{\link{as.ff}}, \code{\link{is.ffdf}} }
#! \examples{
#!   is.ff(integer())
#! }
#! \keyword{ IO }
#! \keyword{ data }

# TRUE if `x` inherits from class "ff", FALSE otherwise (logical scalar).
is.ff <- function(x) inherits(x, what = "ff")


#! \name{geterror.ff}
#! \alias{geterror.ff}
#! \alias{geterrstr.ff}
#! \title{ Get error and error string }
#! \description{
#!   Get last error code and error string that occurred on an ff object.
#! }
#! \usage{
#! geterror.ff(x)
#! geterrstr.ff(x)
#! }
#! \arguments{
#!   \item{x}{ an ff object }
#! }
#! \value{
#!   \command{geterror.ff} returns an error integer code (no error = 0) and \command{geterrstr.ff} returns the error message (no error = "no error").
#! }
#! \author{ Jens Oehlschlägel, Daniel Adler (C++ back-end) }
#! \seealso{  \code{\link{ff}} }
#! \examples{
#!   x <- ff(1:12)
#!   geterror.ff(x)
#!   geterrstr.ff(x)
#!   rm(x); gc()
#! }
#! \keyword{ IO }
#! \keyword{ data }

# Return the last error code of ff object `x`: hands the object's "physical"
# attribute to the native routine C_geterror and returns its result.
geterror.ff <- function(x)
{
  ff_ptr <- attr(x, "physical")
  .Call(C_geterror, ff_ptr, PACKAGE="ff")
}
# Return the last error message of ff object `x`: hands the object's "physical"
# attribute to the native routine C_geterrstr and returns its result.
geterrstr.ff <- function(x)
{
  ff_ptr <- attr(x, "physical")
  .Call(C_geterrstr, ff_ptr, PACKAGE="ff")
}


#! \name{filename}
#! \alias{filename}
#! \alias{filename.default}
#! \alias{filename.ff_pointer}
#! \alias{filename.ffdf}
#! \alias{filename<-}
#! \alias{filename<-.ff}
#! \alias{pattern}
#! \alias{pattern<-}
#! \alias{pattern.ff}
#! \alias{pattern<-.ff}
#! \alias{pattern<-.ffdf}
#! \title{ Get or set filename }
#! \description{
#!   Get or set filename from ram or \code{\link{ff}} object via the \code{filename} and \code{filename<-} generics
#!   or rename all files behind a \code{\link{ffdf}} using the \code{pattern<-} generic.
#! }
#! \usage{
#! filename(x, \dots)
#! filename(x, \dots) <- value
#! \method{filename}{default}(x, \dots)
#! \method{filename}{ff_pointer}(x, \dots)
#! \method{filename}{ffdf}(x, \dots)
#! \method{filename}{ff}(x, \dots) <- value
#! pattern(x, \dots)
#! pattern(x, \dots) <- value
#! \method{pattern}{ff}(x, \dots)
#! \method{pattern}{ff}(x, \dots) <- value
#! \method{pattern}{ffdf}(x, \dots) <- value
#! }
#! \arguments{
#!   \item{x}{ a ram or ff  object, or for pattern assignment only - a ffdf object }
#!   \item{value}{ a new filename }
#!   \item{\dots}{ dummy to keep R CMD CHECK quiet }
#! }
#! \value{
#!   \code{filename} and \code{pattern} return a character filename or pattern.
#!   For \code{\link{ffdf}} returns a list with one filename element for each \code{\link[=Extract.ffdf]{physical}} component.
#!   The assignment functions return the changed object, which will keep the change even without re-assigning the return-value
#! }
#! \details{
#!   Assigning a \code{filename<-} means renaming the corresponding file on disk - even for ram objects. If that fails, the assignment fails.
#!   If a file is moved in or out of \code{getOption("fftempdir")} the \code{\link{finalizer}}  is changed accordingly to 'delete' in \code{getOption("fftempdir")} and 'close' otherwise.
#!   \cr
#!   A \code{pattern} is an incomplete filename (optional path and optional filename-prefix) that is turned to filenames by
#!   adding a random string and optionally an extension from \code{getOption("ffextension")} (see \code{\link{fftempfile}}).
#!   \code{filename<-} exhibits R's standard behaviour of considering "filename" and "./filename" both to be located in \code{\link{getwd}}.
#!   By contrast \code{pattern<-} will create "filename" without path in \code{getOption("fftempdir")} and only "./filename" in \code{\link{getwd}}.
#! }
#! \author{ Jens Oehlschlägel }
#! \seealso{  \code{\link{fftempfile}}, \code{\link{finalizer}}, \code{\link{ff}}, \code{\link{as.ff}}, \code{\link{as.ram}}, \code{\link{update.ff}},  \code{\link{file.move}}}
#! \examples{
#!   \dontrun{
#!   message("Neither giving pattern nor filename gives a random filename 
#! with extension ffextension in fftempdir")
#!   x <- ff(1:12)
#!   finalizer(x)
#!   filename(x)
#!   message("Giving a pattern with just a prefix moves to a random filename 
#! beginning with the prefix in fftempdir")
#!   pattern(x) <- "myprefix_"
#!   filename(x)
#!   message("Giving a pattern with a path and prefix moves to a random filename 
#! beginning with prefix in path (use . for getwd) ")
#!   pattern(x) <- "./myprefix"
#!   filename(x)
#!   message("Giving a filename moves to exactly this filename and extension 
#! in the R-expected place) ")
#!   if (!file.exists("./myfilename.myextension")){
#!     filename(x) <- "./myfilename.myextension"
#!     filename(x)
#!   }
#!
#!   message("NOTE that the finalizer has changed from 'delete' to 'close': 
#! now WE are responsible for deleting the file - NOT the finalizer")
#!   finalizer(x)
#!   delete(x)
#!   rm(x)
#!   }
#! }
#! \keyword{ IO }
#! \keyword{ data }

# filename method for ff_pointer objects: the name is read directly from the
# pointer's own "filename" attribute (NULL if that attribute is absent).
filename.ff_pointer <- function(x, ...) {  # `...` only keeps R CMD check quiet
  attr(x, "filename")
}

# Default filename method: look up the "filename" attribute stored on the
# object's "physical" attribute (NULL if either attribute is absent).
filename.default <- function(x, ...) {  # `...` only keeps R CMD check quiet
  phys <- attr(x, "physical")
  attr(phys, "filename")
}

"filename<-.ff" <- function(x
, ... # dummy to keep R CMD check quiet
, value
){
  isopen <- is.open(x)
  tmpdirnam <- getOption("fftempdir")

  oldnam <- filename(x)
  olddirnam <- dirname(oldnam)

  splitted <- splitPathFile(value)
  if (splitted$path=="."){
    splitted$path <- getwd()
    value <- unsplitPathFile(splitted)
  }else if(splitted$path=="" && splitted$fsep==""){
    splitted$path <- tmpdirnam
    value <- unsplitPathFile(splitted)
  }else{
    # convert to absolute path
    cwd <- getwd()
    on.exit(setwd(cwd))
    dfile <- dirname(value)
    bfile <- basename(value)
    setwd(dfile)
    dfile <- getwd()
    value <- file.path(dfile, bfile)
    # fix problem in file.path
    value <- gsub("/+","/",value)
  }

  if (olddirnam==tmpdirnam){
    if (splitted$path!=tmpdirnam){
      if (isopen)
        attr(attr(x,"physical"),"finalizer") <- "close"
      else
        attr(attr(x,"physical"),"finalizer") <- NULL
    }
  }else{
    if (splitted$path==tmpdirnam)
      finalizer(x) <- "delete"
  }

  if (isopen){
    close(x)
    on.exit(open(x), add=TRUE)
  }

  if(file.move(oldnam, value))
    physical(x)$filename <- value
  else
    stop("changing ff filename from '", oldnam, "' to '", value, "' failed")
  x
}


# Return the "pattern" entry stored on the "physical" attribute of ff object
# `x` (NULL if it is not set).
pattern.ff <- function(x, ...){
  phys <- attr(x, "physical")
  attr(phys, "pattern")
}

"pattern<-.ff" <- function(x, ...,value){
  filename <- fftempfile(value)
  filename(x) <- filename
  x
}


# filename method for ffdf: applies filename() to every element of
# physical(x) and returns the results as a list.
filename.ffdf <- function(x, ...) {
  lapply(X = physical(x), FUN = filename)
}


"pattern<-.ffdf" <- function(x, ..., value){
  for (i in seq_len(ncol(x)))
    pattern(x[[i]]) <- value
  x
}


#! \name{is.readonly}
#! \alias{is.readonly}
#! \alias{is.readonly.ff}
#! \title{ Get readonly status }
#! \description{
#!   Get readonly status of an ff object
#! }
#! \usage{
#! is.readonly(x, \dots)
#! \method{is.readonly}{ff}(x, \dots)
#! }
#! \arguments{
#!   \item{x}{ \code{x} }
#!   \item{\dots}{ \code{\dots} }
#! }
#! \details{
#!   ff objects can be created/opened with \code{readonly=TRUE}.
#!   After each opening of the ff file readonly status is stored in the \code{\link[=Extract.ff]{physical}} attributes and serves as the default for the next opening.
#!   Thus querying a closed ff object gives the last readonly status.
#! }
#! \value{
#!   logical scalar
#! }
#! \author{ Jens Oehlschlägel }
#! \seealso{ \code{\link{open.ff}}, \code{\link[=Extract.ff]{physical}} }
#! \examples{
#!   x <- ff(1:12)
#!   is.readonly(x)
#!   close(x)
#!   open(x, readonly=TRUE)
#!   is.readonly(x)
#!   close(x)
#!   is.readonly(x)
#!   rm(x)
#! }
#! \keyword{ IO }
#! \keyword{ data }


# Return the "readonly" entry stored on the "physical" attribute of ff
# object `x`.
is.readonly.ff <- function(x, ...) {  # `...` only keeps R CMD check quiet
  phys <- attr(x, "physical")
  attr(phys, "readonly")
}


#! \name{is.open}
#! \alias{is.open}
#! \alias{is.open.ff}
#! \alias{is.open.ffdf}
#! \alias{is.open.ff_pointer}
#! \title{ Test if object is opened }
#! \description{
#!   Test whether an ff or ffdf object or a \code{ff_pointer} is opened.
#! }
#! \usage{
#! is.open(x, \dots)
#! \method{is.open}{ff}(x, \dots)
#! \method{is.open}{ffdf}(x, \dots)
#! \method{is.open}{ff_pointer}(x, \dots)
#! }
#! \arguments{
#!   \item{x}{ an \code{\link{ff}} or \code{\link{ffdf}} object }
#!   \item{\dots}{ further arguments (not used) }
#! }
#! \details{
#!   ff objects open automatically if accessed while closed.
#!   For ffdf objects we test all of their \code{\link[=Extract.ffdf]{physical}} components including their \code{\link[=row.names.ffdf]{row.names}} if they are \code{\link{is.ff}}
#! }
#! \value{
#!   TRUE or FALSE (or NA if not all components of an ffdf object are opened or closed)
#! }
#! \author{ Jens Oehlschlägel }
#! \seealso{ \code{\link{is.readonly}}, \code{\link{open.ff}}, \code{\link{close.ff}} }
#! \examples{
#!   x <- ff(1:12)
#!   is.open(x)
#!   close(x)
#!   is.open(x)
#!   rm(x); gc()
#! }
#! \keyword{ IO }
#! \keyword{ data }

# is.open method for ff_pointer objects: the pointer itself is passed to the
# native routine C_is_open and its result is returned.
is.open.ff_pointer <- function(x, ...)  # `...` only keeps R CMD check quiet
  .Call(C_is_open, x, PACKAGE="ff")

# is.open method for ff objects: the object's "physical" attribute is passed
# to the native routine C_is_open and its result is returned.
is.open.ff <- function(x, ...) {  # `...` only keeps R CMD check quiet
  ff_ptr <- attr(x, "physical")
  .Call(C_is_open, ff_ptr, PACKAGE="ff")
}


#! \name{pagesize}
#! \alias{pagesize}
#! \alias{pagesize.ff}
#! \title{ Pagesize of ff object }
#! \description{
#!   Returns current pagesize of ff object
#! }
#! \usage{
#! pagesize(x, \dots)
#! \method{pagesize}{ff}(x, \dots)
#! }
#! \arguments{
#!   \item{x}{ an \code{\link{ff}} object }
#!   \item{\dots}{ further arguments (not used) }
#! }
#! \value{
#!   integer number of bytes
#! }
#! \author{ Jens Oehlschlägel }
#! \seealso{ \code{\link{getpagesize}} }
#! \examples{
#!   x <- ff(1:12)
#!   pagesize(x)
#! }
#! \keyword{ IO }
#! \keyword{ data }



# Return the "pagesize" entry stored on the "physical" attribute of ff
# object `x`.
pagesize.ff <- function(x, ...){
  phys <- attr(x, "physical")
  attr(phys, "pagesize")
}


#! \name{maxlength}
#! \alias{maxlength}
#! \alias{maxlength.ff}
#! \alias{maxlength.default}
#! \title{ Get physical length of an ff or ram object }
#! \description{
#!   \command{maxlength} returns the physical length of an ff or ram object
#! }
#! \usage{
#! maxlength(x, \dots)
#! \method{maxlength}{ff}(x, \dots)
#! \method{maxlength}{default}(x, \dots)
#! }
#! \arguments{
#!   \item{x}{ ff or ram object }
#!   \item{\dots}{ additional arguments (not used) }
#! }
#! \value{
#!   integer scalar
#! }
#! \author{ Jens Oehlschlägel }
#! \seealso{ \code{\link{length.ff}}, \code{\link[=Extract.ff]{maxindex}} }
#! \examples{
#!   x <- ff(1:12)
#!   length(x) <- 10
#!   length(x)
#!   maxlength(x)
#!   x
#!   rm(x); gc()
#! }
#! \keyword{ IO }
#! \keyword{ data }




# since we have separated length from maxlength, we might allow length to be reduced (while maxlength remains the same)
# Physical length of ff object `x`, read from the "maxlength" entry on its
# "physical" attribute (the native routine C_maxlength is no longer called).
maxlength.ff  <- function(x, ...)  # `...` only keeps R CMD check quiet
{
  phys <- attr(x, "physical")
  attr(phys, "maxlength")
}
# Default maxlength method: for other objects the physical length is simply
# length(x).
maxlength.default <- function(x, ...)  # `...` only keeps R CMD check quiet
  length(x)


#! \name{symmetric}
#! \alias{symmetric}
#! \alias{symmetric.ff}
#! \alias{symmetric.default}
#! \alias{symmetric.dist}
#! \title{ Test for symmetric structure }
#! \description{
#!   Check if an object is inherently symmetric (its structure, not its data)
#! }
#! \usage{
#! symmetric(x, \dots)
#! \method{symmetric}{ff}(x, \dots)
#! \method{symmetric}{default}(x, \dots)
#! \method{symmetric}{dist}(x, \dots)
#! }
#! \arguments{
#!   \item{x}{ an ff or ram object }
#!   \item{\dots}{ further arguments (not used) }
#! }
#! \details{
#!   ff matrices can be declared symmetric at creation time. Compatibility function \command{symmetric.default} returns FALSE, \command{symmetric.dist} returns TRUE.
#! }
#! \value{
#!   TRUE or FALSE
#! }
#! \author{ Jens Oehlschlägel }
#! \seealso{ \code{\link{symmetric}}, \code{\link{ff}} %, \code{\link{symm}}
#!         , \code{\link{dist}}, \code{\link{isSymmetric}} }
#! \examples{
#!   symmetric(matrix(1:16, 4, 4))
#!   symmetric(dist(rnorm(1:4)))
#! }
#! \keyword{ IO }
#! \keyword{ data }

# Return the "Symmetric" entry stored on the "virtual" attribute of ff
# object `x`.
symmetric.ff <- function(x, ...) {  # `...` only keeps R CMD check quiet
  virt <- attr(x, "virtual")
  attr(virt, "Symmetric")
}
# Compatibility method: objects without a more specific method are reported as
# not symmetric, whatever `x` is.
symmetric.default <- function(x, ...) {  # `...` only keeps R CMD check quiet
  FALSE
}
# Compatibility method for class dist: always reports TRUE.
symmetric.dist <- function(x, ...) {  # `...` only keeps R CMD check quiet
  TRUE
}



#! \name{fixdiag}
#! \alias{fixdiag}
#! \alias{fixdiag<-}
#! \alias{fixdiag.ff}
#! \alias{fixdiag.default}
#! \alias{fixdiag.dist}
#! \title{ Test for fixed diagonal }
#! \description{
#!   Check if an object has fixed diagonal
#! }
#! \usage{
#! fixdiag(x, \dots)
#! fixdiag(x, \dots) <- value
#! \method{fixdiag}{ff}(x, \dots)
#! \method{fixdiag}{default}(x, \dots)
#! \method{fixdiag}{dist}(x, \dots)
#! }
#! \arguments{
#!   \item{x}{ an ff or ram object }
#!   \item{value}{ assignment value }
#!   \item{\dots}{ further arguments (not used) }
#! }
#! \details{
#!   ff symmetric matrices can be declared to have fixed diagonal at creation time. Compatibility function \command{fixdiag.default} returns NULL, \command{fixdiag.dist} returns 0.
#! }
#! \value{
#!   NULL or the scalar representing the fixed diagonal
#! }
#! \author{ Jens Oehlschlägel }
#! \seealso{ \code{\link{fixdiag}}, \code{\link{ff}} %, \code{\link{symm}}
#!         , \code{\link{dist}} }
#! \examples{
#!   fixdiag(matrix(1:16, 4, 4))
#!   fixdiag(dist(rnorm(1:4)))
#! }
#! \keyword{ IO }
#! \keyword{ data }


# Return the "Fixdiag" entry stored on the "virtual" attribute of ff object `x`
# (NULL if it is not set).
fixdiag.ff <- function(x, ...) {  # `...` only keeps R CMD check quiet
  virt <- attr(x, "virtual")
  attr(virt, "Fixdiag")
}
# Compatibility method: objects without a more specific method have no fixed
# diagonal, so NULL is returned.
fixdiag.default <- function(x, ...) {  # `...` only keeps R CMD check quiet
  NULL
}
# Compatibility method for class dist: the fixed diagonal is reported as 0.
fixdiag.dist <- function(x, ...) {  # `...` only keeps R CMD check quiet
  0
}

#! \name{is.sorted}
#! \alias{is.sorted.default}
#! \alias{is.sorted<-.default}
#! \title{ Getting and setting 'is.sorted' physical attribute }
#! \description{
#!   Functions to mark an ff or ram object as 'is.sorted' and query this. Responsibility to maintain this attribute is with the user.
#! }
#! \usage{
#! \method{is.sorted}{default}(x, \dots)
#! \method{is.sorted}{default}(x, \dots) <- value
#! }
#! \arguments{
#!   \item{x}{ an ff or ram object }
#!   \item{\dots}{ ignored }
#!   \item{value}{ NULL (to remove the 'is.sorted' attribute) or TRUE or FALSE }
#! }
#! \details{
#!   Sorting is slow, see \code{\link{sort}}.
#!   Checking whether an object is sorted can avoid unnecessary sorting -- see \code{\link{is.unsorted}}, \code{\link[bit:intrle]{intisasc}} -- but still takes too much time with large objects stored on disk.
#!   Thus it makes sense to maintain an attribute, that tells us whether sorting can be skipped.
#!   Note that -- though you change it yourself -- \code{is.sorted} is a \code{\link[=Extract.ff]{physical}} attribute of an object,
#!   because it represents an attribute of the \emph{data}, which is shared between different \code{\link[=Extract.ff]{virtual}} views of the object.
#! }
#! \value{
#!   TRUE (if set to TRUE) or FALSE (if set to NULL or FALSE)
#! }
#! \note{
#!   \command{ff} will set \code{is.sorted(x) <- FALSE} if \code{\link[=clone.ff]{clone}} or \code{\link{length<-.ff}} have increased length.
#! }
#! \author{ Jens Oehlschlägel }
#! \seealso{ \code{\link{is.ordered.ff}} for testing factor levels, \code{\link{is.unsorted}} for testing the data, \code{\link[bit:intrle]{intisasc}} for a quick version thereof, \code{\link[bit]{na.count}} for yet another \code{\link[=Extract.ff]{physical}} attribute }
#! \examples{
#!   x <- 1:12
#!   is.sorted(x) <- !( is.na(is.unsorted(x)) || is.unsorted(x))
#!   is.sorted(x)
#!   x[1] <- 100L
#!   message("don't forget to maintain once it's no longer TRUE")
#!   is.sorted(x) <- FALSE
#!   message("check whether as 'is.sorted' attribute is maintained")
#!   !is.null(physical(x)$is.sorted)
#!   message("remove the 'is.sorted' attribute")
#!   is.sorted(x) <- NULL
#!   message("NOTE that querying 'is.sorted' still returns FALSE")
#!   is.sorted(x)
#! }
#! \keyword{ IO }
#! \keyword{ data }
#! \keyword{ arith }


# !is.sorted does not imply is.unsorted
# Query the user-maintained 'is.sorted' physical attribute of `x`.
#   x    an ff or ram object
#   ...  ignored
# Returns TRUE only if the stored attribute is a single true value; a missing
# attribute, FALSE or NA all yield FALSE. Note that FALSE does not imply
# is.unsorted(x).
# The previous `if (is.null(s) || !s)` raised an error when the stored value
# was NA (e.g. set directly through physical(x)$is.sorted); the setter already
# maps NA to FALSE, so the getter now does the same.
is.sorted.default <- function(x, ...)
{
  s <- physical(x)$is.sorted
  # as.logical() keeps accepting non-logical truthy values such as 1
  isTRUE(as.logical(s))
}
"is.sorted<-.default" <- function(x
, ...
, value
)
{
  if (is.null(value))
    physical(x)$is.sorted <- NULL
  else if (is.na(value) || !value)
    physical(x)$is.sorted <- FALSE
  else
    physical(x)$is.sorted <- TRUE
  x
}



#! \name{na.count}
#! \alias{na.count.ff}
#! \alias{na.count.default}
#! \alias{na.count<-.ff}
#! \alias{na.count<-.default}
#! \title{ Getting and setting 'na.count' physical attribute }
#! \description{
#!   The 'na.count' physical attribute gives the current number of NAs if properly initialized and properly maintained, see details.
#! }
#! \usage{
#! \method{na.count}{ff}(x, \dots)
#! \method{na.count}{default}(x, \dots)
#! \method{na.count}{ff}(x, \dots) <- value
#! \method{na.count}{default}(x, \dots) <- value
#! }
#! \arguments{
#!   \item{x}{ an ff or ram object }
#!   \item{\dots}{ further arguments (not used) }
#!   \item{value}{ NULL (to remove the 'na.count' attribute) or TRUE to activate or an integer value }
#! }
#! \details{
#!   The 'na.count' feature is activated by assigning the current number of NAs to \code{na.count(x) <- currentNA} and deactivated by assigning NULL.
#!   The 'na.count' feature is maintained by \code{\link{getset.ff}}, \code{\link{readwrite.ff}} and \code{\link{swap}},
#!   other ff methods for writing -- \code{\link{set.ff}}, \code{\link{[[<-.ff}}, \code{\link{write.ff}}, \code{\link[=Extract.ff]{[<-.ff}} -- will stop if 'na.count' is activated.
#!   The functions \command{na.count} and \command{na.count<-} are generic.
#!   For ram objects, the default method for \command{na.count} calculates the number of NAs on the fly, thus no maintenance restrictions apply.
#! }
#! \value{
#!   NA (if set to NULL or NA) or an integer value otherwise
#! }
#! \author{ Jens Oehlschlägel, Daniel Adler (C++ back-end) }
#! \seealso{ \code{\link{getset.ff}}, \code{\link{readwrite.ff}} and \code{\link{swap}} for methods that support maintenance of 'na.count', \code{\link{NA}}, \code{\link[bit:Metadata]{is.sorted}} for yet another \code{\link[=Extract.ff]{physical}} attribute }
#! \examples{
#!   message("--- ff examples ---")
#!   x <- ff(1:12)
#!   na.count(x)
#!   message("activate the 'na.count' physical attribute and set the current na.count manually")
#!   na.count(x) <- 0L
#!   message("add one NA with a method that maintains na.count")
#!   swap(x, NA, 1)
#!   na.count(x)
#!   message("remove the 'na.count' physical attribute (and stop automatic maintenance)")
#!   na.count(x) <- NULL
#!   message("activate the 'na.count' physical attribute and have ff automatically 
#! calculate the current na.count")
#!   na.count(x) <- TRUE
#!   na.count(x)
#!   message("--- ram examples ---")
#!   x <- 1:12
#!   na.count(x)
#!   x[1] <- NA
#!   message("activate the 'na.count' physical attribute and have R automatically 
#! calculate the current na.count")
#!   na.count(x) <- TRUE
#!   na.count(x)
#!   message("remove the 'na.count' physical attribute (and stop automatic maintenance)")
#!   na.count(x) <- NULL
#!   na.count(x)
#!   rm(x); gc()
#! }
#! \keyword{ IO }
#! \keyword{ data }

# Return the 'na.count' physical attribute of ff object `x`.
# Yields NA_integer_ when physical(x) carries a VW entry or when no na.count
# is stored; otherwise the stored count is returned unchanged.
# NOTE(review): VW is looked up in the physical attributes here, whereas
# `length<-.ff` reads virtual(x)$VW -- confirm which location is intended.
na.count.ff <- function(x, ...) {  # `...` only keeps R CMD check quiet
  phys <- physical(x)
  if (!is.null(phys$VW) || is.null(phys$na.count))
    return(NA_integer_)
  phys$na.count
}
"na.count<-.ff" <- function(x
, ...     # dummy to keep R CMD check quiet
, value   # assigning NA deactivates na.count, assigning integer value sets na.count to this value, assigning TRUE calculates na.count and sets it.
){
  if (!is.null(attr(attr(x, "physical"),"VW")))
    stop("you can't set na.count during existence of a virtual window, see ?vw")
  if (is.null(value))
    physical(x)$na.count <- NULL
  else{
    if (length(x)){
    if (is.logical(value)){
      if (value){
        i1 <- i2 <- 0L  # dummy assignment to shut up R CMD CHECK NOTE about no visible binding
        value <- ffvecapply(sum(is.na(x[i1:i2])), X=x, RETURN=TRUE, CFUN="sum")
      }else{
        stop("assign value=NULL (not value=FALSE) to deactivate na.count")
      }
    }
    }else{
      value = 0L
    }
    physical(x)$na.count <- as.integer(value)
  }
  x
}
# for ram objects we always calculate it on-the-fly (when 'activated')
# na.count for ram objects: when the 'na.count' physical attribute is present
# (i.e. 'activated'), the NAs are counted on the fly; otherwise NA_integer_.
na.count.default <- function(x, ...) {  # `...` only keeps R CMD check quiet
  activated <- !is.null(physical(x)$na.count)
  if (!activated)
    return(NA_integer_)
  as.integer(sum(is.na(x[])))
}
"na.count<-.default" <- function(x
, ... # dummy to keep R CMD check quiet
, value
){
  if (is.null(value))
    physical(x)$na.count <- NULL
  else
    physical(x)$na.count <- TRUE  # == 'activation' of na.count
  x
}


#! \name{physical.ff}
#! \alias{physical.ff}
#! \alias{physical<-.ff}
#! \alias{virtual.ff}
#! \alias{virtual<-.ff}
#! \title{ Getting and setting physical and virtual attributes of ff objects }
#! \description{
#!   Functions for getting and setting physical and virtual attributes of ff objects.
#! }
#! \usage{
#! \method{physical}{ff}(x)
#! \method{virtual}{ff}(x)
#! \method{physical}{ff}(x) <- value
#! \method{virtual}{ff}(x) <- value
#! }
#! \arguments{
#!   \item{x}{ an ff object }
#!   \item{value}{ a list with named elements }
#! }
#! \details{
#!   ff objects have physical and virtual attributes, which have different copying semantics:
#!   physical attributes are shared between copies of ff objects while virtual attributes might differ between copies.
#!   \code{\link{as.ram}} will retain some physical and virtual attributes in the ram clone,
#!   such that \code{\link{as.ff}} can restore an ff object with the same attributes.
#! }
#! \value{
#!   \command{physical} and \command{virtual} returns a list with named elements
#! }
#! \author{ Jens Oehlschlägel }
#! \seealso{
#!  \code{\link[=Extract.ff]{physical.ff}}, \code{\link[=Extract.ff]{physical.ffdf}}, \code{\link{ff}}, \code{\link{as.ram}}; \cr
#!  \code{\link[bit:Metadata]{is.sorted}} and \code{\link[bit]{na.count}} for applications of physical attributes; \cr
#!  \code{\link{levels.ff}} and \code{\link{ramattribs}} for applications of virtual attributes
#! }
#! \examples{
#!   x <- ff(1:12)
#!   x
#!   physical(x)
#!   virtual(x)
#!   y <- as.ram(x)
#!   physical(y)
#!   virtual(y)
#!   rm(x,y); gc()
#! }
#! \keyword{ IO }
#! \keyword{ data }
#! \keyword{ attribute }


# Return the physical attributes of ff object `x` as a named list.
# The attributes of attr(x, "physical") are taken without their "class" entry;
# if they contain no "vmode", vmode(x) is prepended as first element.
physical.ff <- function(x){
  phys <- attributes(attr(x, "physical"))
  phys <- phys[!(names(phys) %in% "class")]
  if ("vmode" %in% names(phys))
    return(phys)
  c(list(vmode=vmode(x)), phys)
}


"physical<-.ff" <- function(x, value){
  attributes(attr(x, "physical")) <- c(value, list(class="ff_pointer"))
  x
}



# -- now virtual attributes follow -- NOTE that the class of the ff object also has copy by value semantics, thus you can have two objects of different classes pointing to the same ff file

# Return the virtual attributes of ff object `x` as a named list: the
# attributes of attr(x, "virtual") without their "class" entry.
virtual.ff <- function(x){
  virt <- attributes(attr(x, "virtual"))
  keep <- !(names(virt) %in% "class")
  virt[keep]
}

"virtual<-.ff" <- function(x, value){
  attributes(attr(x, "virtual")) <- c(value, list(class="virtual"))
  x
}



#! \name{ramattribs}
#! \alias{ramclass}
#! \alias{ramclass.ff}
#! \alias{ramclass.default}
#! \alias{ramclass_excludes}
#! \alias{ramattribs}
#! \alias{ramattribs.ff}
#! \alias{ramattribs.default}
#! \alias{ramattribs_excludes}
#! \title{ Get ramclass and ramattribs }
#! \description{
#!   Functions \command{ramclass} and \command{ramattribs} return the respective virtual attributes, that determine which class (and attributes) an ff object receives when subscripted (or coerced) to ram.
#! }
#! \usage{
#! ramclass(x, \dots)
#! \method{ramclass}{ff}(x, \dots)
#! \method{ramclass}{default}(x, \dots)
#! ramattribs(x, \dots)
#! \method{ramattribs}{ff}(x, \dots)
#! \method{ramattribs}{default}(x, \dots)
#! }
#! \arguments{
#!   \item{x}{ \code{x} }
#!   \item{\dots}{ further arguments (not used) }
#! }
#! \details{
#!  \command{ramclass} and \command{ramattribs} provide a general mechanism to store atomic classes in ff objects,
#!   for example \code{\link{factor}} -- see \code{\link{levels.ff}} -- and \code{\link[base:DateTimeClasses]{POSIXct}}, see the example.
#! }
#! \value{
#!   \command{ramclass} returns a character vector with classnames and \command{ramattribs} returns a list with named elements just like \code{\link{attributes}}.
#!   The vectors \code{ramclass_excludes} and \code{ramattribs_excludes} name those attributes, which are not exported from ff to ram objects when using \code{\link{as.ram}}.
#! }
#! \author{ Jens Oehlschlägel }
#! \seealso{ \code{\link{ff}}, \code{\link[=Extract.ff]{virtual}}, \code{\link{as.ram}}, \code{\link{levels.ff}}, \code{\link{attributes}}, \code{\link{DateTimeClasses}} }
#! \examples{
#!   x <- ff(as.POSIXct(as.POSIXlt(Sys.time(), "GMT")), length=12)
#!   x
#!   ramclass(x)
#!   ramattribs(x)
#!   class(x[])
#!   attributes(x[])
#!   virtual(x)$ramattribs$tzone = NULL
#!   attributes(x[])
#!   rm(x); gc()
#! }
#! \keyword{ IO }
#! \keyword{ data }
#! \keyword{ attribute }
#! \keyword{ classes }

# Return the "ramclass" entry stored on the "virtual" attribute of ff
# object `x` (NULL if it is not set).
ramclass.ff <- function(x, ...) {  # `...` only keeps R CMD check quiet
  virt <- attr(x, "virtual")
  attr(virt, "ramclass")
}
# Return the "ramattribs" entry stored on the "virtual" attribute of ff
# object `x` (NULL if it is not set).
ramattribs.ff <- function(x, ...) {  # `...` only keeps R CMD check quiet
  virt <- attr(x, "virtual")
  attr(virt, "ramattribs")
}

# ramclass for ram objects: class(x) with every name listed in
# ramclass_excludes dropped; NULL if nothing remains.
ramclass.default <- function(x, ...) {  # `...` only keeps R CMD check quiet
  all_classes <- class(x)
  kept <- all_classes[!(all_classes %in% ramclass_excludes)]
  if (length(kept) == 0L)
    return(NULL)
  kept
}
# ramattribs for ram objects: attributes(x) with every entry whose name is
# listed in ramattribs_excludes dropped; NULL if nothing remains.
ramattribs.default <- function(x, ...) {  # `...` only keeps R CMD check quiet
  all_attribs <- attributes(x)
  kept <- all_attribs[!(names(all_attribs) %in% ramattribs_excludes)]
  if (length(kept) == 0L)
    return(NULL)
  kept
}


#! \name{length.ff}
#! \alias{length.ff}
#! \alias{length<-.ff}
#! \title{ Getting and setting length }
#! \description{
#!   Gets and sets length of ff objects.
#! }
#! \usage{
#! \method{length}{ff}(x)
#! \method{length}{ff}(x) <- value
#! }
#! \arguments{
#!   \item{x}{ object to query }
#!   \item{value}{ new object length }
#! }
#! \details{
#!   Changing the length of ff objects is only allowed if no \code{\link{vw}} is used.
#!   Changing the length of ff objects will remove any \code{\link{dim.ff}} and \code{\link{dimnames.ff}} attribute.
#!   Changing the length of ff objects will remove any \code{\link[bit]{na.count}} or \code{\link[bit:Metadata]{is.sorted}} attribute and warn about this.
#!   New elements are usually zero, but it may depend on OS and filesystem what they really are.
#!   If you want standard R behaviour: filling with NA, you need to do this yourself.
#!   As an exception to this rule, ff objects with \code{\link{names.ff}} will be filled with NA's automatically,
#!   and the length of the names will be adjusted (filled with position numbers where needed, which can easily consume a lot of RAM,
#!   therefore removing 'names' will help to faster increase length without RAM problems).
#! }
#! \note{
#!   Special care needs to be taken with regard to ff objects that represent factors.
#!   For ff factors based on UNSIGNED \code{\link{vmode}s}, new values of zero are silently interpreted as the first factor level.
#!   For ff factors based on SIGNED \code{\link{vmode}s}, new values of zero result in illegal factor levels.
#!   See \code{\link{nrow<-}}.
#! }
#! \value{
#!   Integer scalar
#! }
#! \author{ Jens Oehlschlägel }
#! \seealso{ \code{\link{length}}, \code{\link{maxlength}}, \code{\link{file.resize}}, \code{\link[=dim.ff]{dim}}, \code{\link[=Extract.ff]{virtual}} }
#! \examples{
#!   x <- ff(1:12)
#!   maxlength(x)
#!   length(x)
#!   length(x) <- 10
#!   maxlength(x)
#!   length(x)
#!   length(x) <- 16
#!   maxlength(x)
#!   length(x)
#!   rm(x); gc()
#! }
#! \keyword{ IO }
#! \keyword{ data }
#! \keyword{ attribute }



# Length of ff object `x`, read from the "Length" entry on its "virtual"
# attribute.
length.ff <- function(x) {
  virt <- attr(x, "virtual")
  attr(virt, "Length")
}


# Resize an ff vector to 'value' elements (variant stored under the name "length1<-.ff").
# NOTE(review): R dispatches `length(x) <- value` to "length<-", not "length1<-", so this
# looks like an earlier copy of "length<-.ff" -- confirm whether anything still calls it.
# In contrast to "length<-.ff" it does not overwrite padding elements when shrinking.
#   x      ff object; must have neither a virtual window (VW) nor a Dim attribute
#   value  new length, coerced with as.integer()
# Returns x with updated physical and virtual attributes; x is returned unchanged
# if 'value' equals the current length.
"length1<-.ff" <- function(x, value)
{
  virtual <- virtual(x)
  # resizing is refused while a virtual window or a dim attribute is present
  if (!is.null(virtual$VW))
    stop("you can't set length during existence of a virtual window, see ?vw")
  if (!is.null(virtual$Dim))
    stop("you can't set length of arrays, either remove the dim attribute or use 'dim<-' instead")
  
  value <- as.integer(value)
  oldlen <- virtual$Length
  
  # nothing to do
  if (value==oldlen)
    return(x)
  
  
  vm <- vmode(x)
  # required bytes, rounded up to a whole multiple of .rambytes[vm]
  # (presumably .ffbytes is the per-element size on disk -- TODO confirm)
  newbytes <- ceiling((value * .ffbytes[vm])/.rambytes[vm])*.rambytes[vm]
  #newbytes <- ceiling(value * .ffbytes[vm])
  
  # remember whether x was open so that state can be restored at the end;
  # x is closed before file.resize() is called and opened again afterwards
  io <- is.open(x)
  if (io)
    close(x)
  file.resize( filename(x), newbytes )
  open(x)
  
  physical <- physical(x)
  # number of elements that fit into the resized file
  physical$maxlength <- as.integer(newbytes / .ffbytes[vm])
  # a cached NA count is dropped on any length change
  if (!is.null(physical$na.count)){
    physical$na.count <- NULL
    warning("changing length(ff) removed attribute 'na.count'")
  }
  # the sorted flag is dropped only when the vector grows
  if (!is.null(physical$is.sorted) && value>oldlen){
    physical$is.sorted <- NULL
    warning("increasing length(ff) removed attribute 'is.sorted'")
  }
  physical(x) <- physical
  
  # the new Length is stored before any element is written below
  virtual$Length <- value
  virtual(x) <- virtual
  
  if (!is.null(virtual$Names)){
    if (value>oldlen){
      # growing a named vector: for vmodes that are neither unsigned nor "raw"
      # the new elements are set to NA
      if (!.vunsigned[vm] && vm!="raw")
        x[hi(oldlen+1L, value)] <- NA
      # new elements get their position number as name
      virtual$Names <- c(virtual$Names, (oldlen+1L):value)
    }else
      # shrinking: keep the names of the surviving elements
      virtual$Names <- virtual$Names[seq_len(value)]
  }
  
  # store again so that the adjusted Names are written back
  virtual(x) <- virtual
  
  # restore the closed state found on entry
  if (!io)
    close(x)
  
  x
}


# Replacement method setting the length of an ff vector.
#   x      ff object; must have neither a virtual window (VW) nor a Dim attribute
#   value  new length, coerced with as.integer()
# The backing file is resized via file.resize(), physical$maxlength is updated,
# 'na.count' is dropped (with a warning) and 'is.sorted' is dropped when growing.
# Returns x; x is returned unchanged if 'value' equals the current length.
"length<-.ff" <- function(x, value)
{
  virtual <- virtual(x)
  # resizing is refused while a virtual window or a dim attribute is present
  if (!is.null(virtual$VW))
    stop("you can't set length during existence of a virtual window, see ?vw")
  if (!is.null(virtual$Dim))
    stop("you can't set length of arrays, either remove the dim attribute or use 'dim<-' instead")

  value <- as.integer(value)
  oldlen <- virtual$Length

  # nothing to do
  if (value==oldlen)
    return(x)


  vm <- vmode(x)
  # required bytes, rounded up to a whole multiple of .rambytes[vm]
  # (presumably .ffbytes is the per-element size on disk -- TODO confirm)
  newbytes <- ceiling((value * .ffbytes[vm])/.rambytes[vm])*.rambytes[vm]
  #newbytes <- ceiling(value * .ffbytes[vm])
  
  # remember whether x was open so that state can be restored at the end;
  # x is closed before file.resize() is called and opened again afterwards
  io <- is.open(x)
  if (io)
    close(x)
  file.resize( filename(x), newbytes )
  open(x)

  physical <- physical(x)
  # number of elements that fit into the resized file
  physical$maxlength <- as.integer(newbytes / .ffbytes[vm])
  # a cached NA count is dropped on any length change
  if (!is.null(physical$na.count)){
    physical$na.count <- NULL
    warning("changing length(ff) removed attribute 'na.count'")
  }
  # the sorted flag is dropped only when the vector grows
  if (!is.null(physical$is.sorted) && value>oldlen){
    physical$is.sorted <- NULL
    warning("increasing length(ff) removed attribute 'is.sorted'")
  }
  physical(x) <- physical
  
  if (value>oldlen){
    # growing: store the new Length first so the new positions can be written
    virtual$Length <- value
    virtual(x) <- virtual
    if (!is.null(virtual$Names)){
      # named vector: for vmodes that are neither unsigned nor "raw"
      # the new elements are set to NA
      if (!.vunsigned[vm] && vm!="raw")
        x[hi(oldlen+1L, value)] <- NA
      # new elements get their position number as name
      virtual$Names <- c(virtual$Names, (oldlen+1L):value)
    }
  }else {  # value < oldlen (equality returned early above)
    # number of elements per .rambytes unit; presumably > 1 only for
    # bit-packed vmodes -- TODO confirm
    npacked <- round(.rambytes[vm]/.ffbytes[vm])
    if (npacked>1 && (nrest <- value %% npacked)){
      # 'value' does not end on a unit boundary: temporarily extend Length up to
      # the boundary and overwrite the nrest padding elements behind 'value'
      nrest <- npacked - value %% npacked
      virtual$Length <- value+nrest
      virtual(x) <- virtual
      # padding value is .vNA[vm] for named vectors, 0L otherwise
      if (!is.null(virtual$Names))
        x[hi(value+1L, value+nrest)] <- .vNA[vm]
      else
        x[hi(value+1L, value+nrest)] <- 0L
    }
    virtual$Length <- value
    # keep the names of the surviving elements
    if (!is.null(virtual$Names)){
      virtual$Names <- virtual$Names[seq_len(value)]
    }
  }
  
  # write back the final Length and Names
  virtual(x) <- virtual
  
  # restore the closed state found on entry
  if (!io)
    close(x)

  x
}


#! \name{levels.ff}
#! \alias{levels.ff}
#! \alias{levels<-.ff}
#! \alias{is.factor}
#! \alias{is.factor.default}
#! \alias{is.factor.ff}
#! \alias{is.ordered}
#! \alias{is.ordered.default}
#! \alias{is.ordered.ff}
#! \title{ Getting and setting factor levels }
#! \description{
#!   \code{levels.ff<-} sets factor levels, \code{levels.ff} gets factor levels
#! }
#! \usage{
#! \method{levels}{ff}(x)
#! \method{levels}{ff}(x) <- value
#!  is.factor(x)
#!  is.ordered(x)
#! \method{is.factor}{ff}(x)
#! \method{is.ordered}{ff}(x)
#! \method{is.factor}{default}(x)
#! \method{is.ordered}{default}(x)
#! }
#! \arguments{
#!   \item{x}{ an ff object }
#!   \item{value}{ the new factor levels, if NA is an allowed level it needs to be given explicitly, nothing is excluded }
#! }
#! \details{
#!   The ff object must have an integer vmode, see \code{\link{.rammode}}.
#!   If the mode is unsigned -- see \code{\link{.vunsigned}} -- the first factor level is coded with 0L instead of 1L in order to maximize the number of codable levels.
#!   Usually the internal ff coding -- see \code{\link{ram2ffcode}} -- is invisible to the user: when subscripting from an ff factor, unsigned codings are automatically converted to R's standard factor codes starting at 1L.
#!   However, you need to be aware of the internal ff coding in two situations. \cr
#!   1. If you convert an ff integer object to an ff factor object and vice versa by assigning levels and \code{is.null(oldlevels)!=is.null(newlevels)}.  \cr
#!   2. Assigning data that does not match any level usually results in NA, however, in unsigned types there is no NA and all unknown data are mapped to the first level.
#! }
#! \value{
#!   \command{levels} returns a character vector of levels (possibly including \code{as.character(NA)}).
#! }
#! \author{ Jens Oehlschlägel }
#! \note{ When levels are assigned to an ff object that formerly had no levels, we automatically assign \code{\link{ramclass}} == "factor". If you want to change to an ordered factor, use \code{\link[=Extract.ff]{virtual}$ramclass <- c("ordered", "factor")} }
#! \seealso{ \code{\link{ramclass}}, \code{\link{factor}}, \code{\link[=Extract.ff]{virtual}} }
#! \examples{
#!   message("--- create an ff factor including NA as last level")
#!   x <- ff("a", levels=c(letters, NA), length=99)
#!   message('    we expect a warning because "A" is an unknown level')
#!   x[] <- c("a", NA,"A")
#!   x
#!   levels(x)
#!
#!   message("--- create an ff ordered factor")
#!   x <- ff(letters, levels=letters, ramclass=c("ordered","factor"), length=260)
#!   x
#!   levels(x)
#!
#!   message("    make it a non-ordered factor")
#!   virtual(x)$ramclass <- "factor"
#!   x
#!   rm(x); gc()
#!
#!  \dontrun{
#!   message("--- create an unsigned quad factor")
#!   x <- ff(c("A","T","G","C"), levels=c("A","T","G","C"), vmode="quad", length=100)
#!   x
#!   message("  0:3 coding usually invisible to the user")
#!   unclass(x[1:4])
#!   message("    after removing levels, the 0:3 coding becomes visible to the user")
#!   message("    we expect a warning here")
#!   levels(x) <- NULL
#!   x[1:4]
#!   rm(x); gc()
#!  }
#!
#! }
#! \keyword{ IO }
#! \keyword{ data }
#! \keyword{ attribute }

# Factor levels of an ff object: the "Levels" entry of its virtual attributes
# (NULL if none are stored).
levels.ff <- function(x){
  virtual <- attr(x, "virtual")
  attr(virtual, "Levels")
}

# Replacement method for the factor levels of an ff object.
#   x      ff object whose vmode maps to ram mode "integer"
#   value  new levels, or NULL to remove existing levels
# Assigning levels to an object without levels also sets ramclass "factor";
# removing levels also removes the ramclass. Returns x.
"levels<-.ff" <- function(x, value){
  vm <- attr(attr(x, "physical"), "vmode")
  if (.rammode[vm]!="integer")
    stop("factors must be integer")
  # unsigned vmodes start coding at 0 and thus carry one more level
  maxlevels <- .vmax[vm]+.vunsigned[vm]
  if (length(value)>maxlevels)
    stop("vmode '", vm, "' can carry max ", maxlevels, " levels")

  oldlevels <- attr(attr(x, "virtual"), "Levels")
  had <- !is.null(oldlevels)
  has <- !is.null(value)

  if (!had && has){
    # plain integer data becomes a factor
    if (.vunsigned[vm])
      warning("assigning levels to unsigned vmode interprets 0 as first level !!")
    attr(attr(x, "virtual"), "Levels") <- value
    # we cannot know whether it is meant to be ordered
    attr(attr(x, "virtual"), "ramclass") <- "factor"
  }else if (had && !has){
    # factor becomes plain integer data
    attr(attr(x, "virtual"), "ramclass") <- NULL
    attr(attr(x, "virtual"), "Levels") <- NULL
    if (.vunsigned[vm])
      warning("removing levels from unsigned vmode leaves first level as 0 !!")
  }else if (had && has){
    # levels are replaced, ramclass is left untouched
    if (length(value)<length(oldlevels))
      warning("lengths of levels was reduced")
    attr(attr(x, "virtual"), "Levels") <- value
  }
  x
}


# TRUE if the ramclass stored in the virtual attributes contains "factor".
is.factor.ff <- function(x){
  "factor" %in% attr(attr(x, "virtual"), "ramclass")
}

# TRUE if the ramclass stored in the virtual attributes contains "ordered".
is.ordered.ff <- function(x){
  "ordered" %in% attr(attr(x, "virtual"), "ramclass")
}


#! \name{names.ff}
#! \alias{names.ff}
#! \alias{names<-.ff}
#! \alias{names.ff_array}
#! \alias{names<-.ff_array}
#! \title{ Getting and setting names }
#! \description{
#!   For \code{ff_vector}s you can set names, though this is not recommended for large objects.
#! }
#! \usage{
#!   \method{names}{ff}(x)
#!   \method{names}{ff}(x) <- value
#!   \method{names}{ff_array}(x)
#!   \method{names}{ff_array}(x) <- value
#! }
#! \arguments{
#!   \item{x}{ a ff vector }
#!   \item{value}{ a character vector }
#! }
#! \details{
#!   If \code{\link{vw}} is set, \command{names.ff} returns the appropriate part of the names, but you can't set names while \command{vw} is set.
#!   \command{names.ff\_array}
#!   returns NULL and setting names for
#!   \code{ff_array}s is not allowed,
#!   but setting \code{\link[=dimnames.ff]{dimnames}} is.
#! }
#! \value{
#!   \command{names} returns a character vector (or NULL)
#! }
#! \author{ Jens Oehlschlägel }
#! \seealso{ \code{\link{names}}, \code{\link[=dimnames.ff]{dimnames.ff_array}}, \code{\link{vw}}, \code{\link[=Extract.ff]{virtual}} }
#! \examples{
#!   x <- ff(1:26, names=letters)
#!   names(x)
#!   names(x) <- LETTERS
#!   names(x)
#!   names(x) <- NULL
#!   names(x)
#!   rm(x); gc()
#! }
#! \keyword{ IO }
#! \keyword{ data }

# Names of an ff vector. Without a virtual window all stored names are
# returned; with a window only the names inside the window are returned
# (character() for an empty window).
names.ff <- function(x){
  meta <- attr(x, "virtual")
  window <- attr(meta, "VW")
  stored <- attr(meta, "Names")
  if (is.null(window))
    return(stored)
  if (!(window[2]>0))
    return(character())
  # window[1] is the offset, window[2] the number of visible elements
  first <- window[1]+1L
  last <- window[1]+window[2]
  stored[first:last]
}

# Replacement method for the names of an ff vector.
#   x      ff vector
#   value  NULL (removes all names) or a vector with one name per visible
#          element; it is stored as character
# Without a virtual window the complete names are replaced. With a virtual
# window only the names inside the window are replaced; if no names existed
# yet, the positions outside the window get empty names.
# Stops if length(value) does not match the number of visible elements.
# Returns x.
"names<-.ff" <- function(x, value){
  if (is.null(value)){
    attr(attr(x, "virtual"), "Names") <- NULL
    return(x)
  }
  virtual <- attr(x, "virtual")
  vw <- attr(virtual, "VW")
  if (is.null(vw)){
    if (length(value)!=length(x))
      stop("names attributes has wrong length")
    attr(attr(x, "virtual"), "Names") <- as.character(as.vector(value))
  }else{
    # vw[2] is the window size; without this check a wrong-length 'value'
    # would be silently recycled or truncated into the window
    if (length(value)!=vw[2])
      stop("names attributes has wrong length")
    nam <- attr(virtual, "Names")
    if (is.null(nam))
      nam <- character(sum(vw))
    if (vw[2]>0)
      nam[(vw[1]+1L):(vw[1]+vw[2])] <- as.character(as.vector(value))
    attr(attr(x, "virtual"), "Names") <- nam
  }
  x
}

# ff arrays never carry names: always NULL.
"names.ff_array" <- function(x){
  NULL
}

# Names cannot be assigned to ff arrays; only the no-op assignment of NULL
# is accepted, everything else is an error.
"names<-.ff_array" <- function(x, value){
  if (is.null(value))
    return(x)
  stop("assigning names (other than NULL) to ff_array not supported (not useful and very complicated due to dimorder)")
}


#! \name{dimnames.ff}
#! \alias{dimnames.ff}
#! \alias{dimnames.ff_array}
#! \alias{dimnames<-.ff_array}
#! \title{ Getting and setting dimnames }
#! \description{
#!   For \code{ff_array}s you can set dimnames.
#! }
#! \usage{
#!   \method{dimnames}{ff_array}(x)
#!   \method{dimnames}{ff_array}(x) <- value
#! }
#! \arguments{
#!   \item{x}{ a ff array (or matrix) }
#!   \item{value}{ a list with length(dim(x)) elements (each either NULL or a character vector whose length matches the corresponding dimension) }
#! }
#! \details{
#!   if \code{\link{vw}} is set, \command{dimnames.ff\_array} returns the appropriate part of the names, but you can't set \command{dimnames} while \command{vw} is set.
#!   \command{dimnames} returns NULL for \code{ff_vectors} and setting \code{dimnames} for \code{ff_vector} is not allowed, but setting \code{\link[=names.ff]{names}} is.
#! }
#! \value{
#!   \command{dimnames} returns a list, see \code{\link{dimnames}}
#! }
#! \author{ Jens Oehlschlägel }
#! \seealso{ \code{\link{dimnames}}, \code{\link{dim.ff}} , \code{\link{names.ff}} , \code{\link{vw}}, \code{\link[=Extract.ff]{virtual}} }
#! \examples{
#!   x <- ff(1:12, dim=c(3,4), dimnames=list(letters[1:3], LETTERS[1:4]))
#!   dimnames(x)
#!   dimnames(x) <- list(LETTERS[1:3], letters[1:4])
#!   dimnames(x)
#!   dimnames(x) <- NULL
#!   dimnames(x)
#!   rm(x); gc()
#! }
#! \keyword{ IO }
#! \keyword{ data }

# Dimnames of an ff array. Without a virtual window the stored Dimnames are
# returned as they are. With a virtual window a list with one element per
# dimension is returned, holding only the names inside the window.
dimnames.ff_array <- function(x){
  virtual <- attr(x, "virtual")
  dn <- attr(virtual, "Dimnames")
  vw <- attr(virtual, "VW")
  if (is.null(vw))
    return(dn)
  # first row of VW holds the per-dimension offsets; while a window is set,
  # Dim holds the per-dimension window extents
  offset <- vw[1,]
  d <- attr(virtual,"Dim")
  ii <- seq_along(d)
  names(ii) <- names(d)
  lapply(ii, function(i){
    # seq_len() yields no positions for a zero-extent window, whereas
    # (offset+1):(offset+0) would count backwards and pick a wrong name
    dn[[i]][offset[i] + seq_len(d[i])]
  })
}

# Replacement method for the dimnames of an ff array.
#   x      ff array
#   value  NULL, or a list with one element per dimension, each element
#          being NULL or having the length of its dimension
# Non-NULL dimnames cannot be set while a virtual window exists. Returns x.
"dimnames<-.ff_array" <- function(x, value){
  if (!is.null(value)){
    if (!is.null(attr(attr(x, "virtual"), "VW")))
      stop("vw must be NULL before you can set dimnames")
    if (!is.list(value))
      stop("dimnames must be NULL or list")
    extent <- dim(x)
    nd <- length(extent)
    if (length(value)!=nd)
      stop("length of dimnames does not match length of dim")
    # check every dimension before complaining
    fits <- logical(nd)
    for (i in seq_len(nd)){
      dn <- value[[i]]
      fits[i] <- is.null(dn) || length(dn) == extent[i]
    }
    if (!all(fits))
      stop("dimnames(i) is neither NULL nor matches dim(i)")
  }
  attr(attr(x, "virtual"), "Dimnames") <- value
  x
}



#! \name{dim.ff}
#! \alias{dim.ff}
#! \alias{dim.ffdf}
#! \alias{dim<-.ff}
#! \alias{dim<-.ffdf}
#! \alias{dimorder}
#! \alias{dimorder.default}
#! \alias{dimorder.ff_array}
#! \alias{dimorder.ffdf}
#! \alias{dimorder<-}
#! \alias{dimorder<-.ff_array}
#! \alias{dimorder<-.ffdf}
#! \title{ Getting and setting dim and dimorder }
#! \description{
#!   Assigning \code{dim} to an \code{ff_vector} changes it to an \code{ff_array}.
#!   Beyond that \code{dimorder} can be assigned to change from column-major order to row-major order or generalizations for higher order \code{ff_array}.
#! }
#! \usage{
#!   \method{dim}{ff}(x)
#!   \method{dim}{ffdf}(x)
#!   \method{dim}{ff}(x) <- value
#!   \method{dim}{ffdf}(x) <- value
#!    dimorder(x, \dots)
#!    dimorder(x, \dots) <- value
#!   \method{dimorder}{default}(x, \dots)
#!   \method{dimorder}{ff_array}(x, \dots)
#!   \method{dimorder}{ffdf}(x, \dots)
#!   \method{dimorder}{ff_array}(x, \dots) <- value
#!   \method{dimorder}{ffdf}(x, \dots) <- value  # just here to catch forbidden assignments
#! }
#! \arguments{
#!   \item{x}{ a ff object }
#!   \item{value}{ an appropriate integer vector }
#!   \item{\dots}{ further arguments (not used) }
#! }
#! \details{
#!    \command{dim} and \command{dimorder} are \code{\link[=Extract.ff]{virtual}} attributes. Thus two copies of an R ff object can point to the same file but interpret it differently.
#!    \command{dim} has the usual meaning, \command{dimorder} defines the dimension order of storage, i.e. \code{c(1,2)} corresponds to R's standard column-major order,
#!    \code{c(2,1)} corresponds to row-major order, and for higher dimensional arrays dimorder can also be used. Standard dimorder is \code{seq_along(dim(x))}. \cr
#!    For \code{\link{ffdf}} \code{dim} returns the number of rows and virtual columns. With \code{dim<-.ffdf} only the number of rows can be changed. For convenience you can assign \code{NA} to the number of columns. \cr
#!    For \code{\link{ffdf}} the dimorder returns non-standard dimorder if any of its columns contains a ff object with non-standard dimorder (see \code{\link{dimorderStandard}})
#!    An even higher level of virtualization is available using virtual windows, see \code{\link{vw}}.
#! }
#! \note{
#!   \code{x[]} returns a matrix like \code{x[,]} and thus respects dimorder, while \code{x[i:j]} returns a vector and simply returns elements in the stored order.
#!   Check the corresponding example twice, in order to make sure you understand that for non-standard dimorder \code{x[seq_along(x)]} is \emph{not the same} as \code{as.vector(x[])}.
#! }
#! \value{
#!   \command{dim} and \command{dimorder} return an integer vector (or NULL)
#! }
#! \author{ Jens Oehlschlägel }
#! \seealso{ \code{\link{dim}}, \code{\link[=dimnames.ff]{dimnames.ff_array}}, \code{\link{dimorderStandard}}, \code{\link{vw}}, \code{\link[=Extract.ff]{virtual}} }
#! \examples{
#!   x <- ff(1:12, dim=c(3,4), dimorder=c(2:1))
#!   y <- x
#!   dim(y) <- c(4,3)
#!   dimorder(y) <- c(1:2)
#!   x
#!   y
#!   x[]
#!   y[]
#!   x[,bydim=c(2,1)]
#!   y[,bydim=c(2,1)]
#!
#!   message("NOTE that x[] like x[,] returns a matrix (respects dimorder),")
#!   message("while x[1:12] returns a vector IN STORAGE ORDER")
#!   message("check the following examples twice to make sure you understand this")
#!   x[,]
#!   x[]
#!   as.vector(x[])
#!   x[1:12]
#!   rm(x,y); gc()
#!
#!   \dontshow{
#!     message("some regression test with regard to different dimorders")
#!     k <- 24
#!     d <- 3:5
#!     n <- prod(d)
#!     for (i in 1:k){
#!       a <- array(sample(n), dim=sample(d))
#!       x <- as.ff(a, dimorder=sample(seq_along(d)))
#!       if (!identical(a[1:n], x[1:n]))
#!         stop("error in caclulating access positions")
#!       if (!identical(a[1:dim(a)[1],,], x[1:dim(a)[1],,]))
#!         stop("error in caclulating access positions")
#!     }
#!     rm(x); gc()
#!   }
#!   \dontrun{
#!     message("some performance comparison between different dimorders")
#!     n <- 100
#!     m <- 100000
#!     a <- ff(1L,dim=c(n,m))
#!     b <- ff(1L,dim=c(n,m), dimorder=2:1)
#!     system.time(lapply(1:n, function(i)sum(a[i,])))
#!     system.time(lapply(1:n, function(i)sum(b[i,])))
#!     system.time(lapply(1:n, function(i){i<-(i-1)*(m/n)+1; sum(a[,i:(i+m/n-1)])}))
#!     system.time(lapply(1:n, function(i){i<-(i-1)*(m/n)+1; sum(b[,i:(i+m/n-1)])}))
#!     rm(a,b); gc()
#!   }
#! }
#! \keyword{ IO }
#! \keyword{ data }


# Dim of an ff object: the "Dim" entry of its virtual attributes.
dim.ff <- function(x)
{
  virtual <- attr(x, "virtual")
  attr(virtual,"Dim")
}


# Replacement method for dim of an ff object.
#   x      ff object without virtual window
#   value  NULL (turn x into an ff_vector) or the new dim, coerced to integer
# Attention: re-shaping to a dim with unchanged number of elements resets
# dimorder to the standard seq_len(length(dim)) and drops Dimnames and Names.
# If the number of elements changes, only the slowest rotating dimension
# (the last one in dimorder) may differ; the object is then resized through
# 'length<-' and the previous dimorder is restored.
"dim<-.ff" <- function(
  x
, value
)
{
  meta <- attr(x, "virtual")
  if (!is.null(attr(meta, "VW")))
    stop("you can't set dim during existence of a virtual window, see ?vw")

  if (is.null(value)){
    # forget all array metadata and fall back to a plain vector
    attr(meta, "Dim") <- NULL
    attr(meta, "Dimnames") <- NULL
    attr(meta, "Dimorder") <- NULL
    attr(x, "virtual") <- meta
    class(x) <- c("ff_vector","ff")
    return(x)
  }

  newdim <- as.integer(value)
  olddim <- attr(meta, "Dim")
  if (identical(olddim, newdim))
    return(x)

  ndim <- length(newdim)
  nelem <- prod(newdim)
  if (nelem!=attr(meta, "Length")){
    # number of elements changes: all dimensions except the slowest rotating
    # one (given the current dimorder) must stay as they are
    storage <- attr(meta, "Dimorder")
    nold <- length(olddim)
    resizable <- ndim==nold && ( nold==1 || all(olddim[storage][-nold]==newdim[storage][-nold]) )
    if (!resizable)
      stop("you can only grow or shrink the slowest rotating dim")
    dim(x) <- NULL
    length(x) <- nelem
    dim(x) <- newdim
    dimorder(x) <- storage
    return(x)
  }

  # same number of elements: pure re-interpretation with standard dimorder
  attr(meta, "Dim") <- newdim
  attr(meta, "Dimnames") <- NULL
  attr(meta, "Dimorder") <- seq_len(ndim)
  attr(meta, "Names") <- NULL
  attr(x, "virtual") <- meta
  class(x) <- if (ndim==2) c("ff_matrix", "ff_array","ff") else c("ff_array","ff")
  x
}


# Dimorder of an ff array: the "Dimorder" entry of its virtual attributes.
dimorder.ff_array <- function(x
, ... # dummy to keep R CMD check quiet
)
{
  virtual <- attr(x, "virtual")
  attr(virtual,"Dimorder")
}
# Replacement method for the dimorder of an ff array.
# Attention: dimorder can not be chosen arbitrarily, the assigned dimorder
# MUST correspond to the physical layout of the file. The only check done
# here is that 'value' is a permutation of the current dimorder.
"dimorder<-.ff_array" <- function(x
, ... # dummy to keep R CMD check quiet
, value
)  # we assume you know what you do if you use this !!
{
  meta <- attr(x, "virtual")
  if (!is.null(attr(meta,"VW")))
    stop("you can't set dimorder during existence of a virtual window, see ?vw")
  current <- attr(meta,"Dimorder")
  proposed <- as.vector(as.integer(value))
  if (!identical(sort(current), sort(proposed)))
    stop("illegal dimorder, do you know what you are doing?")
  attr(attr(x, "virtual"),"Dimorder") <- proposed
  x
}

# Default dimorder: the standard order seq_along(dim(x)) for objects having
# a dim, NULL otherwise.
dimorder.default <- function(x
, ... # dummy to keep R CMD check quiet
){
  d <- dim(x)
  if (is.null(d))
    return(NULL)
  seq_along(d)
}



#! \name{vw}
#! \alias{vw}
#! \alias{vw<-}
#! \alias{vw.ff}
#! \alias{vw.default}
#! \alias{vw<-.ff_vector}
#! \alias{vw<-.ff_array}
#! \title{ Getting and setting virtual windows }
#! \description{
#!   The virtual window \command{vw} function allows one to define a virtual window into an \code{ff_vector} or \code{ff_array}.
#!   The ff object will behave like a smaller array and it is mapped into the specified region of the complete array.
#!   This allows for example to execute recursive divide and conquer algorithms that work on parts of the full object,
#!   without the need to repeatedly create subfiles.
#! }
#! \usage{
#! vw(x, \dots)
#! vw(x, \dots) <- value
#! \method{vw}{ff}(x, \dots)
#! \method{vw}{default}(x, \dots)
#! \method{vw}{ff_vector}(x, \dots) <- value
#! \method{vw}{ff_array}(x, \dots) <- value
#! }
#! \arguments{
#!   \item{x}{ an \code{ff_vector} or \code{ff_array} }
#!   \item{\dots}{ further arguments (not used) }
#!   \item{value}{ a vector or matrix with an Offset, Window and Rest component, see details and examples }
#! }
#! \details{
#!   Each dimension of an ff array (or vector) is decomposed into three components, an invisible Offset, a visible Window and an invisible Rest.
#!   For each dimension the sum of the vw components must match the dimension (or length).
#!   For an \code{ff_vector}, \code{vw} is simply a vector[1:3], for an array it is a \code{matrix[1:3,seq_along(dim(x))]}.
#!   \code{vw} is a \code{\link[=Extract.ff]{virtual}} attribute. \cr
#! }
#! \value{
#!   NULL or a vw specification, see details
#! }
#! \author{ Jens Oehlschlägel }
#! \seealso{ \code{\link{length.ff}}, \code{\link{dim.ff}}, \code{\link[=Extract.ff]{virtual}} }
#! \examples{
#!   x <- ff(1:26, names=letters)
#!   y <- x
#!   vw(x) <- c(0, 13, 13)
#!   vw(y) <- c(13, 13, 0)
#!   x
#!   y
#!   x[1] <- -1
#!   y[1] <- -2
#!   vw(x) <- NULL
#!   x[]
#!
#!   z <- ff(1:24, dim=c(4,6), dimnames=list(letters[1:4], LETTERS[1:6]))
#!   z
#!   vw(z) <- rbind(c(1,1), c(2,4), c(1,1))
#!   z
#!
#!   rm(x,y,z); gc()
#! }
#! \keyword{ IO }
#! \keyword{ data }
#! \keyword{ array }


# Virtual window of an ff object: the "VW" entry of its virtual attributes
# (NULL if no window is set).
vw.ff <- function(x
, ... # dummy to keep R CMD check quiet
)
{
  virtual <- attr(x, "virtual")
  attr(virtual,"VW")
}

# Replacement method setting or removing the virtual window of an ff vector.
#   x      ff vector
#   value  NULL (remove the window) or c(Offset, Window, Rest), coerced to
#          integer; all components must be non-negative and their sum must
#          equal the total length of the vector
# Setting a window sets Length to the window size and drops the physical
# attribute 'na.count'; removing it restores the total length. Returns x.
"vw<-.ff_vector" <- function(x
, ... # dummy to keep R CMD check quiet
, value
)
{
  virtual <- attr(x, "virtual")
  vw <- attr(virtual,"VW")
  if (is.null(value)){
    if (!is.null(vw)){
      # restore original length when disabling vw
      attr(virtual,"Length") <- sum(vw)
      attr(virtual,"VW") <- NULL
    }
  }else{
    value <- as.integer(value)
    if (length(value)!=3)
      stop("vw(ff_vector) must be vector[3]")
    # the sum check alone would accept e.g. c(-1, 14, 13) for length 26
    if (any(is.na(value)) || any(value<0L))
      stop("vw(ff_vector) components must be non-negative and not NA")
    # total length: Length if no window is active, otherwise the sum of the
    # active window (Length then only holds the window size)
    total <- if (is.null(vw)) attr(virtual, "Length") else sum(vw)
    if (sum(value)!=total)
      stop("sum(vw(ff_vector)) must equal length(ff_vector)")
    attr(virtual,"Length") <- value[2]
    attr(virtual,"VW") <- value
    attr(attr(x, "physical"),"na.count") <- NULL
  }
  attr(x, "virtual") <- virtual
  x
}

# Replacement method setting or removing the virtual window of an ff array.
#   x      ff array
#   value  NULL (remove the window) or a matrix[3, length(dim(x))] with rows
#          Offset, Window, Rest (storage mode set to integer); all entries
#          must be non-negative and the column sums must equal the total dim
# Setting a window sets Dim and Length to the window extents and drops the
# physical attribute 'na.count'; removing it restores total Dim and Length.
# Returns x.
"vw<-.ff_array" <- function(x
, ... # dummy to keep R CMD check quiet
, value
)
{
  virtual <- attr(x, "virtual")
  vw <- attr(virtual,"VW")
  if (is.null(value)){
    if (!is.null(vw)){
      # restore original dim and length when disabling vw
      d <- as.integer(colSums(vw))
      attr(virtual,"Length") <- as.integer(prod(d))
      attr(virtual,"Dim") <- d
      attr(virtual,"VW") <- NULL
    }
  }else{
    d <- attr(virtual,"Dim")
    storage.mode(value) <- "integer"
    if (!identical(dim(value), c(3L, length(d))))
      stop("vw(ff_array) must be matrix[3, length(dim(x))]")
    dimnames(value) <- NULL
    # the column sum check alone would accept negative components
    if (any(is.na(value)) || any(value<0L))
      stop("vw(ff_array) components must be non-negative and not NA")
    if (is.null(vw)){
      # no window active: Dim still holds the total dim
      if (!identical(as.integer(colSums(value)), d))
        stop("colSums(vw(ff_array)) must equal dim(x)")
    }else{
      # window active: total dim is given by the column sums of the old window
      if (!identical(colSums(value), colSums(vw)))
        stop("colSums(vw(ff_array)) must equal dim(x)")
    }
    attr(virtual,"Length") <- as.integer(prod(value[2,]))
    attr(virtual,"Dim") <- value[2,]
    attr(virtual,"VW") <- value
    attr(attr(x, "physical"),"na.count") <- NULL
  }
  attr(x, "virtual") <- virtual
  x
}

# Default method: objects other than ff have no virtual window, so NULL.
vw.default <- function(x
, ... # dummy to keep R CMD check quiet
){
  return(NULL)
}


#! \name{print.ff}
#! \alias{print.ff}
#! \alias{print.ffdf}
#! \alias{print.ff_vector}
#! \alias{print.ff_matrix}
#! \alias{str.ff}
#! \alias{str.ffdf}
#! \title{ Print and str methods }
#! \description{
#!   printing ff objects and compactly showing their structure
#! }
#! \usage{
#! \method{print}{ff}(x, \dots)
#! \method{print}{ff_vector}(x, maxlength = 16, \dots)
#! \method{print}{ff_matrix}(x, maxdim = c(16, 16), \dots)
#! \method{str}{ff}(object, nest.lev=0, \dots)
#! \method{str}{ffdf}(object, nest.lev=0, \dots)
#! }
#! \arguments{
#!   \item{x}{ a ff object }
#!   \item{object}{ a ff object }
#!   \item{nest.lev}{ current nesting level in the recursive calls to str }
#!   \item{maxlength}{ max number of elements to print from an \code{ff_vector} }
#!   \item{maxdim}{ max number of elements to print from each dimension from an \code{ff_array} }
#!   \item{\dots}{ further arguments to print }
#! }
#! \details{
#!   The print methods just print a few exemplary elements from the beginning and end of the dimensions.
#! }
#! \value{
#!   \code{invisible()}
#! }
#! \author{ Jens Oehlschlägel }
#! \seealso{ \code{\link{ff}}, \code{\link{print}}, \code{\link{str}} }
#! \examples{
#!   x <- ff(1:10000)
#!   x
#!   print(x, maxlength=30)
#!   dim(x) <- c(100,100)
#!   x
#!   rm(x); gc()
#! }
#! \keyword{ IO }
#! \keyword{ data }
#! \keyword{ print }


# Print a one-line header describing an ff object: file state, vmode, length
# (or virtual window), maxlength, sorted flag, na.count, and for arrays the
# dim (or window), dimorder and symmetry, followed by the factor levels.
# The line is written with cat(); nothing else is printed here.
print.ff <- function(x
, ... # dummy to keep R CMD check quiet
)
{
  win <- vw(x)
  extent <- dim(x)
  lev <- levels(x)

  # file state
  state <- if (file.exists(filename(x))) {
      if (is.open(x)) "open" else "closed"
    } else "deleted"
  vm <- vmode(x)

  # vectors with a virtual window show offset / window length / total length
  lenpart <- if (is.null(extent) && !is.null(win))
      paste(" offset=", win[1], " length=", win[2], " totallength=", sum(win), sep="")
    else
      paste(" length=", length(x), sep="")
  maxlen <- maxlength(x)

  sortedpart <- if (is.sorted(x)) " (sorted)"
  napart <- if (!is.null(physical(x)$na.count)) paste(" (na.count=",na.count(x),")",sep="")

  # array information, only for objects having a dim
  dimpart <- NULL
  if (!is.null(extent)){
    shape <- if (is.null(win))
        paste(" dim=c(", paste(dim(x), collapse=","), ")", sep="")
      else
        paste(" offset=c(", paste(win[1,], collapse=","), ") dim=c(", paste(win[2,], collapse=","), ") totaldim=c(", paste(colSums(win), collapse=","), ")", sep="")
    ordpart <- if (!is.null(dimorder(x))) paste(" dimorder=c(", paste(dimorder(x), collapse=","), ")", sep="")
    symmpart <- if (symmetric(x)) paste(" symmetric", if (!is.null(fixdiag(x))) paste(" (fixdiag=", fixdiag(x), ")", sep=""), sep="")
    dimpart <- paste(shape, ordpart, symmpart, sep="")
  }

  # levels, separated by " < " for ordered factors
  levpart <- if (!is.null(lev)) paste(" levels: ", paste(ifelse(is.na(lev), "<NA>", lev), sep="", collapse=if (is.ordered(x)) " < " else " "), sep="")

  cat("ff (", state, ") ", vm, lenpart, " (", maxlen, ")", sortedpart, napart, dimpart, levpart, "\n", sep="")
}

# Print an ff vector: the header from the next print method, then -- only if
# the ff is open -- the result of vecprint() limited by 'maxlength'.
print.ff_vector <- function(x, maxlength=16, ...){
  NextMethod("print")
  if (is.open(x)){
    shown <- vecprint(x, maxlength=maxlength)
    print(shown, ...)
  }
}

# Print an ff matrix: the header from the next print method, then -- only if
# the ff is open -- the result of matprint() limited by 'maxdim'.
print.ff_matrix <- function(x, maxdim=c(16,16), ...){
  NextMethod("print")
  if (is.open(x)){
    shown <- matprint(x, maxdim=maxdim)
    print(shown, ...)
  }
}


# Compactly display the structure of an ff object: first the unclassed
# object, then the class attribute on a line of its own, indented according
# to the nesting level.
str.ff <- function(object, nest.lev=0, ...){
  indent <- paste(rep(" ..", nest.lev), collapse="")
  bare <- unclass(object)
  str(bare, nest.lev=nest.lev, ...)
  cat(indent, ' - attr(*, "class") = ', sep="")
  str(class(object), nest.lev=nest.lev, ...)
}



# --- ff new / update / clone / open / close / delete / deleteIfOpen -----------------------------------------------------------

#! \name{ff}
#! \alias{ff}
#! \alias{ff_pointer}
#! \title{ ff classes for representing (large) atomic data }
#! \description{
#!   The ff package provides atomic data structures that are stored on disk but behave (almost) as if they were in RAM by
#!   mapping only a section (pagesize) into main memory (the effective main memory consumption per ff object).
#!   Several access optimization techniques such as Hybrid Index Preprocessing (\code{\link{as.hi}}, \code{\link{update.ff}}) and Virtualization (\code{\link[=Extract.ff]{virtual}}, \code{\link{vt}}, \code{\link{vw}}) are implemented to achieve good performance even with large datasets.
#!   In addition to the basic access functions, the ff package also provides compatibility functions that facilitate writing code for ff and ram objects (\code{\link[=clone.ff]{clone}}, \code{\link{as.ff}}, \code{\link{as.ram}}) and very basic support for operating on ff objects (\code{\link{ffapply}}).
#!   While the (possibly packed) raw data is stored on a flat file, meta
#!   informations about the atomic data structure such as its dimension,
#!   virtual storage mode (\code{\link{vmode}}), factor level encoding,
#!   internal length etc.. are stored as an ordinary R object (external
#!   pointer plus attributes) and can be saved in the workspace.
#!   The raw flat file data encoding is always in native machine format for
#!   optimal performance and provides several packing schemes for different
#!   data types such as logical, raw, integer and double (in an extended version
#!   support for more tightly packed virtual data types is available).
#!   flatfile data files can be shared among ff objects in the same R process or
#!   even from different R processes due to Memory-Mapping, although the
#!   caching effects have not been tested extensively.
#!   \cr
#!   Please do read and understand the limitations and warnings in \code{\link{LimWarn}} before you do anything serious with package ff.
#! }
#! \usage{
#! ff( initdata  = NULL
#! , length      = NULL
#! , levels      = NULL
#! , ordered     = NULL
#! , dim         = NULL
#! , dimorder    = NULL
#! , bydim       = NULL
#! , symmetric   = FALSE
#! , fixdiag     = NULL
#! , names       = NULL
#! , dimnames    = NULL
#! , ramclass    = NULL
#! , ramattribs  = NULL
#! , vmode       = NULL
#! , update      = NULL
#! , pattern     = NULL
#! , filename    = NULL
#! , overwrite   = FALSE
#! , readonly    = FALSE
#! , pagesize    = NULL  # getOption("ffpagesize")
#! , caching     = NULL  # getOption("ffcaching")
#! , finalizer   = NULL
#! , finonexit   = NULL  # getOption("fffinonexit")
#! , FF_RETURN   = TRUE
#! , BATCHSIZE   = .Machine$integer.max
#! , BATCHBYTES  = getOption("ffbatchbytes")
#! , VERBOSE     = FALSE
#! )
#! }
#! \arguments{
#!   \item{initdata}{ scalar or vector of the \code{\link{.vimplemented}} \code{\link{vmode}}s, recycled if needed, default 0, see also \code{\link{as.vmode}} and \code{\link{vector.vmode}} }
#!   \item{length}{ optional vector \code{\link{length}} of the object (default: derive from 'initdata' or 'dim'), see \code{\link{length.ff}} }
#!   \item{levels}{ optional character vector of levels (in this case initdata must be composed of these) (default: derive from initdata) }
#!   \item{ordered}{ indicate whether the levels are ordered (TRUE) or non-ordered factor (FALSE, default) }
#!   \item{dim}{ optional array \code{\link{dim}}, see \code{\link{dim.ff}} and \code{\link{array}} }
#!   \item{dimorder}{ physical layout (default seq_along(dim)), see \code{\link{dimorder}} and \code{\link{aperm}} }
#!   \item{bydim}{ dimorder by which to interpret the 'initdata', generalization of the 'byrow' parameter in \code{\link{matrix}} }
#!   \item{symmetric}{ extended feature: TRUE creates symmetric matrix (default FALSE) %, see \code{\link{symm}}, \code{\link{ff_symm}}, \code{\link{ff_dist}}
#!        }
#!   \item{fixdiag}{ extended feature: non-NULL scalar requires fixed diagonal for symmetric matrix (default NULL is free diagonal) }
#!   \item{names}{ NOT taken from initdata, see \code{\link{names}} }
#!   \item{dimnames}{ NOT taken from initdata, see \code{\link{dimnames}} }
#!   \item{ramclass}{ class attribute attached when moving all or parts of this ff into ram, see \code{\link{ramclass}} }
#!   \item{ramattribs}{ additional attributes attached when moving all or parts of this ff into ram, see \code{\link{ramattribs}} }
#!   \item{vmode}{ virtual storage mode (default: derive from 'initdata'), see \code{\link{vmode}} and \code{\link{as.vmode}} }
#!   \item{update}{ set to FALSE to avoid updating with 'initdata' (default TRUE) (used by \code{\link{ffdf}}) }
#!   \item{pattern}{ root pattern with or without path for automatic ff filename creation (default NULL translates to "ff"), see also argument 'filename' }
#!   \item{filename}{ ff \code{\link{filename}} with or without path (default tmpfile with 'pattern' prefix); without path the file is created in \code{getOption("fftempdir")}, with path '.' the file is created in \code{\link{getwd}}. Note that files created in \code{getOption("fftempdir")} have default finalizer "delete" while other files have default finalizer "close". See also arguments 'pattern' and 'finalizer' and \code{\link[=Extract.ff]{physical}} }
#!   \item{overwrite}{ set to TRUE to allow overwriting existing files (default FALSE) }
#!   \item{readonly}{ set to TRUE to forbid writing to existing files }
#!   \item{pagesize}{ pagesize in bytes for the memory mapping (default from \code{getOptions("ffpagesize")} initialized by \code{\link{getdefaultpagesize}}), see also \code{\link[=Extract.ff]{physical}} }
#!   \item{caching}{ caching scheme for the backend, currently 'mmnoflush' or 'mmeachflush' (flush mmpages at each swap, default from \code{getOptions("ffcaching")} initialized with 'mmeachflush'), see also \code{\link[=Extract.ff]{physical}} }
#!   \item{finalizer}{ name of finalizer function called when ff object is \code{\link{remove}d} (default: ff files created in \code{getOptions("fftempdir")} are considered temporary and have default finalizer \code{\link[=delete.ff]{delete}}, files created in other locations have default finalizer \code{\link[=close.ff]{close}}); available finalizer generics are "close", "delete" and "deleteIfOpen", available methods are \code{\link{close.ff}}, \code{\link{delete.ff}} and \code{\link{deleteIfOpen.ff}}, see also argument 'finonexit' and \code{\link{finalizer}} }
#!   \item{finonexit}{ logical scalar determining whether \code{\link{finalize}} is also called when R is closed via \code{\link{q}} (default TRUE from \code{getOption("fffinonexit")}) }
#!   \item{FF_RETURN}{ logical scalar or ff object to be used. The default TRUE creates a new ff file. FALSE returns a ram object. Handing over an ff object here uses this or stops if not \code{\link{ffsuitable}} }
#!   \item{BATCHSIZE}{ integer scalar limiting the number of elements to be processed in \code{\link{update.ff}} when length(initdata)>1, default from \code{.Machine$integer.max} }
#!   \item{BATCHBYTES}{ integer scalar limiting the number of bytes to be processed in \code{\link{update.ff}} when length(initdata)>1, default from \code{getOption("ffbatchbytes")}, see also \code{\link{.rambytes}} }
#!   \item{VERBOSE}{ set to TRUE for verbose output in \code{\link{update.ff}} when length(initdata)>1, default FALSE }
#! }
#! \details{
#!  The atomic data is stored in \code{\link{filename}} as a native encoded raw flat file on disk, OS specific limitations of the file system apply.
#!  The number of elements per ff object is limited to the integer indexing, i.e. \code{\link{.Machine}$integer.max}.
#!  Atomic objects created with \command{ff} are \code{\link{is.open}}, a C++ object is ready to access the file via memory-mapping.
#!  Currently the C++ backend provides two caching schemes: 'mmnoflush' lets the OS decide when to flush memory mapped pages
#!  and 'mmeachflush' will flush memory mapped pages at each page swap per ff file.
#!  These minimal memory resources can be released by \code{\link[=close.ff]{close}ing} or \code{\link[=delete.ff]{delete}ing} the ff file.
#!  ff objects can be \code{\link{save}d} and \code{\link{load}ed} across R sessions. If the ff file still exists in the same location,
#!  it will be \code{\link[=open.ff]{open}ed} automatically at the first attempt to access its data. If the ff object is \code{\link{remove}d},
#!  at the next garbage collection (see \code{\link{gc}}) the ff object's \code{\link{finalizer}} is invoked.
#!  Raw data files can be made accessible as an ff object by explicitly giving the filename and vmode but no size information (length or dim).
#!  The ff object will open the file and handle the data with respect to the given vmode.
#!  The \code{\link[=close.ff]{close}} finalizer will close the ff file, the \code{\link[=delete.ff]{delete}} finalizer will delete the ff file.
#!  The default finalizer \code{\link{deleteIfOpen}} will delete open files and do nothing for closed files. If the default finalizer is used,
#!  two actions are needed to protect the ff file against deletion: create the file outside the standard 'fftempdir' and close the ff object before removing it or before quitting R.
#!  When R is exited through \code{\link{q}}, the finalizer will be invoked depending on the 'fffinonexit' option, furthermore the 'fftempdir' is \code{\link{unlink}ed}. \cr
#! }
#! \value{
#!   If (\code{!FF_RETURN}) then a ram object like those generated by \code{\link{vector}}, \code{\link{matrix}}, \code{\link{array}} but with attributes 'vmode', 'physical' and 'virtual' accessible via \code{\link{vmode}}, \code{\link[=Extract.ff]{physical}} and \code{\link[=Extract.ff]{virtual}}  \cr
#!   If (\code{FF_RETURN}) an object of class 'ff' which is a list with two components:
#!   \item{physical}{an external pointer of class '\code{ff_pointer}' which carries attributes with copy by reference semantics: changing a physical attribute of a copy changes the original }
#!   \item{virtual}{an empty list which carries attributes with copy by value semantics: changing a virtual attribute of a copy does not change the original }
#! }
#! \section{Physical object component}{
#!   The '\code{ff_pointer}' carries the following 'physical' or readonly attributes, which are accessible via \code{\link[=Extract.ff]{physical}}:
#!  \tabular{rl}{
#!   \code{vmode    } \tab see \code{\link{vmode}} \cr
#!   \code{maxlength} \tab see \code{\link{maxlength}} \cr
#!   \code{pattern  } \tab see parameter 'pattern' \cr
#!   \code{filename } \tab see \code{\link{filename}} \cr
#!   \code{pagesize } \tab see parameter 'pagesize' \cr
#!   \code{caching  } \tab see parameter 'caching' \cr
#!   \code{finalizer} \tab see parameter 'finalizer' \cr
#!   \code{finonexit} \tab see parameter 'finonexit' \cr
#!   \code{readonly } \tab see \code{\link{is.readonly}} \cr
#!   \code{class    } \tab The external pointer needs class 'ff\_pointer' to allow method dispatch of finalizers  \cr
#!  }
#! }
#! \section{Virtual object component}{
#!   The 'virtual' component carries the following attributes (some of which might be NULL):
#!  \tabular{rl}{
#!   \code{Length    } \tab see \code{\link{length.ff}} \cr
#!   \code{Levels    } \tab see \code{\link{levels.ff}} \cr
#!   \code{Names     } \tab see \code{\link{names.ff}} \cr
#!   \code{VW        } \tab see \code{\link{vw.ff}} \cr
#!   \code{Dim       } \tab see \code{\link{dim.ff}} \cr
#!   \code{Dimorder  } \tab see \code{\link{dimorder}} \cr
#!   \code{Symmetric } \tab see \code{\link{symmetric.ff}} \cr
#!   \code{Fixdiag   } \tab see \code{\link{fixdiag.ff}} \cr
#!   \code{ramclass  } \tab see \code{\link{ramclass}} \cr
#!   \code{ramattribs} \tab see \code{\link{ramattribs}} \cr
#!  }
#! }
#! \section{Class}{
#!   You should not rely on the internal structure of ff objects or their ram versions. Instead use the accessor functions like \code{\link{vmode}}, \code{\link[=Extract.ff]{physical}} and \code{\link[=Extract.ff]{virtual}}.
#!   Still it would be wise to avoid attributes AND classes 'vmode', 'physical' and 'virtual' in any other packages.
#!   Note that the 'ff' object's class attribute also has copy-by-value semantics ('virtual').
#!   For the 'ff' object the following class attributes are known:
#!  \tabular{rl}{
#!   vector \tab \code{c("ff_vector","ff")} \cr
#!   matrix \tab \code{c("ff_matrix","ff_array","ff")} \cr
#!   array \tab \code{c("ff_array","ff")} \cr
#!   symmetric matrix \tab \code{c("ff_symm","ff")} \cr
#!   distance matrix \tab \code{c("ff_dist","ff_symm","ff")} \cr
#!   reserved for future use \tab \code{c("ff_mixed","ff")} \cr
#!  }
#! }
#! \section{Methods}{
#!  The following methods and functions are available for ff objects:
#!  \tabular{lrll}{
#!   \emph{ Type} \tab  \emph{ Name }  \tab \emph{ Assign }  \tab \emph{Comment}  \cr
#!   \emph{ } \tab  \emph{ }                                 \tab \emph{ }  \tab \bold{Basic functions}  \cr
#!   function \tab  \code{\link{ff}}                         \tab \emph{ }  \tab constructor for ff and ram objects \cr
#!   generic  \tab  \code{\link[=update.ff]{update}}       \tab \emph{ }  \tab updates one ff object with the content of another \cr
#!   generic  \tab  \code{\link[=clone.ff]{clone}}                      \tab \emph{ }  \tab clones an ff object optionally changing some of its features \cr
#!   method   \tab  \code{\link[=print.ff]{print}}         \tab \emph{ }  \tab print ff \cr
#!   method   \tab  \code{\link[=str.ff]{str}}             \tab \emph{ }  \tab ff object structure \cr
#!   \emph{ } \tab  \emph{ }                                 \tab \emph{ }  \tab \bold{Class test and coercion}  \cr
#!   function \tab  \code{\link{is.ff}}                      \tab \emph{ }  \tab check if inherits from ff \cr
#!   generic  \tab  \code{\link{as.ff}}                      \tab \emph{ }  \tab coerce to ff, if not yet \cr
#!   generic  \tab  \code{\link{as.ram}}                     \tab \emph{ }  \tab coerce to ram retaining some of the ff information \cr
#!   generic  \tab  \code{\link[=as.bit.ff]{as.bit}}          \tab \emph{ }  \tab coerce to \code{\link[bit]{bit}} \cr
#!   \emph{ } \tab  \emph{ }                                 \tab \emph{ }  \tab \bold{Virtual storage mode} \cr
#!   generic  \tab  \code{\link{vmode}}                      \tab \code{<-} \tab get and set virtual mode (setting only for ram, not for ff objects) \cr
#!   generic  \tab  \code{\link{as.vmode}}                   \tab \emph{ }  \tab coerce to vmode (only for ram, not for ff objects) \cr
#!   \emph{ } \tab  \emph{ }                                 \tab \emph{ }  \tab \bold{Physical attributes}  \cr
#!   function \tab  \code{\link[=Extract.ff]{physical}}                   \tab \code{<-} \tab set and get physical attributes \cr
#!   generic  \tab  \code{\link{filename}}                   \tab \emph{<-}  \tab get and set filename \cr
#!   generic  \tab  \code{\link{pattern}}                    \tab \emph{<-}  \tab get pattern and set filename path and prefix via pattern \cr
#!   generic  \tab  \code{\link{maxlength}}                  \tab \emph{ }  \tab get maxlength \cr
#!   generic  \tab  \code{\link[bit:Metadata]{is.sorted}}                  \tab \code{<-} \tab set and get if is marked as sorted \cr
#!   generic  \tab  \code{\link[bit]{na.count}}                   \tab \code{<-} \tab set and get NA count, if set to non-NA only swap methods can change and na.count is maintained automatically \cr
#!   generic  \tab  \code{\link{is.readonly}}                \tab \emph{ }   \tab get if is readonly \cr
#!   \emph{ } \tab  \emph{ }                                 \tab \emph{ }   \tab \bold{Virtual attributes} \cr
#!   function \tab  \code{\link[=Extract.ff]{virtual}}                    \tab \code{<-} \tab set and get virtual attributes \cr
#!   method   \tab  \code{\link[=length.ff]{length}}       \tab \code{<-} \tab set and get length \cr
#!   method   \tab  \code{\link[=dim.ff]{dim}}             \tab \code{<-} \tab set and get dim \cr
#!   generic  \tab  \code{\link{dimorder}}                   \tab \code{<-} \tab set and get the order of dimension interpretation \cr
#!   generic  \tab  \code{\link{vt}}                         \tab \code{}   \tab virtually transpose ff_array \cr
#!   method  \tab   \code{\link[=t.ff]{t}}                 \tab \code{}   \tab create transposed clone of ff_array \cr
#!   generic  \tab  \code{\link{vw}}                         \tab \code{<-} \tab set and get virtual windows \cr
#!   method   \tab  \code{\link[=names.ff]{names}}         \tab \code{<-} \tab set and get names \cr
#!   method   \tab  \code{\link[=dimnames.ff]{dimnames}}   \tab \code{<-} \tab set and get dimnames \cr
#!   generic  \tab  \code{\link{symmetric}}                  \tab \emph{ }   \tab get if is symmetric \cr
#!   generic  \tab  \code{\link{fixdiag}}                    \tab \code{<-} \tab set and get fixed diagonal of symmetric matrix \cr
#!   method   \tab  \code{\link{levels}}                     \tab \code{<-} \tab levels of factor  \cr
#!   generic  \tab  \code{\link{recodeLevels}}               \tab \code{ }  \tab recode a factor to different levels \cr
#!   generic  \tab  \code{\link{sortLevels}}                 \tab \code{ }  \tab sort the levels and recode a factor \cr
#!   method   \tab  \code{\link{is.factor}}                  \tab \emph{ }  \tab if is factor \cr
#!   method   \tab  \code{\link{is.ordered}}                 \tab \emph{ }  \tab if is ordered (factor) \cr
#!   generic  \tab  \code{\link{ramclass}}                   \tab \code{}   \tab get ramclass \cr
#!   generic  \tab  \code{\link{ramattribs}}                 \tab \code{}   \tab get ramattribs \cr
#!   \emph{ } \tab  \emph{ }                                 \tab \emph{ }  \tab \bold{Access functions}  \cr
#!   function \tab  \code{\link{get.ff}}                     \tab \emph{ }  \tab get single ff element (currently \code{\link[=Extract.ff]{[[}} is a shortcut) \cr
#!   function \tab  \code{\link{set.ff}}                     \tab \emph{ }  \tab set single ff element (currently \code{\link[=Extract.ff]{[[<-}} is a shortcut) \cr
#!   function \tab  \code{\link{getset.ff}}                  \tab \emph{ }  \tab set single ff element and get old value in one access operation \cr
#!   function \tab  \code{\link{read.ff}}                    \tab \emph{ }  \tab get vector of contiguous elements \cr
#!   function \tab  \code{\link{write.ff}}                   \tab \emph{ }  \tab set vector of contiguous elements  \cr
#!   function \tab  \code{\link{readwrite.ff}}               \tab \emph{ }  \tab set vector of contiguous elements and get old values in one access operation \cr
#!   method   \tab  \code{\link[=Extract.ff]{[}}                 \tab \emph{ }  \tab get vector of indexed elements, uses HIP, see \code{\link{hi}} \cr
#!   method   \tab  \code{\link[=Extract.ff]{[<-}}             \tab \emph{ }  \tab set vector of indexed elements, uses HIP, see \code{\link{hi}} \cr
#!   generic  \tab  \code{\link[=swap.ff]{swap}}           \tab \emph{ }  \tab set vector of indexed elements and get old values in one access operation \cr
#!   generic  \tab  \code{\link[=add.ff]{add}}             \tab \emph{ }  \tab (almost) unifies '+=' operation for ff and ram objects \cr
#!   generic  \tab  \code{\link[=bigsample.ff]{bigsample}} \tab \emph{ }  \tab sample from ff object \cr
#!   \emph{ } \tab  \emph{ }                                 \tab \emph{ }  \tab \bold{Opening/Closing/Deleting}                                             \cr
#!   generic  \tab  \code{\link{is.open}}                    \tab \emph{ }  \tab check if ff is open \cr
#!   method   \tab  \code{\link[=open.ff]{open}}           \tab \emph{ }  \tab open ff object (is done automatically on access) \cr
#!   method   \tab  \code{\link[=close.ff]{close}}         \tab \emph{ }  \tab close ff object (releases C++ memory and protects against file deletion if the \code{\link{deleteIfOpen}} finalizer is used) \cr
#!   generic  \tab  \code{\link[=delete.ff]{delete}}       \tab \emph{ }  \tab deletes ff file (unconditionally) \cr
#!   generic  \tab  \code{\link{deleteIfOpen}}               \tab \emph{ }  \tab deletes ff file if ff object is open (finalization method) \cr
#!   generic  \tab  \code{\link{finalizer}}                  \tab \emph{<-} \tab get and set finalizer \cr
#!   generic  \tab  \code{\link{finalize}}                   \tab \emph{}   \tab force finalization \cr
#!   \emph{ } \tab  \emph{ }                                 \tab \emph{ }  \tab \bold{Other}                                                     \cr
#!   function \tab  \code{\link{geterror.ff}}                \tab \emph{ }  \tab get error code \cr
#!   function \tab  \code{\link{geterrstr.ff}}               \tab \emph{ }  \tab get error message \cr
#!   }
#! }
#! \section{ff options}{
#!   Through \code{\link{options}} or \code{\link{getOption}} one can change and query global features of the ff package:
#!  \tabular{rll}{
#!   \emph{option}        \tab \emph{description}                           \tab \emph{default} \cr
#!   \code{fftempdir}     \tab default directory for creating ff files      \tab \code{\link{tempdir}} \cr
#!   \code{fffinalizer}   \tab name of default finalizer                    \tab \code{\link{deleteIfOpen}} \cr
#!   \code{fffinonexit}   \tab default for invoking finalizer on exit of R  \tab \code{TRUE} \cr
#!   \code{ffpagesize}    \tab default pagesize                             \tab \code{\link{getdefaultpagesize}} \cr
#!   \code{ffcaching}     \tab caching scheme for the C++ backend           \tab \code{'mmnoflush'} \cr
#!   \code{ffdrop}        \tab default for the \option{drop} parameter in the ff subscript methods  \tab TRUE \cr
#!   \code{ffbatchbytes}  \tab default for the byte limit in batched/chunked processing             \tab 16MB \cr
#!  }
#! }
#! \section{OS specific}{
#!  The following table gives an overview of file size limits for common file systems (see \url{https://en.wikipedia.org/wiki/Comparison_of_file_systems} for further details):
#!  \tabular{ll}{
#!   \strong{File System} \tab \strong{File size limit} \cr
#!   FAT16              \tab 2GB \cr
#!   FAT32              \tab 4GB \cr
#!   NTFS               \tab 16GB \cr
#!   ext2/3/4           \tab 16GB to 2TB \cr
#!   ReiserFS           \tab 4GB (up to version 3.4) / 8TB (from version 3.5) \cr
#!   XFS                \tab 8EB \cr
#!   JFS                \tab 4PB \cr
#!   HFS                \tab 2GB \cr
#!   HFS Plus           \tab 16GB \cr
#!   UFS1               \tab 4GB to 256TB \cr
#!   UFS2               \tab 512GB to 32PB \cr
#!   UDF                \tab 16EB \cr
#!   }
#! }
#! \section{Credits}{
#!  Package Version 1.0
#!  \tabular{ll}{
#!   Daniel Adler       \tab \email{dadler@uni-goettingen.de} \cr
#!                      \tab R package design, C++ generic file vectors, Memory-Mapping, 64-bit Multi-Indexing adapter and Documentation, Platform ports \cr
#!   Oleg Nenadic       \tab \email{onenadi@uni-goettingen.de} \cr
#!                      \tab Index sequence packing, Documentation \cr
#!   Walter Zucchini    \tab \email{wzucchi@uni-goettingen.de} \cr
#!                      \tab Array Indexing, Sampling, Documentation \cr
#!   Christian Gläser   \tab \email{christian_glaeser@gmx.de} \cr
#!                      \tab Wrapper for biglm package \cr
#!   }
#!  Package Version 2.0
#!  \tabular{ll}{
#!   Jens Oehlschlägel  \tab \email{Jens.Oehlschlaegel@truecluster.com} \cr
#!                      \tab R package redesign; Hybrid Index Preprocessing; transparent object creation and finalization; vmode design; virtualization and hybrid copying; arrays with dimorder and bydim; symmetric matrices; factors and POSIXct; virtual windows and transpose; new generics update, clone, swap, add, as.ff and as.ram; ffapply and collapsing functions. R-coding, C-coding and Rd-documentation. \cr
#!   Daniel Adler       \tab \email{dadler@uni-goettingen.de} \cr
#!                      \tab C++ generic file vectors, vmode implementation and low-level bit-packing/unpacking, arithmetic operations and NA handling, Memory-Mapping and backend caching. C++ coding and platform ports. R-code extensions for opening existing flat files readonly and shared. \cr
#!   }
#! }
#! \note{ Note that the standard finalizers are generic functions, their dispatch to the '\code{ff_pointer}' method happens at finalization time, their 'ff' methods exist for direct calling.
#! }
#! \section{Licence}{Package under GPL-2, included C++ code released by Daniel Adler under the less restrictive ISCL}
#! \seealso{ \code{\link{vector}}, \code{\link{matrix}}, \code{\link{array}}, \code{\link{as.ff}}, \code{\link{as.ram}} }
#! \examples{
#!   message("make sure you understand the following ff options 
#!     before you start using the ff package!!")
#!   oldoptions <- options(fffinalizer="deleteIfOpen", fffinonexit="TRUE", fftempdir=tempdir())
#!   message("an integer vector")
#!   ff(1:12)                  
#!   message("a double vector of length 12")
#!   ff(0, 12)
#!   message("a 2-bit logical vector of length 12 (vmode='boolean' has 1 bit)")
#!   ff(vmode="logical", length=12)
#!   message("an integer matrix 3x4 (standard colwise physical layout)")
#!   ff(1:12, dim=c(3,4))
#!   message("an integer matrix 3x4 (rowwise physical layout, but filled in standard colwise order)")
#!   ff(1:12, dim=c(3,4), dimorder=c(2,1))
#!   message("an integer matrix 3x4 (standard colwise physical layout, but filled in rowwise order
#! aka matrix(, byrow=TRUE))")
#!   ff(1:12, dim=c(3,4), bydim=c(2,1))
#!   gc()
#!   options(oldoptions)
#!
#!   if (ffxtensions()){
#!      message("a 26-dimensional boolean array using 1-bit representation
#!       (file size 8 MB compared to 256 MB int in ram)")
#!      a <- ff(vmode="boolean", dim=rep(2, 26))
#!      dimnames(a) <- dummy.dimnames(a)
#!      rm(a); gc()
#!   }
#!
#!   \dontrun{
#!
#!      message("This 2GB biglm example can take long, you might want to change
#!        the size in order to define a size appropriate for your computer")
#!      require(biglm)
#!
#!      b <- 1000
#!      n <- 100000
#!      k <- 3
#!      memory.size(max = TRUE)
#!      system.time(
#!      x <- ff(vmode="double", dim=c(b*n,k), dimnames=list(NULL, LETTERS[1:k]))
#!      )
#!      memory.size(max = TRUE)
#!      system.time(
#!      ffrowapply({
#!         l <- i2 - i1 + 1
#!         z <- rnorm(l)
#!         x[i1:i2,] <- z + matrix(rnorm(l*k), l, k)
#!      }, X=x, VERBOSE=TRUE, BATCHSIZE=n)
#!      )
#!      memory.size(max = TRUE)
#!
#!      form <- A ~ B + C
#!      first <- TRUE
#!      system.time(
#!      ffrowapply({
#!         if (first){
#!           first <- FALSE
#!           fit <- biglm(form, as.data.frame(x[i1:i2,,drop=FALSE], stringsAsFactors = TRUE))
#!         }else
#!           fit <- update(fit, as.data.frame(x[i1:i2,,drop=FALSE], stringsAsFactors = TRUE))
#!      }, X=x, VERBOSE=TRUE, BATCHSIZE=n)
#!      )
#!      memory.size(max = TRUE)
#!      first
#!      fit
#!      summary(fit)
#!      rm(x); gc()
#!   }
#! }
#! \keyword{ IO }
#! \keyword{ array }
#! \keyword{ attribute }
#! \keyword{ classes }
#! \keyword{ package }


ff <- function(
  initdata    = NULL
, length      = NULL
, levels      = NULL
, ordered     = NULL
, dim         = NULL
, dimorder    = NULL    # this is the (transparent) storage layout
, bydim       = NULL    # this is the dimorder used to read in the initdata (e.g. use 2:1 to mimic matrix(,byrow=TRUE)) (passed to update)
, symmetric   = FALSE   # allows to define a matrix as symmetric: in this case we need all(diff(dim)==0) and we assume that all subscript-combinations are equal (storage mapped to sorted subscripts, e.g. distance matrices), no dimorder allowed
, fixdiag     = NULL    # symmetric matrix only: value for fixdiag if diagonal is redundant (e.g. fixdiag=0 for dist)
, names       = NULL    # not taken fom initdata
, dimnames    = NULL    # not taken fom initdata
, ramclass    = NULL
, ramattribs  = NULL
, vmode       = NULL    # be default we get the vmode from initdata
, update      = NULL    # set to FALSE to suppress upating ff object with initdata
, pattern     = NULL
, filename    = NULL
, overwrite   = FALSE
, readonly    = FALSE
, pagesize    = NULL    # getOption("ffpagesize")
, caching     = NULL    # getOption("ffcaching")
, finalizer   = NULL    # "delete" for tempfiles and "close" for named files
, finonexit   = NULL    # getOption("fffinonexit")
#, hideclass = FALSE   # experimental, please ignore (would be needed to bypass <-.ff)
, FF_RETURN   = TRUE
, BATCHSIZE   = .Machine$integer.max       # optional batch size restriction in cases (limiting is useful if pulling values from function accesses raw data with more columns that k)
, BATCHBYTES  = getOption("ffbatchbytes")  # batch size restriction in bytes (needed for update() if length(initdata)>1)
, VERBOSE     = FALSE
)
{
  if (!ffsymmxtensions()){
    symm <- function(...).NotYetImplemented()
    if (symmetric){
      message("You are requesting a dual-licence feature that currently is only available to parties who support the development of package ff and friends")
      .NotYetUsed("symmetric", error = TRUE)
    }
    if (!is.null(fixdiag)){
      message("You are requesting a dual-licence feature that currently is only available to parties who support the development of package ff and friends")
      .NotYetUsed("fixdiag", error = TRUE)
    }
  }

  if (!is.logical(FF_RETURN) || length(FF_RETURN)!=1)
      stop("in ff() FF_RETURN must be a logical scalar")

  if (is.null(symmetric)||!is.logical(symmetric)||length(symmetric)!=1)
    stop("symmetric must be FALSE or TRUE")

  if (length(initdata) && !is.atomic(initdata[1]))
    stop("initdata[1] must be atomic")
  
  # determine filename and finalizer
  if (is.null(filename)){
    if (is.null(pattern))
      pattern <- "ff"
    filename <- fftempfile(pattern)
  }else{
    if (is.null(pattern))
      pattern <- file.path(splitPathFile(filename)$path, "ff")
    # fix problem in file.path
    pattern <- gsub("/+","/",pattern)
  }
  # gurantee absolute path
  dfile <- dirname(filename)
  bfile <- basename(filename)
  cwd <- getwd()
  on.exit(setwd(cwd))
  setwd(dfile)
  dfile <- getwd()
  filename <- file.path(dfile, bfile)
  # fix problem in file.path
  filename <- gsub("/+","/",filename)
  
  if (is.null(finalizer)){
    finalizer <- getOption("fffinalizer")
    if (is.null(finalizer)){
      if (dfile==getOption("fftempdir"))
        finalizer <- "delete"   # temporary ff object
      else
        finalizer <- "close"    # persistent ff object
    }
  }else{
    finalizer <- match.arg(finalizer, choices=c("deleteIfOpen", "delete", "close"))
  }
  
  if (is.null(pagesize))
    pagesize <- getOption("ffpagesize")
  else
    pagesize <- getalignedpagesize(pagesize)
  if (is.null(caching))
    caching <- getOption("ffcaching")
  else
    caching <- match.arg(caching, caching_schemes)
  if (is.null(finonexit))
    finonexit <- getOption("fffinonexit")
  
  
  if (is.null(vmode)){
    if (vmode(initdata)=="NULL") {
      # stop("need vmode or initdata")  # wish of Martijn Schuemie
      vmode <- "logical"                # wish of Martijn Schuemie
    } else { 
      vmode <- vmode(initdata)
    }
  }else{
    if (is.na(match(vmode, c("boolean", "logical", "quad", "nibble", "byte", "ubyte", "short", "ushort", "integer", "single", "double", "raw")))){
     stop("vmode '", vmode,"' not implemented")
    }
  }
  
  # handle levels
  if (is.null(levels))
    levels <- levels(initdata)
  if (!is.null(levels)){
    if (vmode=="character")
      vmode <- "integer"
    if (.rammode[vmode] != "integer")
      stop("factors must be .rammode integer")
    if (length(levels)>.vmax[vmode]+.vunsigned[vmode])
      stop("vmode '", vmode, "' can carry max ", .vmax[vmode]+.vunsigned[vmode], " levels")
    if (is.null(ramclass)){
      if (is.null(ordered))
        ordered <- is.ordered(initdata)
      if (ordered)
        ramclass <- c("ordered","factor")
      else
        ramclass <- "factor"
    }
  }
  
  # handle initdata
  if (!is.null(initdata)){
    if (is.null(ramclass))
      ramclass <- ramclass(initdata)
    if (is.null(ramattribs)){
      ramattribs <- ramattribs(initdata)
    }
  }

  # handle file reuse and set maxlength and createNew (FALSE=ram object or reuse of ff)
  if (FF_RETURN){
    if ( file.exists(filename) && !overwrite){
        createNew <- FALSE
        if (file.access(filename,4) == -1){ # no read access
          stop("read permission denied for file")
        }
        if (file.access(filename,2) == -1){ # no write access
          if (!readonly) {
            readonly <- TRUE
            warning("force read-only access on file")
          }
        }
        filesize <- file.info(filename)$size
        if (is.na(filesize))
          stop("unable to open file")
        if (!is.null(initdata)) {
          stop("bad argument initdata for existing file; initializing existing file is invalid")
        }
        fillength <- as.integer( floor( filesize / .rambytes[vmode] ) * .rambytes[vmode] / .ffbytes[vmode] )
        if (fillength>.Machine$integer.max){
          warning("limiting file to .Machine$integer.max elements at this vmode")
          maxlength <- as.integer( floor(.Machine$integer.max * .ffbytes[vmode] / .rambytes[vmode]) * .rambytes[vmode] /.ffbytes[vmode] )
        }else
          maxlength <- fillength
    }else{
      createNew <- TRUE
      if (readonly)
        stop("allocation of a new 'readonly' flat file vector not supported")
      # maxlength is set later when we know length
    }
  }else{
    createNew <- FALSE
  }

  # handle or derive length from dim or initdata
  if (is.null(dim)){ # ff_vector
    if (is.null(length)){
      if (!createNew){
        length <- maxlength
      }else{
        # wish of Martijn Schuemie:   stop("need length or initdata")
        length <- length(initdata)  # wish of Martijn Schuemie:
      }
    }else{
      length <- as.integer(length)
    }
    if(!is.null(dimorder))
      stop("dimorder must be null with vectors")
    if (symmetric)
      stop("symmetric only allowed with matrices")
    if (!is.null(fixdiag))
      stop("fixed-diagonal only allowed with symmetric matrices")
    ffclass <- c("ff_vector","ff")
  }else{ # ff_array || ff_symm
    if (!length(dim))
      stop("'dims' cannot be of length 0")
    dim <- as.integer(dim)
    ndim <- length(dim)
    if (is.null(dimorder))
      dimorder <- seq(length.out=ndim)
    else{
      dimorder <- as.integer(dimorder)
      if (!identical(sort(dimorder), seq_len(ndim)))
        stop("dimorder does not match dimension")
    }
    if (symmetric){ # ff_symm
      if (ndim!=2 || dim[1]!=dim[2])
        stop("symmetric matrices require parameter dim with 2 equal values")
      if (!dimorderStandard(dimorder))
        stop("non-standard dimorder not allowed for symmetric matrices")
      if (!is.null(bydim))
        stop("bydim not allowed for symmetric matrices")
      if (is.null(fixdiag))
        length <- dim[1]*(dim[1]-1L)/2L + dim[1]
      else
        length <- dim[1]*(dim[1]-1L)/2L
      ffclass <- c("ff_symm", "ff")
    }else{
      if(!is.null(fixdiag))
        stop("fixed-diagonal only allowed with symmetric matrices")
      if (!is.null(bydim)){
        bydim <- as.integer(bydim)
        if (!identical(sort(bydim), seq_len(ndim)))
          stop("bydim does not match dimension")
      }
      n <- as.integer(prod(dim))
      if(!is.null(length) && length!=n)
        stop("dim and length don't match")
      else
        length <- n
      if (ndim==2)
        ffclass <- c("ff_matrix", "ff_array","ff")
      else
        ffclass <- c("ff_array","ff")
    }
  }
  if (length<0 || length>.Machine$integer.max)
    stop("length must be between 0 and .Machine$integer.max")  # Martijn Schuemie for zero row ff
  
  if (createNew){
    if (.ffbytes[vmode] < 1)
      maxlength <- as.integer( ceiling( length * .ffbytes[vmode] / .rambytes[vmode] ) * .rambytes[vmode] / .ffbytes[vmode] )
    else
      maxlength <- as.integer(length)
  }
  
  pattr <- list(  # physical attributes
    vmode     = vmode
  , maxlength = maxlength
  , pattern   = pattern
  , filename  = filename
  , pagesize  = pagesize
  , finalizer = finalizer
  , finonexit = finonexit
  , readonly  = readonly 
  , caching   = caching
  , class     = "ff_pointer"  # class of pointer within ff class needed for finalizer dispatch
  )

  vattr <- list(
    Length      = length      # the current length we use from R
  , Dim         = dim
  , Dimorder    = dimorder
  , Symmetric   = symmetric
  , Fixdiag     = fixdiag
  , Levels      = levels
  , ramclass    = ramclass
  , ramattribs  = ramattribs
  )

  if (!FF_RETURN){
    if (length*.rambytes[vmode] > getOption("ffbatchbytes"))
      warning("creating large ram object with ", length*.rambytes[vmode], " > ", getOption("ffbatchbytes"))
    if (is.null(initdata)){
      initdata <- vector.vmode(vmode,0)  # Martijn Schuemie for zero row ff
    }else{
      if (is.null(levels))
        initdata <- as.vmode(initdata[], vmode) # NOTE that [] takes care of dimorder, [i1:i2] would not
      else
        initdata <- as.vmode(ram2ramcode(initdata[], levels), vmode) # NOTE that [] takes care of dimorder, [i1:i2] would not
    }
    ret <- switch(ffclass[1]
    , ff_symm   = { if (!is.null(levels)) stop("symm currently not defined for factors"); symm(initdata, dim=dim, dimnames = dimnames, fixdiag = fixdiag)}
    , ff_vector = {temp <- rep(initdata, length.out=length); names(temp)<-names; temp}
    , ff_matrix = {temp <- vector2array(initdata, dim=dim, dimorder=bydim); dimnames(temp)<- dimnames;temp}
    , ff_array  = {temp <- vector2array(initdata, dim=dim, dimorder=bydim); dimnames(temp)<- dimnames;temp}
    )
    if (!is.null(levels)){
      attr(ret, "levels") <- levels
    }
    if (!is.null(ramattribs))
      attributes(ret) <- c(attributes(ret), ramattribs)
    physical(ret) <- pattr[!is.na(match(names(pattr), ramphysical_includes))]
     virtual(ret) <- vattr[!is.na(match(names(vattr), ramvirtual_includes ))]
    attr(ret, "vmode") <- vmode
    if (!is.null(ramclass))
      class(ret) <- ramclass
    return(ret)
  }

  # stopifnot( file.access(filename,0) || (!file.access(filename,2) && overwrite ) )  # file must not exist OR be writable and overwrite
  # create fast file and return external pointer
  initval <- if (is.null(levels)) {
    if (length(initdata))
      as.vmode(initdata[1], vmode)
    else
      vector.vmode(vmode, 0)  # Martijn Schuemie for zero row ff
  } else {
    if (length(initdata)){
      if (.vunsigned[vmode])
        as.vmode(match(initdata[1], levels), vmode) - 1L
      else
        as.vmode(match(initdata[1], levels), vmode)
    }else{
      if (.vunsigned[vmode])
        0L
      else
        1L
    }
  }
  ffpointer <- .Call(C_new
  , as.character(filename)
  , .ffmode[vmode]
  , initval
  , maxlength
  , pagesize    # already integer
  , readonly
  , caching == "mmeachflush"
  , createNew  # Martijn Schuemie for zero row f: as.integer(maxlength == 0)
  , PACKAGE="ff"
  )

  #if (hideclass){
  #  a[[".class"]] <- a[["class"]]
  #  a[["class"]] <- NULL
  #}
  attributes(ffpointer) <- pattr  # unusual copying semantics: changing an attribute of a 'copy' of an ff object changes the attributes of ALL copies
  reg.finalizer(ffpointer, finalize.ff_pointer, onexit=finonexit)  # for details see ?finalize
  v <- list()
  attributes(v) <- vattr
  ret <- list()
  attributes(ret) <- list(physical=ffpointer, virtual=v, class=ffclass)
  # now this is a legal ff object

  # complete initialization
  if ( length(ret) && length(initdata) && (is.null(update) || update) ){
    if ( (!identical(as.vector(initdata[1]), as.vector(ret[1])) || length(initdata)>1) )
      ret <- update.ff(ret
      , from    = initdata
      , delete  = FALSE
      , bydim   = bydim
      , BATCHSIZE   = BATCHSIZE
      , BATCHBYTES  = BATCHBYTES
      , VERBOSE     = VERBOSE
      )
  }else{
    # note that at this point new ff objects are initialized with 0
    # thus unsigned factor are - without writing - initialized at their first level
    # signed factor need to be initialized with NA in order not to be in an illegal state
    if (!is.null(levels) && !.vunsigned[vmode])
      ret[] <- NA
  }
  if (is.null(dim)){
    if (!is.null(names)) # !hideclass &&
      names(ret) <- names
  }else{
    if (!is.null(dimnames)) # !hideclass &&
      dimnames(ret) <- dimnames
  }
  return(ret)
}



#! \name{update.ff}
#! \alias{update.ff}
#! \alias{update.ffdf}
#! \title{ Update ff content from another object }
#! \description{
#!   \command{update} updates one ff object with the content of another object.
#! }
#! \usage{
#! \method{update}{ff}(object, from, delete = FALSE, bydim = NULL, fromdim = NULL
#! , BATCHSIZE = .Machine$integer.max, BATCHBYTES = getOption("ffbatchbytes")
#! , VERBOSE = FALSE, \dots)
#! \method{update}{ffdf}(object, from, \dots)
#! }
#! \arguments{
#!   \item{object}{ an ff object to which to update }
#!   \item{from}{ an object from which to update }
#!   \item{delete}{ NA for quick update with file-exchange, TRUE for quick update with deleting the 'from' object after the update, can speed up updating significantly }
#!   \item{bydim}{ how to interpret the content of the object, see \code{\link{ff}} }
#!   \item{fromdim}{ how to interpret the content of the 'from' object, see \code{\link{ff}} }
#!   \item{BATCHSIZE}{ \code{BATCHSIZE} }
#!   \item{BATCHBYTES}{ \code{BATCHBYTES} }
#!   \item{VERBOSE}{ \code{VERBOSE} }
#!   \item{\dots}{ further arguments }
#! }
#! \details{
#!   If the source object \code{is.ff} and not \code{delete=FALSE} then instead of slow copying we - if possible - try to swap and rename the files behind the ff objects.
#!   Quick update requires that the two ff objects are \code{\link{vectorCompatible}},
#!   that both don't use \code{\link{vw}},
#!   that they have identical \code{\link{maxlength}}
#!   and identical \code{\link{levels.ff}}.
#! }
#! \note{
#!   There is no guarantee that with \code{delete=TRUE} the 'from' object gets deleted or that with \code{delete=NA} the 'from' object carries the content of 'object'.
#!   These expectations only hold if a quick update was actually possible.
#! }
#! \value{
#!   An ff object like the input 'object' updated with the content of the 'from' object.
#! }
#! \author{ Jens Oehlschlägel }
#! \seealso{ \code{\link{ff}}, \code{\link[=clone.ff]{clone}}, \code{\link{ffvecapply}}, \code{\link{vectorCompatible}}, \code{\link{filename}} }
#! \examples{
#!   x <- ff(1:100)
#!   y <- ff(-(1:100))
#!   message("You should make it a habit to re-assign the return value 
#! of update although this is not needed currently.")
#!   x <- update(x, from=y)
#!   x
#!   y
#!   x[] <- 1:100
#!   x <- update(x, from=y, delete=NA)
#!   x
#!   y
#!   x <- update(x, from=y, delete=TRUE)
#!   x
#!   y
#!   x
#!   rm(x,y); gc()
#!
#!   \dontrun{
#!     message("timings")
#!     x <- ff(1:10000000)
#!     y <- ff(-(1:10000000))
#!     system.time(update(x, from=y))
#!     system.time(update(y, from=x, delete=NA))
#!     system.time(update(x, from=y, delete=TRUE))
#!     rm(x,y); gc()
#!   }
#!
#! }
#! \keyword{ IO }
#! \keyword{ data }


# update 'x' with content of 'from' using recycling
# if delete=TRUE and from is.ff of same maxlength then we do fast update by renaming behind the scenes
# the latter is especially useful when 'transferring' data from temporary ff to target ff, see sort.ff



# Update the ff object 'object' with the content of 'from' (recycling 'from' if it is shorter).
#
# Arguments:
#   object      ff object to be updated; returned untouched if it has length zero
#   from        ff or ram object delivering the new content
#   delete      FALSE: copy the content
#               TRUE:  if possible plug the file of 'from' in as the file of 'object' (the old object file is removed)
#               NA:    if possible exchange the files behind 'object' and 'from'
#   bydim       dimorder used when writing to 'object' (requires that 'object' has a dim)
#   fromdim     dimorder used when reading from 'from' (requires that 'from' has a dim)
#   BATCHSIZE, BATCHBYTES, VERBOSE
#               handed to ffapply / ffvecapply when ff content is copied chunkwise
#   ...         not used
# Value:
#   'object'
# Errors:
#   stops if bydim/fromdim is given for an object without dim,
#   or if removing/renaming files fails during a quick update
update.ff <- function(
  object
, from
, delete        = FALSE
, bydim         = NULL
, fromdim       = NULL
, BATCHSIZE     = .Machine$integer.max       # optional batch size restriction in cases (limiting is useful if pulling values from function accesses raw data with more columns that k)
, BATCHBYTES    = getOption("ffbatchbytes")  # batch size restriction in bytes
, VERBOSE       = FALSE
, ...
)
{
  if (!length(object))
    return(object)
  dto <- dim(object)
  dfrom <- dim(from)
  ndto <- length(dto)
  ndfrom <- length(dfrom)
  if (is.null(dfrom) && !is.null(fromdim))
    stop("from has no dimension for fromdim")
  if (is.null(dto) && !is.null(bydim))
    stop("object has no dimension for bydim")
  if (inherits(from, "ff")){
    doto <- dimorder(object)
    dofrom <- dimorder(from)
    if ( vmode(object)==vmode(from)
    && vectorCompatible(dim=dto, dim2=dfrom, dimorder=doto, dimorder2=dofrom, bydim=bydim, bydim2=fromdim)
    && is.null(vw(from)) && is.null(vw(object))
    && (is.na(delete) || delete)
    && length(object)==length(from)
    && maxlength(object)==maxlength(from)
    && identical(levels(from), levels(object))
    ){
      # fast update by file rename: both files must be closed while they are moved
      reopento <- is.open(object)
      if (reopento)
        close(object)
      fromwasopen <- is.open(from)
      if (fromwasopen)
        close(from)
      # 'from' is only re-opened after an exchange (delete=NA); with delete=TRUE its file is gone afterwards
      reopenfrom <- fromwasopen && is.na(delete)

      if (is.na(delete)){
        # do exchange via a temporary file name
        tmpfilename <- fftempfile("update")
        objfilename <- filename(object)
        fromfilename <- filename(from)
        if(!file.move(objfilename, tmpfilename))
          stop("renaming object file '", objfilename, "' to temp file '", tmpfilename, "' failed in update(...,delete=NA)")
        if(!file.move(fromfilename, objfilename))
          stop("renaming from file '", fromfilename, "' to object file '", objfilename, "' failed in update(...,delete=NA)")
        if(!file.move(tmpfilename, fromfilename))
          stop("renaming temp file '", tmpfilename, "' to from file '", fromfilename, "' failed in update(...,delete=NA)")
      }else{
        # do plug in and delete: the old object file is removed and replaced by the file of 'from'
        oldnam <- filename(object)
        if(!file.remove(oldnam))
          stop("removing object file '", oldnam, "' failed in update(..., delete=TRUE)")
        if(!file.move(filename(from), oldnam))
          stop("renaming from file '", filename(from), "' to '", oldnam, "' failed in update(...,delete=TRUE)")
      }

      if (reopento)
        open(object)
      if (reopenfrom)
        open(from)
    }else{
      # slow update by chunkwise copying: both files must be open, we restore the previous open/closed state afterwards
      recloseto <- !is.open(object)
      if (recloseto)
        open(object)
      reclosefrom <- !is.open(from)
      if (reclosefrom)
        open(from)
      if (ndto && ndto==ndfrom && all(dto==dfrom)){
        # identical dims: copy array chunks, EXPR refers to the chunk bounds i1, i2 supplied by ffapply
        if (prod(dto)){
          ndim <- ndto
          args <- rep(alist(a = ), ndim)
          argsfrom <- c(args, alist(bydim=fromdim))
          argsto <- c(args, alist(bydim=bydim))
          ffapply(
            EXPR = {
              argsfrom[seq_len(ndim)] <- argsto[seq_len(ndim)] <- lapply(seq_len(ndim), function(i)substitute(b1:b2, list(b1=i1[i], b2=i2[i])))  # substitute avoids unpacking the sequences !!
              temp <- do.call("[", c(list(from), argsfrom, list(drop = FALSE)))
              object <- do.call("[<-", c(list(object), argsto, list(value=temp)))
            }
          , X = object
          , MARGIN  = seq_len(ndim)
          , BATCHSIZE   = BATCHSIZE
          , BATCHBYTES  = BATCHBYTES
          , VERBOSE     = VERBOSE
          )
        }
      }else{
        # different dims or no dims: copy as vectors, recycling 'from' if it is shorter
        nto <- length(object)
        nfrom <- length(from)
        if (nto && nfrom){
          if (nto%%nfrom)
            warning("length(object) not a multiple of length(from) in update.ff(object, from, ...)")

          i1 <- i2 <- 0L  # dummy assignment to shut up R CMD CHECK NOTE about no visible binding
          if (nfrom<nto){
            ffvecapply(
              EXPR = object[i1:i2] <- repfromto(from, i1, i2)
            , X = object
            , BATCHSIZE   = BATCHSIZE
            , BATCHBYTES  = BATCHBYTES
            , VERBOSE     = VERBOSE
            )
          }else{
            ffvecapply(
              EXPR = object[i1:i2] <- from[i1:i2]
            , X = object
            , BATCHSIZE   = BATCHSIZE
            , BATCHBYTES  = BATCHBYTES
            , VERBOSE     = VERBOSE
            )
          }
        }
      }
      if (reclosefrom)
        close(from)
      if (recloseto)
        close(object)
    }
  }else{
    # if is.ram(from) we assign all in one chunk and let [<-.ff recycle
    # dimorder(from) is always standard
    # dimorder(to) and bydim is handled by [<-.ff
    # thus we only need to care about fromdim
    if (!is.null(fromdim)){
      fromLevels <- levels(from)
      if (is.null(fromLevels)){
        from <- array2vector(from, dim=dfrom, dimorder=fromdim)
      }else{
        # re-attach levels and class after flattening
        fromramclass <- ramclass(from)
        from <- array2vector(from, dim=dfrom, dimorder=fromdim)
        attr(from, "levels") <- fromLevels
        class(from) <- fromramclass
      }
    }
    recloseto <- !is.open(object)
    if (recloseto)
      open(object)
    if (is.null(bydim))
      object[] <- from
    else
      object[,bydim=bydim] <- from
    if (recloseto)
      close(object)
  } # end is.ram(from)
  object
}


#! \name{clone.ff}
#! \alias{clone.ff}
#! \title{ Cloning ff and ram objects }
#! \description{
#!   \command{clone} physically duplicates ff (and ram) objects and can additionally change some features, e.g. length.
#! }
#! \usage{
#! \method{clone}{ff}(x
#! , initdata = x
#! , length = NULL
#! , levels = NULL
#! , ordered = NULL
#! , dim = NULL
#! , dimorder = NULL
#! , bydim = NULL
#! , symmetric = NULL
#! , fixdiag = NULL
#! , names = NULL
#! , dimnames = NULL
#! , ramclass = NULL
#! , ramattribs = NULL
#! , vmode = NULL
#! , update  = NULL
#! , pattern = NULL
#! , filename = NULL
#! , overwrite = FALSE
#! , pagesize = NULL
#! , caching = NULL
#! , finalizer = NULL
#! , finonexit = NULL
#! , FF_RETURN = NULL
#! , BATCHSIZE = .Machine$integer.max
#! , BATCHBYTES = getOption("ffbatchbytes")
#! , VERBOSE = FALSE
#! , \dots)
#! }
#! \arguments{
#!   \item{x}{ \code{x} }
#!   \item{initdata}{ scalar or vector of the \code{\link{.vimplemented}} \code{\link{vmode}}s, recycled if needed, default 0, see also \code{\link{as.vmode}} and \code{\link{vector.vmode}} }
#!   \item{length}{ optional vector \code{\link{length}} of the object (default: derive from 'initdata' or 'dim'), see \code{\link{length.ff}} }
#!   \item{levels}{ optional character vector of levels (in this case initdata must be composed of these) (default: derive from initdata) }
#!   \item{ordered}{ indicate whether the levels are ordered (TRUE) or non-ordered factor (FALSE, default) }
#!   \item{dim}{ optional array \code{\link{dim}}, see \code{\link{dim.ff}} and \code{\link{array}} }
#!   \item{dimorder}{ physical layout (default seq_along(dim)), see \code{\link{dimorder}} and \code{\link{aperm}} }
#!   \item{bydim}{ dimorder by which to interpret the 'initdata', generalization of the 'byrow' parameter in \code{\link{matrix}} }
#!   \item{symmetric}{ extended feature: TRUE creates symmetric matrix (default FALSE) %, see \code{\link{symm}}, \code{\link{ff_symm}}, \code{\link{ff_dist}}
#!        }
#!   \item{fixdiag}{ extended feature: non-NULL scalar requires fixed diagonal for symmetric matrix (default NULL is free diagonal) }
#!   \item{names}{ see \code{\link{names}} }
#!   \item{dimnames}{ NOT taken from initdata, see \code{\link{dimnames}} }
#!   \item{ramclass}{ class attribute attached when moving all or parts of this ff into ram, see \code{\link{ramclass}} }
#!   \item{ramattribs}{ additional attributes attached when moving all or parts of this ff into ram, see \code{\link{ramattribs}} }
#!   \item{vmode}{ virtual storage mode (default: derive from 'initdata'), see \code{\link{vmode}} and \code{\link{as.vmode}} }
#!   \item{update}{ set to FALSE to avoid updating with 'initdata' (default TRUE) (used by \code{\link{ffdf}}) }
#!   \item{pattern}{ root pattern for automatic ff filename creation (default "ff"), see also \code{\link[=Extract.ff]{physical}} }
#!   \item{filename}{ ff \code{\link{filename}} (default tmpfile with 'pattern' prefix), see also \code{\link[=Extract.ff]{physical}} }
#!   \item{overwrite}{ set to TRUE to allow overwriting existing files (default FALSE) }
#!   \item{pagesize}{ pagesize in bytes for the memory mapping (default from getOptions("ffpagesize") initialized by \code{\link{getdefaultpagesize}}), see also \code{\link[=Extract.ff]{physical}} }
#!   \item{caching}{ caching scheme for the backend, currently 'mmnoflush' or 'mmeachflush' (flush mmpages at each swap, default from getOptions("ffcaching") initialized with 'memorymap'), see also \code{\link[=Extract.ff]{physical}} }
#!   \item{finalizer}{ name of finalizer function called when ff object is \code{\link{remove}d}, (default "deleteIfOpen" from getOptions("fffinalizer"))), standard finalizers are \code{\link{close.ff}}, \code{\link{delete.ff}} and \code{\link{deleteIfOpen.ff}}, see also \code{\link{reg.finalizer}} }
#!   \item{finonexit}{ logical scalar determining whether finalizer is also called when R is closed via \code{\link{q}}, (default TRUE from getOptions("fffinonexit")) }
#!   \item{FF_RETURN}{ logical scalar or ff object to be used. The default NULL creates a ff or ram clone, TRUE returns a ff clone, FALSE returns a ram clone. Handing over an ff object here uses this or stops if not \code{\link{ffsuitable}} }
#!   \item{BATCHSIZE}{ integer scalar limiting the number of elements to be processed in \code{\link{update.ff}} when length(initdata)>1, default \code{.Machine$integer.max} }
#!   \item{BATCHBYTES}{ integer scalar limiting the number of bytes to be processed in \code{\link{update.ff}} when length(initdata)>1, default from getOption("ffbatchbytes"), see also \code{\link{.rambytes}} }
#!   \item{VERBOSE}{ set to TRUE for verbosing in \code{\link{update.ff}} when length(initdata)>1, default FALSE }
#!   \item{\dots}{ further arguments to the generic }
#! }
#! \details{
#!   \command{clone} is generic. \command{clone.ff} is the workhorse behind \code{\link{as.ram}} and \code{\link{as.ff}}.
#!   For creating the desired object it calls \code{\link{ff}} which calls \code{\link{update}} for initialization.
#! }
#! \value{
#!   an ff or ram object
#! }
#! \author{ Jens Oehlschlägel }
#! \seealso{ \code{\link{ff}}, \code{\link{update}}, \code{\link{as.ram}}, \code{\link{as.ff}} }
#! \examples{
#!   x <- ff(letters, levels=letters)
#!   y <- clone(x, length=52)
#!   rm(x,y); gc()
#! }
#! \keyword{ IO }
#! \keyword{ data }


# clone can change the length of the cloned object, but not create subsetted versions
# increased length is filled by recycling
# Create a physical duplicate of 'x' by calling ff() with 'x' as initdata.
# Every feature that is not given explicitly (NULL) is taken over from 'x'.
# If 'length' is given without 'dim' the clone is a plain vector:
# dimorder, symmetric, fixdiag and dimnames are dropped.
# Returns the object created by ff() (ff object or ram object, see FF_RETURN).
clone.ff <- function(
  x
  , initdata    = x
  , length      = NULL
  , levels      = NULL
  , ordered     = NULL
  , dim         = NULL
  , dimorder    = NULL
  , bydim       = NULL
  , symmetric   = NULL
  , fixdiag     = NULL
  , names       = NULL    # not taken from initdata
  , dimnames    = NULL
  , ramclass    = NULL
  , ramattribs  = NULL
  , vmode       = NULL
  , update      = NULL    # set to FALSE to suppress updating ff object with initdata
  , pattern     = NULL
  , filename    = NULL
  , overwrite   = FALSE
  , pagesize    = NULL
  , caching     = NULL
  , finalizer   = NULL
  , finonexit   = NULL
  #, hideclass  = FALSE
  , FF_RETURN   = NULL
  , BATCHSIZE   = .Machine$integer.max       # optional batch size restriction in cases (limiting is useful if pulling values from function accesses raw data with more columns that k)
  , BATCHBYTES  = getOption("ffbatchbytes")  # batch size restriction in bytes
  , VERBOSE     = FALSE
  , ... # dummy to keep R CMD check quiet
)
{
  # BTW: we tried fast cloning via file.copy() but that was slower, only system(copy...) would save 33% time (too wacky)

  # virtual features: take from x unless given
  if (is.null(vmode) && !is.null(x))
    vmode <- vmode(x)

  if (is.null(levels))
    levels <- levels(x)
  if (is.null(ordered))
    ordered <- is.ordered(x)
  if (is.null(names))
    names <- names(x)
  if (is.null(ramclass))
    ramclass <- ramclass(x)
  if (is.null(ramattribs))
    ramattribs <- ramattribs(x)

  oldlen <- length(x)
  if (is.null(dim) && !is.null(length)){  # not dim but length given
    dimorder <- NULL
    symmetric <- FALSE
    fixdiag <- NULL
    dimnames <- NULL
  }else{ # dim given or none of dim/length
    if (is.null(dim))
      dim <- dim(x)
    if (is.null(dim)){
      if (is.null(length))
        length <- length(x)
    }else length <- as.integer(prod(dim))
    if (is.null(dimorder)){
      dimorder <- virtual(x)$Dimorder  # NOT: dimorder(x) because this would not restore the dimorder from ram objects
    }
    if (is.null(symmetric))
      symmetric <- symmetric(x)
    if (is.null(fixdiag))
      fixdiag <- fixdiag(x)
    # dimnames of x are only meaningful for the clone if dim is unchanged
    if (identical(dim, dim(x))){
      if (is.null(dimnames))
        dimnames <- dimnames(x)
    }else
      dimnames <- NULL
  }

  if (is.null(FF_RETURN))
    FF_RETURN <- is.ff(x)
  if ( !(is.logical(FF_RETURN) && length(FF_RETURN)==1) )
    stop("in clone() FF_RETURN must be a logical scalar")

  # physical features: take from x unless given
  physical <- physical(x)
  if (is.null(pattern)){
    pattern <- physical$pattern
    if (is.null(pattern))
      pattern <- "clone"
  }
  # the filename is only taken over from ram objects, the clone of an ff gets its filename from ff()
  if (is.null(filename) && !is.ff(x))
    filename <- physical$filename
  if (is.null(pagesize))
    pagesize <- physical$pagesize
  if (is.null(caching))
    caching <- physical$caching
  if (is.null(finonexit))
    finonexit <- physical$finonexit
  # 'finalizer' is handed to ff() as given (it is not taken over from x)

  # don't use "<-" operator with ff argument in order to avoid recursion (if anyone defines method <-.ff as cloning)
  assign("ret", ff(
    initdata    = initdata
    , length      = length
    , levels      = levels
    , ordered     = ordered
    , dim         = dim
    , dimorder    = dimorder
    , bydim       = bydim
    , symmetric   = symmetric
    , fixdiag     = fixdiag
    , names       = names
    , dimnames    = dimnames
    , ramclass    = ramclass
    , ramattribs  = ramattribs
    , vmode       = vmode
    , update      = update
    , pattern     = pattern
    , filename    = filename
    , readonly    = FALSE
    , overwrite   = overwrite
    , pagesize    = pagesize
    , caching     = caching
    , finalizer   = finalizer
    , finonexit   = finonexit
    #, hideclass  = hideclass
    , FF_RETURN   = FF_RETURN
    , BATCHSIZE   = BATCHSIZE
    , BATCHBYTES  = BATCHBYTES
    , VERBOSE     = VERBOSE
  ))
  newlen <- length(ret)

  # adjust the names of x to the length of the clone
  # NOTE(review): this overwrites names handed over via the 'names' argument whenever x has names - confirm this is intended
  nam <- names(x)
  if (is.null(dim) && !is.null(nam)){
    if (newlen==oldlen)
      names(ret) <- nam
    else if (newlen>oldlen)
      names(ret) <- c(nam, (oldlen+1L):newlen)
    else
      names(ret) <- nam[seq_len(newlen)]
  }

  # na.count is carried over to the clone if the length is unchanged.
  # It must be set on 'ret', not on 'x': physical attributes of ff objects are shared between copies,
  # so writing to 'x' would modify the original instead of the clone.
  if (!is.null(physical$na.count)){
    if (newlen==oldlen)
      na.count(ret) <- physical$na.count
    else{
      na.count(ret) <- NULL
      warning("cloning removed attribute 'na.count'")
    }
  }
  # is.sorted only survives for vectors that have not been extended
  if (!is.null(physical$is.sorted)){
    if (newlen<=oldlen && is.null(dim(x)) && is.null(dim(ret)))
      is.sorted(ret) <- physical$is.sorted
    else{
      is.sorted(ret) <- FALSE
      warning("cloning set 'is.sorted' to FALSE")
    }
  }
  ret
}


#! \name{finalizer}
#! \Rdversion{1.1}
#! \alias{finalizer}
#! \alias{finalizer<-}
#! \alias{finalizer.ff}
#! \alias{finalizer<-.ff}
#! \title{
#!   Get and set finalizer (name)
#! }
#! \description{
#!   The generic \code{finalizer} allows to get the current finalizer. The generic \code{finalizer<-} allows to set the current finalizer or to change an existing finalizer (but not to remove a finalizer).
#! }
#! \usage{
#! finalizer(x, ...)
#! finalizer(x, ...) <- value
#! \method{finalizer}{ff}(x, ...)
#! \method{finalizer}{ff}(x, ...) <- value
#! }
#! \arguments{
#!   \item{x}{an \code{\link{ff}} object}
#!   \item{value}{the name of the new finalizer}
#!   \item{\dots}{ignored}
#! }
#! \details{
#!   If an \code{\link{ff}} object is created a finalizer is assigned, it has the task to free resources no longer needed, for example remove the ff file or free the C++ RAM associated with an open ff file.
#!   The assigned finalizer depends on the location of the ff file:
#!   if the file is created in \code{getOption("fftempdir")} it is considered temporary and has default finalizer \code{\link[=delete.ff]{delete}},
#!   files created in other locations have default finalizer \code{\link[=close.ff]{close}}.
#!   The user can override this either by setting \code{options("fffinalizer")} or by using argument \code{finalizer} when creating single \code{ff} objects.
#!   Available finalizer generics are "close", "delete" and "deleteIfOpen", available methods are \code{\link{close.ff}}, \code{\link{delete.ff}} and \code{\link{deleteIfOpen.ff}}.
#!   \cr
#!   In order to be able to change the finalizer before finalization, the finalizer is NOT directly passed to R's finalization mechanism \code{\link{reg.finalizer}} (an active finalizer can never be changed other than be executed).
#!   Instead the NAME of the desired finalizer is stored in the ff object and \code{\link{finalize.ff_pointer}} is passed to \code{reg.finalizer}.
#!   \code{finalize.ff_pointer} will at finalization-time determine the desired finalizer and call it.
#!   \cr
#!   There are two possible triggers for execution of \code{finalize.ff_pointer}:
#!   \enumerate{
#!     \item the garbage collection \code{\link{gc}} following removal \code{\link{rm}} of the ff object
#!     \item closing R if \code{finonexit} was \code{TRUE} at ff creation-time, determined by \code{options("fffinonexit")} and ff argument \code{finonexit}
#!   }
#!   Furthermore there are two possible triggers for calling the finalizer
#!   \enumerate{
#!     \item an explicit call to \code{\link{finalize}}
#!     \item an explicit call to one of the finalizers \code{\link[=close.ff]{close}}, \code{\link{delete}} and \code{\link{deleteIfOpen}}
#!   }
#!   The user can define custom finalizers by creating a generic function like \code{\link{delete}}, a ff_pointer method like \code{\link{delete.ff_pointer}} and a ff method for manual calls like \code{\link{delete.ff}}. The user then is responsible to take care of two things
#!   \enumerate{
#!     \item adequate freeing of resources
#!     \item proper maintenance of the finalizer name in the ff object via \code{\link[=Extract.ff]{physical}$finalizer}
#!   }
#!   \code{is.null(finalizer(ff))} indicates NO active finalizer, i.e. no pending execution of \code{finalize.ff_pointer} lurking around after call of \code{reg.finalizer}.
#!   This requires that
#!   \enumerate{
#!     \item the \code{ff_pointer} method sets the finalizer name to \code{NULL}
#!     \item the \code{ff} may change a non-NULL finalizer name to a different name but not change it to NULL
#!   }
#! }
#! \value{
#!   \code{finalizer} returns the name of the active finalizer or \code{NULL} if no finalizer is active. \cr
#!   \code{finalizer<-} returns the changed ff object (reassignment of this return value not needed to keep the change).
#!   If there was no pending call to \code{\link{finalize.ff_pointer}} (\code{is.null(finalizer(ff))}), \code{finalizer<-} will create one by calling \code{reg.finalizer} with the current setting of \code{\link[=Extract.ff]{physical}$finonexit}.
#! }
#! \note{
#!   You can not assign NULL to an active finalizer using \code{finalizer<-} because this would not stop R's finalization mechanism and would carry the risk of assigning MULTIPLE finalization tasks.
#! }
#! \author{
#!   Jens Oehlschlägel
#! }
#! \seealso{
#!   \code{\link{ff}}, \code{\link{finalize}}, \code{\link{reg.finalizer}}
#! }
#! \examples{
#!   x <- ff(1:12, pattern="./finalizerdemo")
#!   fnam <- filename(x)
#!   finalizer(x)
#!   finalizer(x) <- "delete"
#!   finalizer(x)
#!   rm(x)
#!   file.exists(fnam)
#!   gc()
#!   file.exists(fnam)
#! }
#! \keyword{ IO }

# Return the finalizer name stored in the 'physical' attribute of x (NULL if none is stored).
finalizer.ff <- function(x, ...){
  pointer <- attr(x, "physical")
  attr(pointer, "finalizer")
}

# Replace the finalizer name stored in the 'physical' attribute of x.
# Assigning NULL is refused while a finalizer name is stored.
# If no finalizer name was stored before, finalize.ff_pointer is registered
# via reg.finalizer using the stored 'finonexit' setting.
"finalizer<-.ff" <- function(x, ..., value){
  pointer <- attr(x, "physical")
  previous <- attr(pointer, "finalizer")
  if (is.null(value)){
    # NULL is only acceptable if there is nothing to de-activate
    if (!is.null(previous))
      stop("Active finalizer '", previous, "'can only be REPLACED by a new name of a finalizer function, but NOT de-activated")
    return(x)
  }
  attr(attr(x, "physical"), "finalizer") <- value
  if (is.null(previous))
    reg.finalizer(pointer, finalize.ff_pointer, onexit=attr(pointer, "finonexit"))
  x
}

#! \name{finalize}
#! \Rdversion{1.1}
#! \alias{finalize}
#! \alias{finalize.ff_pointer}
#! \alias{finalize.ff}
#! \alias{finalize.ffdf}
#! \title{
#!   Call finalizer
#! }
#! \description{
#!   This calls the currently assigned finalizer, either via R's finalization mechanism or manually.
#! }
#! \usage{
#! finalize(x, ...)
#! \method{finalize}{ff_pointer}(x, ...)
#! \method{finalize}{ff}(x, ...)
#! \method{finalize}{ffdf}(x, ...)
#! }
#! \arguments{
#!   \item{x}{ either an \code{\link{ff}} or \code{\link{ffdf}} object or an \code{ff_pointer}, see details }
#!   \item{\dots}{ currently ignored }
#! }
#! \details{
#!   The \code{finalize.ff_pointer} method is called from R after it had been passed to \code{\link{reg.finalizer}}. It will set the finalizer name to \code{NULL} and call the finalizer.
#!   \cr
#!   The \code{finalize} generic can be called manually on \code{\link{ff}} or \code{\link{ffdf}} objects. It will call the finalizer but not touch the finalizer name.
#!   \cr
#!   For more details see \code{\link{finalizer}}
#! }
#! \note{
#!   \code{finalize.ff_pointer} MUST NEVER be called manually - neither directly nor by calling the generic on an ff_pointer (could erroneously signal that there is no pending finalization lurking around)
#! }
#! \value{
#!   returns whatever the called finalizer returns, for ffdf a list with the finalization returns of each physical component is returned.
#! }
#! \author{
#!   Jens Oehlschlägel
#! }
#! \seealso{
#!   \code{\link{finalizer}}
#! }
#! \examples{
#!   x <- ff(1:12, pattern="./finalizerdemo")
#!   fnam <- filename(x)
#!   finalizer(x)
#!   is.open(x)
#!   file.exists(fnam)
#!
#!   finalize(x)
#!
#!   finalizer(x)
#!   is.open(x)
#!   file.exists(fnam)
#!
#!   delete(x)
#!   finalizer(x)
#!   is.open(x)
#!   file.exists(fnam)
#!
#!   rm(x)
#!   gc()
#! }
#! \keyword{ IO }

# Finalization entry point registered with reg.finalizer.
# Returns TRUE if no finalizer name is stored in x,
# otherwise clears the stored name and calls the function of that name on x.
finalize.ff_pointer <- function(
  x     # ff_pointer
, ...   # ignored
){
  finname <- attr(x, "finalizer")
  if (!is.null(finname)){
    # clear the name first, then dispatch to the named finalizer
    attr(x, "finalizer") <- NULL
    do.call(finname, list(x))
  }else
    TRUE
}

# Manual finalization: calls the function whose name is stored as finalizer
# in the 'physical' attribute of x, leaving the stored name untouched.
# Returns TRUE if no finalizer name is stored.
finalize.ff <- function(
x       # ff object
, ...   # passed to finalizer
){
  finname <- attr(attr(x, "physical"), "finalizer")
  if (!is.null(finname))
    do.call(finname, list(x, ...))
  else
    TRUE
}

# Calls finalize on each element of the 'physical' component of x and,
# if the 'row.names' component is an ff object, on the row.names as well.
# Returns the list of results (row.names result first, if present).
finalize.ffdf <- function(
x       # ffdf object
, ...   # passed to finalize
){
    results <- lapply(.subset2(x, "physical"), finalize, ...)
    rownam <- .subset2(x, "row.names")
    if (!is.ff(rownam))
        return(results)
    c(row.names=finalize(rownam, ...), results)
}



#! \name{open.ff}
#! \alias{open.ff}
#! \alias{open.ffdf}
#! \title{ Opening an ff file }
#! \description{
#!   \command{open.ff} opens an ff file, optionally marking it readonly and optionally specifying a caching scheme.
#! }
#! \usage{
#!  \method{open}{ff}(con, readonly = FALSE, pagesize = NULL, caching = NULL, assert = FALSE, \dots)
#!  \method{open}{ffdf}(con, readonly = FALSE, pagesize = NULL, caching = NULL, assert = FALSE, \dots)
#! }
#! \arguments{
#!   \item{con}{ an \code{\link{ff}} or \code{\link{ffdf}} object }
#!   \item{readonly}{ \code{readonly} }
#!   \item{pagesize}{ number of bytes to use as pagesize or NULL to take the pagesize stored in the \code{\link[=Extract.ff]{physical}} attribute of the ff object, see \code{\link{getalignedpagesize}} }
#!   \item{caching}{ one of 'mmnoflush' or 'mmeachflush', see \code{\link{ff}} }
#!   \item{assert}{ setting this to TRUE will give a message if the ff was not open already }
#!   \item{\dots}{ further arguments (not used) }
#! }
#! \details{
#!   ff objects will be opened automatically when accessing their content and the file is still closed.
#!   Opening ffdf objects will open all of their \code{\link[=Extract.ffdf]{physical}} components including their \code{\link[=row.names.ffdf]{row.names}} if they are \code{\link{is.ff}}
#! }
#! \value{
#!   TRUE if object could be opened, FALSE if it was opened already (or NA if not all components of an ffdf returned FALSE or TRUE on opening)
#! }
#! \author{ Jens Oehlschlägel }
#! \seealso{ \code{\link{ff}}, \code{\link{close.ff}}, \code{\link{delete}}, \code{\link{deleteIfOpen}}, \code{\link{getalignedpagesize}} }
#! \examples{
#!   x <- ff(1:12)
#!   close(x)
#!   is.open(x)
#!   open(x)
#!   is.open(x)
#!   close(x)
#!   is.open(x)
#!   x[]
#!   is.open(x)
#!   y <- x
#!   close(y)
#!   is.open(x)
#!   rm(x,y); gc()
#! }
#! \keyword{ IO }
#! \keyword{ data }

# Open the file behind an ff object.
#
# Arguments:
#   con       ff object
#   readonly  open the file readonly; forced to TRUE (with a message) if the file is not writable
#   pagesize  if not NULL the aligned pagesize is stored in the physical attributes before opening
#   caching   caching scheme, NULL takes the one stored in the physical attributes
#   assert    TRUE gives a message if the ff was not open already
#             (and suppresses the error about a conflicting 'readonly' if it was)
# Value:
#   TRUE if the file was opened, FALSE if it was open already
# Errors:
#   stops if the ff is already open with a different 'readonly' (unless assert=TRUE),
#   if the file does not exist, or if opening fails
open.ff <- function(con
, readonly  = FALSE
, pagesize = NULL
, caching = NULL
, assert = FALSE
, ... # dummy to keep R CMD check quiet
)
{
  readonly <- as.logical(readonly)
  physical <- attr(con, "physical")
  if (is.open(con)){
    if (attr(physical, "readonly")!=readonly && !assert)
      stop(paste("ff is already open with readonly=", attr(physical, "readonly"), sep=""))
    return(FALSE)
  }else if (assert){
    message("opening ff ", filename(con))
  }
  filename <- attr(physical, "filename")
  stopifnot(file.access(filename,0)==0 )  # file must exist
  if (!readonly && file.access(filename,2)){
    # no write permission: fall back to readonly
    message("opening ff ", filename, " readonly")
    readonly <- TRUE
  }
  if (!is.null(pagesize)){
    attr(attr(con, "physical"), "pagesize") <- getalignedpagesize(pagesize) # C-code currently reads pagesize directly from the attribute
  }
  if (is.null(caching))
    caching <- attr(physical, "caching")
  else
    caching <- match.arg(caching, caching_schemes)
  if (!.Call(C_open, physical, .ffmode[attr(physical, "vmode")], readonly, caching=="mmeachflush", PACKAGE="ff"))
    stop("failed opening ff ", filename(con), " because ", geterrstr.ff(con))
  attr(attr(con, "physical"), "caching") <- caching
  if (is.null(attr(physical, "finalizer"))){
    # no finalizer name stored: store "close" and register the finalization entry point
    attr(attr(con, "physical"), "finalizer") <- "close"
    reg.finalizer(physical, finalize.ff_pointer, onexit=attr(physical, "finonexit"))
  }
  TRUE
}



#! \name{close.ff}
#! \alias{close.ff}
#! \alias{close.ffdf}
#! \alias{close.ff_pointer}
#! \title{ Closing ff files }
#! \description{
#!   Close frees the Memory Mapping resources and closes the ff file without deleting the file data.
#! }
#! \usage{
#! \method{close}{ff}(con, \dots)
#! \method{close}{ffdf}(con, \dots)
#! \method{close}{ff_pointer}(con, \dots)
#! }
#! \arguments{
#!   \item{con}{ an open ff object }
#!   \item{\dots}{ \code{\dots} }
#! }
#! \details{
#!   The \code{ff_pointer} method is not intended for manual use, it is used at finalizer dispatch time.
#!   Closing ffdf objects will close all of their \code{\link[=Extract.ffdf]{physical}} components including their \code{\link[=row.names.ffdf]{row.names}} if they are \code{\link{is.ff}}
#! }
#! \value{
#!   TRUE if the file could be closed, FALSE if it was closed already (or NA if not all components of an ffdf returned FALSE or TRUE on closing)
#! }
#! \author{ Jens Oehlschlägel }
#! \seealso{ \code{\link{ff}}, \code{\link{open.ff}}, \code{\link{delete}}, \code{\link{deleteIfOpen}} }
#! \examples{
#!   x <- ff(1:12)
#!   close(x)
#!   x
#!   open(x)
#!   x
#!   rm(x); gc()
#! }
#! \keyword{ IO }
#! \keyword{ data }

# close method the finalizer dispatches to: receives the ff_pointer itself, not the ff object
close.ff_pointer <- function(con, ...)  # '...' is a dummy to keep R CMD check quiet
{
  # calling the C-level 'delete' is intentional: it releases (almost) all resources,
  # residual C++ RAM is released when rm(ff) removes the external pointer (said Daniel Adler, 15.11.2007)
  released <- .Call(C_delete, con, PACKAGE="ff")
  released
}
# close method for manual use: receives the ff object and closes via its 'physical' attribute
close.ff <- function(con, ...)  # '...' is a dummy to keep R CMD check quiet
{
  # NOTE: the finalizer name is deliberately left untouched on manual close
  # (this could be a temporary ff file carrying a 'delete' finalizer) !!
  ptr <- attr(con, "physical")
  # calling the C-level 'delete' is intentional: it releases (almost) all resources,
  # residual C++ RAM is released when rm(ff) removes the external pointer (said Daniel Adler, 15.11.2007)
  .Call(C_delete, ptr, PACKAGE="ff")
}


#! \name{delete}
#! \alias{delete}
#! \alias{delete.ff}
#! \alias{delete.ffdf}
#! \alias{delete.ff_pointer}
#! \alias{delete.default}
#! \alias{deleteIfOpen}
#! \alias{deleteIfOpen.ff}
#! \alias{deleteIfOpen.ff_pointer}
#! \title{ Deleting the file behind an ff object }
#! \description{
#!   The generic \command{delete} deletes the content of an object without removing the object itself.
#!   The generic \command{deleteIfOpen} does the same, but only if \code{\link{is.open}} returns TRUE.
#! }
#! \usage{
#! delete(x, \dots)
#! deleteIfOpen(x, \dots)
#! \method{delete}{ff}(x, \dots)
#! \method{delete}{ffdf}(x, \dots)
#! \method{delete}{ff_pointer}(x, \dots)
#! \method{delete}{default}(x, \dots)
#! \method{deleteIfOpen}{ff}(x, \dots)
#! \method{deleteIfOpen}{ff_pointer}(x, \dots)
#! }
#! \arguments{
#!   \item{x}{ an ff or ram object }
#!   \item{\dots}{ further arguments (not used) }
#! }
#! \details{
#!   The proper sequence to fully delete an ff object is: \code{delete(x);rm(x)}, where \command{delete.ff} frees the Memory Mapping resources and deletes the ff file,
#!   leaving intact the R-side object including its \code{\link{class}}, \code{\link[=Extract.ff]{physical}} and \code{\link[=Extract.ff]{virtual}} attributes.
#!   The default method is a compatibility function doing something similar with ram objects: by assigning an empty list to the name of the ram object in the parent frame
#!   we destroy the content of the object, leaving an empty stub that prevents raising an error if the parent frame calls the \code{delete(x);rm(x)} sequence. \cr
#!   The \command{deleteIfOpen} does the same as \command{delete} but protects closed ff objects from deletion, it is mainly intended for use through a finalizer, as are the \code{ff_pointer} methods.
#! }
#! \value{
#!   \command{delete} returns TRUE if the/all ff files could be removed and FALSE otherwise. \cr
#!   \command{deleteIfOpen} returns TRUE if the/all ff files could be removed, FALSE if not and NA if the ff object was not open.
#! }
#! \author{ Jens Oehlschlägel }
#! \note{
#!   Deletion of ff files can be triggered automatically via three routes:
#!   \enumerate{
#!     \item if an ff object with a 'delete' finalizer is removed
#!     \item if an ff object was created with \code{fffinonexit=TRUE} the finalizer is also called when R shuts down.
#!     \item if an ff object was created in \code{getOption("fftempdir")}, it will be unlinked together with the fftempdir \code{\link[base:ns-hooks]{.onUnload}}
#!   }
#!   Thus in order to retain an ff file, one has to create it elsewhere than in fftempdir with a finalizer that does not destroy the file (by default files outside fftempdir get a 'close' finalizer), i.e. one of the following:
#!   \enumerate{
#!     \item name the file AND use \code{fffinalizer="close"}
#!     \item name the file AND use \code{fffinalizer="deleteIfOpen"} AND close the ff object before leaving R
#!     \item name the file AND use \code{fffinalizer="delete"} AND use \code{fffinonexit=FALSE}
#!   }
#! }
#! \seealso{ \code{\link{ff}}, \code{\link{close.ff}}, \code{\link{open.ff}}, \code{\link{reg.finalizer}} }
#! \examples{
#!   message('create the ff file outside getOption("fftempdir"), 
#!     it will have default finalizer "close", so you need to delete it explicitly')
#!   x <- ff(1:12, pattern="./ffexample")
#!   delete(x)
#!   rm(x)
#! }
#! \keyword{ IO }
#! \keyword{ data }

# delete method the finalizer dispatches to: receives the ff_pointer itself, not the ff object
delete.ff_pointer <- function(x, ...)  # '...' is a dummy to keep R CMD check quiet
{
  # first release at C level, then remove the file(s) from disk
  .Call(C_delete, x, PACKAGE="ff")
  removed <- file.remove(filename(x))  # filename() might be a vector of files in the future
  all(removed)
}
# delete method for manual use: receives the ff object; returns TRUE if all files could be removed
delete.ff <- function(x, ...)  # '...' is a dummy to keep R CMD check quiet
{
  physical <- attr(x, "physical")
  # If a finalizer has been recorded, replace its name by the harmless dummy "close":
  # this prevents 'delete' from being called a second time and signals that a call to
  # finalize.ff_pointer is still lurking around.
  has_finalizer <- !is.null(attr(physical, "finalizer"))
  if (has_finalizer){
    attr(physical, "finalizer") <- "close"
  }
  .Call(C_delete, physical, PACKAGE="ff")
  path <- attr(physical, "filename")  # might be a vector of files in the future
  all(file.remove(path))
}
# default delete method for ram objects: overwrites the caller's variable with an empty list,
# i.e. frees the memory associated with x but leaves a stub
# (because often we have delete(x);rm(x) in the parent frame)
delete.default <- function(x, ...)  # '...' is a dummy to keep R CMD check quiet
{
  caller <- parent.frame()
  target <- deparse(substitute(x))  # name under which x is known to the caller
  assign(target, list(), envir = caller)
}


# deleteIfOpen method the finalizer dispatches to: receives the ff_pointer itself
deleteIfOpen.ff_pointer <- function(x, ...)  # '...' is a dummy to keep R CMD check quiet
{
  # closed objects are protected from deletion: signal this with NA
  if (!is.open(x))
    return(NA)
  .Call(C_delete, x, PACKAGE="ff")
  all(file.remove(filename(x)))   # filename() might be a vector of files in the future
}


# deleteIfOpen method for manual use: delegates to delete() for open objects, returns NA otherwise
deleteIfOpen.ff <- function(x, ...)  # '...' is a dummy to keep R CMD check quiet
{
  if (!is.open(x))
    return(NA)
  delete(x)
}


# Disabled debugging aid: because of the if (FALSE) guard this definition is never evaluated.
# If it were enabled, deleteIfOpen.default would only print and str() its argument x and
# the content of '...' between marker messages; it would not delete anything.
if (FALSE){
  deleteIfOpen.default <- function(x
  , ... # dummy to keep R CMD check quiet
  )
  {
    message("--- Here deleteIfOpen.default ---")
    message("--- x ---")
    print(x)
    str(x)
    message("--- ... ---")
    print(list(...))
    str(list(...))
    message("------")
  }
}



# --- ff read / write / readwrite ----------------------------------------------------------

# the getset / readwrite / swap functions are efficient for read/write at once (they do maintain na.count() if na.count() is activated )
# the get,set / read,write / [,[<- functions are simplified clones of these, the write versions stop if na.count() is activated

#! \name{getset.ff}
#! \alias{getset.ff}
#! \alias{get.ff}
#! \alias{set.ff}
#! \title{ Reading and writing vectors of values (low-level) }
#! \description{
#!   The three functions \command{get.ff}, \command{set.ff} and \command{getset.ff} provide the simplest interface to access an ff file: getting and setting vector of values identified by positive subscripts
#! }
#! \usage{
#! get.ff(x, i)
#! set.ff(x, i, value, add = FALSE)
#! getset.ff(x, i, value, add = FALSE)
#! }
#! \arguments{
#!   \item{x}{ an ff object }
#!   \item{i}{ an index position within the ff file }
#!   \item{value}{ the value to write to position i }
#!   \item{add}{ TRUE if the value should rather increment than overwrite at the index position }
#! }
#! \details{
#!   \command{getset.ff} combines the effects of \command{get.ff} and \command{set.ff} in a single operation: it retrieves the old value at position \code{i} before changing it.
#!   \command{getset.ff} will maintain \code{\link[bit]{na.count}}.
#! }
#! \value{
#!   \command{get.ff} returns a vector, \command{set.ff} returns the 'changed' ff object (like all assignment functions do) and \command{getset.ff} returns the value at the subscript positions.
#!   More precisely \code{getset.ff(x, i, value, add=FALSE)} returns the old values at the subscript positions \code{i} while \code{getset.ff(x, i, value, add=TRUE)} returns the incremented values at the subscript positions.
#! }
#! \author{ Jens Oehlschlägel }
#! \note{ \command{get.ff}, \command{set.ff} and \command{getset.ff} are low level functions that do not support \code{ramclass} and \code{ramattribs} and thus will not give the expected result with \code{factor} and \code{POSIXct} }
#! \seealso{ \code{\link{readwrite.ff}} for low-level access to contiguous chunks and \code{\link{[.ff}} for high-level access }
#! \examples{
#!  x <- ff(0, length=12)
#!  get.ff(x, 3L)
#!  set.ff(x, 3L, 1)
#!  x
#!  set.ff(x, 3L, 1, add=TRUE)
#!  x
#!  getset.ff(x, 3L, 1, add=TRUE)
#!  getset.ff(x, 3L, 1)
#!  x
#!  rm(x); gc()
#! }
#! \keyword{ IO }
#! \keyword{ data }


# Low-level combined read and write at positive subscripts.
# Arguments:
#   x      an ff object (opened if necessary, must not be readonly and must not have a vw)
#   i      positive subscripts within 1..length(x); doubles are coerced to integer
#   value  the value(s) to write (coerced to the vmode of x)
#   add    TRUE to increment rather than overwrite at the subscript positions
# Value: what the C-level (add)getset call returns for the subscript positions, coerced to the vmode of x.
# If na.count is active for x it is updated from the number of NAs written and returned.
getset.ff <- function(x, i, value, add=FALSE)
{
  open(x, assert=TRUE)
  if( is.readonly(x) ) stop("ff is readonly")
  if (is.double(i))
    i <- as.integer(i)
  # NA subscripts are rejected explicitly: otherwise the comparisons below evaluate to NA
  # and 'if' fails with an unrelated "missing value where TRUE/FALSE needed" error
  if (!is.integer(i) || any(is.na(i)) || any(i<1) || any(i>length(x))) stop("illegal index")
  if(!is.null(vw(x))) stop("please use '[' to access ff with vw")

  nc <- na.count(x)
  if (!is.na(nc)){
    # number of NAs written: must be a scalar count (as in readwrite.ff), not a logical vector.
    # NOTE(review): rep() assumes that a shorter 'value' is recycled over 'i' at C level
    # (as swap.ff relies on) -- confirm against the C code
    new.nc <- sum(is.na(rep(value, length.out=length(i))))
  }
  vm <- vmode(x)
  if (add)
    ret <- as.vmode(.Call(C_addgetset_vec, .ffmode[vm], attr(x, "physical"), i, length(i), as.vmode(value, vm), PACKAGE="ff"), vm)
  else
    ret <- as.vmode(.Call(C_getset_vec, .ffmode[vm], attr(x, "physical"), i, length(i), as.vmode(value, vm), PACKAGE="ff"), vm)
  if (!is.na(nc)){
    # number of NAs in the returned values, again as a scalar count
    old.nc <- sum(is.na(ret))
    na.count(x) <- nc - old.nc + new.nc
  }
  ret
}

# Low-level read at positive subscripts.
# Arguments:
#   x  an ff object (opened if necessary, must not have a vw)
#   i  positive subscripts within 1..length(x); doubles are coerced to integer
# Value: the values at the subscript positions coerced to the vmode of x,
#        or the result of vector.vmode() for that vmode if 'i' has length zero.
get.ff   <- function(x, i)
{
  open(x, assert=TRUE)
  if (length(i)){
    if (is.double(i))
      i <- as.integer(i)
    # NA subscripts are rejected explicitly: otherwise the comparisons below evaluate to NA
    # and 'if' fails with an unrelated "missing value where TRUE/FALSE needed" error
    if (!is.integer(i) || any(is.na(i)) || any(i<1) || any(i>length(x))) stop("illegal index")
    if(!is.null(vw(x))) stop("please use '[' to access ff with vw")
    vm <- vmode(x)
    as.vmode(.Call(C_get_vec, .ffmode[vm], attr(x, "physical"), i, length(i), PACKAGE="ff"), vm)
  }else{
    # nothing to read
    vm <- vmode(x)
    vector.vmode(vm)
  }
}

# Low-level write at positive subscripts.
# Arguments:
#   x      an ff object (opened if necessary, must not be readonly and must not have a vw)
#   i      positive subscripts within 1..length(x); doubles are coerced to integer
#   value  the value(s) to write (coerced to the vmode of x)
#   add    TRUE to increment rather than overwrite at the subscript positions
# Value: the ff object x (like all assignment functions do); nothing is written if 'i' has length zero.
# set.ff does not maintain na.count and therefore stops if na.count is active.
set.ff <- function(x, i, value, add=FALSE)
{
  open(x, assert=TRUE)
  if( is.readonly(x) )
      stop("ff is readonly")
  if (length(i)){
    if (is.double(i))
      i <- as.integer(i)
    # NA subscripts are rejected explicitly: otherwise the comparisons below evaluate to NA
    # and 'if' fails with an unrelated "missing value where TRUE/FALSE needed" error
    if (!is.integer(i) || any(is.na(i)) || any(i<1) || any(i>length(x))) stop("illegal index")
    if(!is.null(vw(x))) stop("please use '[' to access ff with vw")

    # the counterpart of set.ff that maintains na.count is getset.ff (readwrite.ff is the counterpart of write.ff)
    if(!is.null(physical(x)$na.count)) stop("use getset.ff instead to maintain na.count (or deactivate na.count(x)<-NULL)")
    vm <- vmode(x)
    if (add)
      attr(x, "physical") <- .Call(C_addset_vec, .ffmode[vm], attr(x, "physical"), i, length(i), as.vmode(value, vm), PACKAGE="ff")
    else
      attr(x, "physical") <- .Call(C_set_vec, .ffmode[vm], attr(x, "physical"), i, length(i), as.vmode(value, vm), PACKAGE="ff")
  }
  x
}

# '[[' for ff: reads exactly one element via get.ff
"[[.ff" <- function(x, i){
  if (length(i) == 1L)
    return(get.ff(x=x, i=i))
  stop("i must have length 1")
}

# '[[<-' for ff: writes exactly one value to exactly one position via set.ff
"[[<-.ff" <- function(x, i, add=FALSE, value){
  n_index <- length(i)
  if (n_index != 1L){
    stop("i must have length 1")
  }
  n_value <- length(value)
  if (n_value != 1L){
    stop("value must have length 1")
  }
  set.ff(x=x, i=i, value=value, add=add)
}


#! \name{readwrite.ff}
#! \alias{read.ff}
#! \alias{write.ff}
#! \alias{readwrite.ff}
#! \title{ Reading and writing vectors (low-level) }
#! \description{
#!   Simple low-level interface for reading and writing vectors from ff files.
#! }
#! \usage{
#! read.ff(x, i, n)
#! write.ff(x, i, value, add = FALSE)
#! readwrite.ff(x, i, value, add = FALSE)
#! }
#! \arguments{
#!   \item{x}{ an ff object }
#!   \item{i}{ a start position in the ff file }
#!   \item{n}{ number of elements to read }
#!   \item{value}{ vector of elements to write }
#!   \item{add}{ TRUE if the values should rather increment than overwrite at the target positions }
#! }
#! \details{
#!   \command{readwrite.ff} combines the effects of \command{read.ff} and \command{write.ff} in a single operation: it retrieves the old values starting from position \code{i} before changing them.
#!   \command{readwrite.ff} will maintain \code{\link[bit]{na.count}}.
#! }
#! \value{
#!   \command{read.ff} returns a vector of values, \command{write.ff} returns the 'changed' ff object (like all assignment functions do) and \command{readwrite.ff} returns the values at the target position.
#!   More precisely \code{readwrite.ff(x, i, value, add=FALSE)} returns the old values at the position \code{i} while \code{readwrite.ff(x, i, value, add=TRUE)} returns the incremented values of \code{x}.
#! }
#! \author{ Jens Oehlschlägel }
#! \note{ \command{read.ff}, \command{write.ff} and \command{readwrite.ff} are low level functions that do not support \code{ramclass} and \code{ramattribs} and thus will not give the expected result with \code{factor} and \code{POSIXct} }
#! \seealso{ \code{\link{getset.ff}} for low-level scalar access and \code{\link{[.ff}} for high-level access }
#! \examples{
#!   x <- ff(0, length=12)
#!   read.ff(x, 3, 6)
#!   write.ff(x, 3, rep(1, 6))
#!   x
#!   write.ff(x, 3, rep(1, 6), add=TRUE)
#!   x
#!   readwrite.ff(x, 3, rep(1, 6), add=TRUE)
#!   readwrite.ff(x, 3, rep(1, 6))
#!   x
#!   rm(x); gc()
#! }
#! \keyword{ IO }
#! \keyword{ data }


# Low-level combined read and write of a contiguous chunk starting at position i.
# Returns what the C-level (add)getset call returns for the chunk, coerced to the vmode of x,
# and updates na.count if it is active for x.
readwrite.ff <- function(x, i, value, add=FALSE)
{
  open(x, assert=TRUE)
  if (is.readonly(x))
    stop("ff is readonly")
  if (!is.null(vw(x)))
    stop("please use '[' to access ff with vw")

  # the chunk i .. i+n-1 must lie completely within the ff
  n <- length(value)
  stopifnot( 0 < i && i+n-1 <= length(x) )

  nc <- na.count(x)
  track_na <- !is.na(nc)   # na.count bookkeeping is only done if na.count is active
  if (track_na)
    new.nc <- sum(is.na(value))

  vm <- vmode(x)
  entry <- if (add) C_addgetset_contiguous else C_getset_contiguous
  ret <- .Call(entry, .ffmode[vm], attr(x, "physical"), as.integer(i), n, as.vmode(value, vm), PACKAGE="ff")

  if (track_na){
    old.nc <- sum(is.na(ret))
    na.count(x) <- nc - old.nc + new.nc
  }
  as.vmode(ret, vm)
}

# Low-level read of n contiguous elements starting at position i, coerced to the vmode of x.
read.ff <- function(x, i, n)
{
  open(x, assert=TRUE)
  if (!is.null(vw(x)))
    stop("please use '[' to access ff with vw")

  # the chunk i .. i+n-1 must lie completely within the ff
  stopifnot( 0 < i && i+n-1 <= length(x) )

  vm <- vmode(x)
  chunk <- .Call(C_get_contiguous, .ffmode[vm], attr(x, "physical"), as.integer(i), as.integer(n), PACKAGE="ff")
  as.vmode(chunk, vm)
}

# Low-level write of a contiguous chunk starting at position i; returns the ff object x.
# write.ff does not maintain na.count and therefore stops if na.count is active.
write.ff <- function(x, i, value, add=FALSE)
{
  open(x, assert=TRUE)
  if (is.readonly(x))
    stop("ff is readonly")
  if (!is.null(vw(x)))
    stop("please use '[' to access ff with vw")
  if (!is.null(physical(x)$na.count))
    stop("use readwrite.ff instead to maintain na.count (or deactivate na.count(x)<-NULL)")

  # the chunk i .. i+n-1 must lie completely within the ff
  n <- length(value)
  stopifnot( 0 < i && i+n-1 <= length(x) )

  vm <- vmode(x)
  entry <- if (add) C_addset_contiguous else C_set_contiguous
  attr(x, "physical") <- .Call(entry, .ffmode[vm], attr(x, "physical"), as.integer(i), n, as.vmode(value, vm), PACKAGE="ff")
  x
}




#! \name{swap}
#! \alias{swap}
#! \alias{swap.ff}
#! \alias{swap.ff_array}
#! \alias{swap.default}
#! \title{ Reading and writing in one operation (high-level) }
#! \description{
#!   The generic \command{swap} combines \code{x[i]} and \code{x[i] <- value} in a single operation.
#! }
#! \usage{
#! swap(x, value, \dots)
#! \method{swap}{ff}(x, value, i, add = FALSE, pack = FALSE, \dots)
#! \method{swap}{ff_array}(x, value, \dots, bydim = NULL, drop = getOption("ffdrop"), add = FALSE, pack = FALSE)
#! \method{swap}{default}(x, value, \dots, add = FALSE)
#! }
#! \arguments{
#!   \item{x}{ a ff or ram object }
#!   \item{value}{ the new values to write, possibly recycled, see \code{\link{[.ff}} }
#!   \item{i}{ index information, see \code{\link{[.ff}} }
#!   \item{\dots}{ missing OR up to length(dim(x)) index expressions OR (ff only) \code{\link{hi}} objects }
#!   \item{drop}{ logical scalar indicating whether array dimensions shall be dropped }
#!   \item{bydim}{ how to interpret vector to array data, see \code{\link{[.ff}} }
#!   \item{add}{ TRUE if the values should rather increment than overwrite at the target positions, see \code{\link{readwrite.ff}} }
#!   \item{pack}{ FALSE to prevent rle-packing in hybrid index preprocessing, see \code{\link{as.hi}} }
#! }
#! \details{
#!   \preformatted{
#!   y <- swap(x, value, i, add=FALSE, ...)
#!
#!   is a shorter and more efficient version of
#!
#!   y <- x[i, add=FALSE, ...]
#!   x[i, add=FALSE, ...] <- value
#!
#!   and
#!
#!   y <- swap(x, value, i, add=TRUE, ...)
#!
#!   is a shorter and more efficient version of
#!
#!   y <- x[i, add=TRUE, ...]
#!   y <- y + value
#!   x[i, add=FALSE, ...] <- y
#!   }
#! }
#! \value{
#!   Values at the target positions.
#!   More precisely \code{swap(x, value, i, add=FALSE)} returns the old values at the position \code{i} while \code{swap(x, value, i, add=TRUE)} returns the incremented values of \code{x}.
#! }
#! \author{ Jens Oehlschlägel }
#! \note{
#!   Note that \code{swap.default} changes the object in its parent frame and thus violates R's usual functional programming logic.
#!   When using \code{add=TRUE}, duplicated index positions should be avoided, because ff and ram objects behave differently:
#!   \preformatted{
#!   swap.ff(x, 1, c(3,3), add=TRUE)
#!   # will increment x at position 3 TWICE by 1, while
#!   swap.default(x, 1, c(3,3), add=TRUE)
#!   # will increment x at position 3 just ONCE by 1
#!   }
#! }
#! \seealso{ \code{\link{[.ff}}, \code{\link{add}}, \code{\link{readwrite.ff}}, \code{\link{getset.ff}}, \code{\link{LimWarn}} }
#! \examples{
#!   x <- ff("a", levels=letters, length=52)
#!   y <- swap(x, "b", sample(length(x), 26))
#!   x
#!   y
#!   rm(x,y); gc()
#! }
#! \keyword{ IO }
#! \keyword{ data }
#! \keyword{ array }


#! \name{Extract.ff}
#! \alias{Extract.ff}
#! \alias{[.ff}
#! \alias{[<-.ff}
#! \alias{[.ff_array}
#! \alias{[<-.ff_array}
#! \alias{[[.ff}
#! \alias{[[<-.ff}
#! \title{ Reading and writing vectors and arrays (high-level) }
#! \description{
#!   These are the main methods for reading and writing data from ff files.
#! }
#! \usage{
#! \method{[}{ff}(x, i, pack = FALSE)
#! \method{[}{ff}(x, i, add = FALSE, pack = FALSE) <- value
#! \method{[}{ff_array}(x, \dots, bydim = NULL, drop = getOption("ffdrop"), pack = FALSE)
#! \method{[}{ff_array}(x, \dots, bydim = NULL, add = FALSE, pack = FALSE) <- value
#! \method{[[}{ff}(x, i)
#! \method{[[}{ff}(x, i, add = FALSE) <- value
#! }
#! \arguments{
#!   \item{x}{ an ff object }
#!   \item{i}{ missing OR a single index expression OR a \code{\link{hi}} object }
#!   \item{\dots}{ missing OR up to length(dim(x)) index expressions OR \code{\link{hi}} objects }
#!   \item{drop}{ logical scalar indicating whether array dimensions shall be dropped }
#!   \item{bydim}{ the dimorder which shall be used in interpreting vector to/from array data }
#!   \item{pack}{ FALSE to prevent rle-packing in hybrid index preprocessing, see \code{\link{as.hi}} }
#!   \item{value}{ the values to be assigned, possibly recycled }
#!   \item{add}{ TRUE if the values should rather increment than overwrite at the target positions, see \code{\link{readwrite.ff}} }
#! }
#! \details{
#!   The single square bracket operators \command{[} and \command{[<-} are the workhorses for accessing the content of an ff object.
#!   They support \code{ff_vector} and \code{ff_array} access (\code{\link{dim.ff}}), they respect virtual windows (\code{\link{vw}}),
#!   \code{\link{names.ff}} and \code{\link{dimnames.ff}} and retain \code{\link{ramclass}} and \code{\link{ramattribs}}
#!   and thus support \code{\link{POSIXct}} and \code{\link{factor}}, see \code{\link{levels.ff}}.
#!   \cr
#!   The functionality of \command{[} and \command{[<-} can be combined into one efficient operation, see \code{\link{swap}}.
#!   \cr
#!   The double square bracket operator \command{[[} is a shortcut for \code{\link{get.ff}}
#!   resp. \code{\link{set.ff}}, however, you should not rely on this for the future, see \code{\link{LimWarn}}. For programming please prefer \command{[}.
#! }
#! \section{Index expressions}{
#!   \code{x <- ff(1:12, dim=c(3,4), dimnames=list(letters[1:3], NULL))}
#!   \tabular{rll}{
#!   \emph{allowed expression}    \tab -- \tab \emph{\code{example}}                \cr
#!    positive integers           \tab    \tab \code{x[ 1 ,1]}                      \cr
#!    negative integers           \tab    \tab \code{x[ -(2:12) ]}                  \cr
#!    logical                     \tab    \tab \code{x[ c(TRUE, FALSE, FALSE) ,1]}  \cr
#!    character                   \tab    \tab \code{x[ "a" ,1]}                    \cr
#!    integer matrices            \tab    \tab \code{x[ rbind(c(1,1)) ]}            \cr
#!    hybrid index                \tab    \tab \code{x[ hi ,1]}                     \cr
#!   \emph{disallowed expression} \tab -- \tab \emph{\code{example}}                \cr
#!    zeros                       \tab    \tab \code{x[ 0 ]}                        \cr
#!    NAs                         \tab    \tab \code{x[ NA ]}                       \cr
#!   }
#! }
#! \section{Dimorder and bydim}{
#!   Arrays in R have always standard \code{\link{dimorder} seq_along(dim(x))} while ff allows to store an array in a different dimorder.
#!   Using nonstandard dimorder (see \code{\link{dimorderStandard}}) can speed up certain access operations: while matrix \code{dimorder=c(1,2)} -- column-major order --
#!   allows fast extraction of columns, \code{dimorder=c(2,1)} allows fast extraction of rows.
#!   \cr
#!   While the dimorder -- being an attribute of an \code{ff_array} -- controls how the vector in an ff file is interpreted,
#!   the \code{bydim} argument to the extractor functions controls how assignment vector values
#!   in \command{[<-} are translated to the array and how the array is translated to a vector in \command{[} subscripting.
#!   Note that \code{bydim=c(2,1)} corresponds to \code{matrix(..., byrow=TRUE)}.
#! }
#! \section{Multiple vector interpretation in arrays }{
#!   In case of non-standard dimorder (see \code{\link{dimorderStandard}})
#!   the vector sequence of array elements in R and in the ff file differs.
#!   To access array elements in file order, you can use \code{\link{getset.ff}}, \code{\link{readwrite.ff}}
#!   or copy the ff object and set \code{dim(ff)<-NULL} to get a vector view into the ff object
#!   (using \code{[} dispatches the vector method \code{\link{[.ff}}).
#!   To access the array elements in R standard dimorder you simply use \code{[} which dispatches
#!   to \code{\link{[.ff_array}}. Note that in this case \code{\link{as.hi}} will unpack the complete index, see next section.
#! }
#! \section{RAM expansion of index expressions}{
#!   Some index expressions do not consume RAM due to the \code{\link{hi}} representation,
#!   for example \code{1:n} will consume almost no RAM however large n.
#!   However, some index expressions are expanded and require to \code{\link[=Extract.ff]{maxindex}(i) * .rambytes["integer"]} bytes,
#!   either because the sorted sequence of index positions cannot be rle-packed efficiently
#!   or because \code{\link{hiparse}} cannot yet parse such expression and falls back to evaluating/expanding the index expression.
#!   If the index positions are not sorted, the index will be expanded and a second vector is needed to store the information for re-ordering,
#!   thus the index requires \code{2 * \link[=Extract.ff]{maxindex}(i) * .rambytes["integer"]} bytes.
#! }
#! \section{RAM expansion when recycling assignment values}{
#!   Some assignment expressions do not consume RAM for recycling, for example \code{x[1:n] <- 1:k}
#!   will not consume RAM however large n compared to k, when x has standard \code{\link{dimorder}}.
#!   However, if \code{length(value)>1}, assignment expressions with non-ascending index positions trigger recycling the value R-side to the full index length.
#!   This will happen if \code{\link{dimorder}} does not match parameter \code{bydim} or if the index is not sorted ascending.
#! }
#! \value{
#!   The read operators \command{[} and \command{[[} return data from the ff object,
#!   possibly decorated with \code{\link[=names.ff]{names}}, \code{\link[=dim.ff]{dim}},
#!   \code{\link[=dimnames.ff]{dimnames}} and further attributes and classes (see \code{\link{ramclass}}, \code{\link{ramattribs}})
#!   \cr
#!   The write operators \command{[<-} and \command{[[<-} return the 'modified' ff object (like all assignment operators do).
#! }
#! \author{ Jens Oehlschlägel }
#! \seealso{ \code{\link{ff}}, \code{\link{swap}}, \code{\link{add}}, \code{\link{readwrite.ff}}, \code{\link{LimWarn}} }
#! \examples{
#!    message("look at different dimorders")
#!    x <- ff(1:12, dim=c(3,4), dimorder=c(1,2))
#!    x[]
#!    as.vector(x[])
#!    x[1:12]
#!    x <- ff(1:12, dim=c(3,4), dimorder=c(2,1))
#!    x[]
#!    as.vector(x[])
#!    message("Beware (might be changed)")
#!    x[1:12]
#!
#!    message("look at different bydim")
#!    matrix(1:12, nrow=3, ncol=4, byrow=FALSE)
#!    x <- ff(1:12, dim=c(3,4), bydim=c(1,2))
#!    x
#!    matrix(1:12, nrow=3, ncol=4, byrow=TRUE)
#!    x <- ff(1:12, dim=c(3,4), bydim=c(2,1))
#!    x
#!    x[,, bydim=c(2,1)]
#!    as.vector(x[,, bydim=c(2,1)])
#!    message("even consistent interpretation of vectors in assignments")
#!    x[,, bydim=c(1,2)] <- x[,, bydim=c(1,2)]
#!    x
#!    x[,, bydim=c(2,1)] <- x[,, bydim=c(2,1)]
#!    x
#!    rm(x); gc()
#!
#!   \dontrun{
#!    message("some performance implications of different dimorders")
#!    n <- 100
#!    m <- 100000
#!    a <- ff(1L,dim=c(n,m))
#!    b <- ff(1L,dim=c(n,m), dimorder=2:1)
#!    system.time(lapply(1:n, function(i)sum(a[i,])))
#!    system.time(lapply(1:n, function(i)sum(b[i,])))
#!    system.time(lapply(1:n, function(i){i<-(i-1)*(m/n)+1; sum(a[,i:(i+m/n-1)])}))
#!    system.time(lapply(1:n, function(i){i<-(i-1)*(m/n)+1; sum(b[,i:(i+m/n-1)])}))
#!
#!    n <- 100
#!    a <- ff(1L,dim=c(n,n,n,n))
#!    b <- ff(1L,dim=c(n,n,n,n), dimorder=4:1)
#!    system.time(lapply(1:n, function(i)sum(a[i,,,])))
#!    system.time(lapply(1:n, function(i)sum(a[,i,,])))
#!    system.time(lapply(1:n, function(i)sum(a[,,i,])))
#!    system.time(lapply(1:n, function(i)sum(a[,,,i])))
#!    system.time(lapply(1:n, function(i)sum(b[i,,,])))
#!    system.time(lapply(1:n, function(i)sum(b[,i,,])))
#!    system.time(lapply(1:n, function(i)sum(b[,,i,])))
#!    system.time(lapply(1:n, function(i)sum(b[,,,i])))
#!
#!    n <- 100
#!    m <- 100000
#!    a <- ff(1L,dim=c(n,m))
#!    b <- ff(1L,dim=c(n,m), dimorder=2:1)
#!    system.time(ffrowapply(sum(a[i1:i2,]), a, RETURN=TRUE, CFUN="csum", BATCHBYTES=16104816\%/\%20))
#!    system.time(ffcolapply(sum(a[,i1:i2]), a, RETURN=TRUE, CFUN="csum", BATCHBYTES=16104816\%/\%20))
#!    system.time(ffrowapply(sum(b[i1:i2,]), b, RETURN=TRUE, CFUN="csum", BATCHBYTES=16104816\%/\%20))
#!    system.time(ffcolapply(sum(b[,i1:i2]), b, RETURN=TRUE, CFUN="csum", BATCHBYTES=16104816\%/\%20))
#!
#!    rm(a,b); gc()
#!   }
#! }
#! \keyword{ IO }
#! \keyword{ data }
#! \keyword{ array }


# swap method for ff vectors: writes 'value' to the positions selected by 'i' and returns the
# values of these positions from the same C-level call.
# Arguments:
#   x      an ff object (opened if necessary, must not be readonly)
#   value  the new values; coerced to the vmode of x, or to ff codes via ram2ffcode() if x has levels
#   i      index expression; if missing all of x is addressed; ff subscripts are not implemented
#   add    TRUE dispatches to the C-level addgetset instead of getset
#   pack   passed on to hi() / as.hi()
#   ...    ignored
# Value: a ram vector decorated with names, levels, ramattribs and ramclass of x
#        (a zero-length vector if no position is selected).
# If na.count is active for x (na.count(x) is not NA) it is updated from the NAs written and returned.
swap.ff <- function(
  x
, value
, i
, add   = FALSE
, pack  = FALSE
, ... # dummy to keep R CMD check quiet
){
	open(x, assert=TRUE)
  if( is.readonly(x) ) stop("ff is readonly")

  vmode <- vmode(x)
  vw <- vw(x)
  fflen <- length(x)
  fflev <- levels(x)
  ffnam <- names(x)

  # determine the hybrid index and the number of addressed positions (nreturn)
  if (missing(i)){
    if (fflen){
      index <- hi(from=1, to=fflen, maxindex=fflen, vw=vw, pack=pack)
    #}else{
      #not needed: index <- as.hi(NULL)
    }
    nreturn <- fflen
  }else if(is.ff(i)){
    stop("ff subscripts not implemented for swap.ff")
  }else{
    # the unevaluated index expression is handed to as.hi together with the caller's frame
    index <- as.hi(substitute(i), maxindex=fflen, vw=vw, pack=pack, names=ffnam, envir=parent.frame())
    nreturn <- poslength(index)
  }
  
  if (nreturn){
    nc <- na.count(x)
    # TRUE if the index carries an 'ix' component or its 're' flag is set
    # NOTE(review): presumably ix = re-ordering information and re = reversed index -- confirm in as.hi
    ixre <- !is.null(index$ix) || index$re
    if (is.null(fflev))
      value <- as.vmode(value, vmode)
    else
      value <- ram2ffcode(value, fflev, vmode)
    nvalue <- length(value)
    if (nvalue>1){
      if (nreturn!=nvalue){
        # value will be recycled: rb is the length of the incomplete last cycle
        rb <- nreturn%%nvalue
        # new.nc = number of NAs in the recycled value (complete cycles plus incomplete rest)
        if (!is.na(nc))
          new.nc <- nreturn%/%nvalue * sum(is.na(value)) + if (rb) sum(is.na(value[seq_len(rb)])) else 0L
        if (rb)
          warning("number of elements to replace is not multiple of values for replacement")
        if (ixre){
          # if possible we try to recycle on the C-side, but in case of ix or re we need to recycle already here
          value <- rep(value, length.out=nreturn)
          nvalue <- nreturn
        }
      }else{
        if (!is.na(nc))
          new.nc <- sum(is.na(value))
      }
      # bring value into the order of the index: permute by ix, otherwise reverse if 're' is set
      if (!is.null(index$ix))
        value <- value[index$ix]
      else if (index$re)
        value <- rev(value)
    }else{
      if(nvalue<1)
        stop("no value for replacement")
      # a single value is written to all nreturn positions
      if (!is.na(nc))
        new.nc <- nreturn * is.na(value)
    }
    if (add)
      ret <- .Call(C_addgetset_vector, .ffmode[vmode], attr(x, "physical"), index, nreturn, value, PACKAGE="ff")
    else
      ret <- .Call(C_getset_vector, .ffmode[vmode], attr(x, "physical"), index, nreturn, value, PACKAGE="ff")
    # NOTE(review): presumably restores the order requested by the caller -- confirm in unsort.hi
    ret <- unsort.hi(ret, index)
    if (!is.na(nc)){
      # old.nc = number of NAs in the returned values
      old.nc <- sum(is.na(ret))
      na.count(x) <- nc - old.nc + new.nc
    }
    if (!is.null(ffnam))
      setattr(ret, "names", ffnam[as.integer(index)])  #names(ret) <- ffnam[as.integer(index)]
  }else{
    # nothing addressed: return an empty ram vector of the matching mode
    ret <- vector(mode=.rammode[vmode], length=0)
    if (!is.null(ffnam))
      setattr(ret, "names", character())  #names(ret) <- character()
  }
  if (!is.null(fflev)){
    # for vmodes flagged in .vunsigned the returned codes are shifted by one before attaching the levels
    if (.vunsigned[vmode])
      ret <- ret + 1L
    setattr(ret, "levels", fflev) #levels(ret) <- fflev
  }
  ramattribs <- attr(attr(x, "virtual"), "ramattribs")
  if (!is.null(ramattribs)){
    setattributes(ret, ramattribs)  #attributes(ret) <- c(attributes(ret), ramattribs)
  }
  setattr(ret, "class",attr(attr(x, "virtual"), "ramclass"))  #class(ret) <- attr(attr(x, "virtual"), "ramclass")
  # NOTE(review): vmode-specific hook from the .vset table, called for its side effect on ret -- confirm what it sets
  .vset[[vmode]](ret)
  return(ret)
}


# '[' for ff vectors: reads the positions selected by 'i' into a ram vector.
# Arguments:
#   x     an ff object (opened if necessary, but only if at least one position is read)
#   i     index expression; if missing all of x is read; an ff index is delegated to ffindexget()
#   pack  passed on to as.hi()
# Value: a ram vector decorated with names, levels, ramattribs and ramclass of x
#        (a zero-length vector if no position is selected).
"[.ff" <- function(
  x
, i
, pack = FALSE
)
{

  vmode <- vmode(x)
  vw <- vw(x)
  fflen <- length(x)
  fflev <- levels(x)
  ffnam <- names(x)
  #if (fflen){
    if (missing(i)){
      # no index: the complete vector is read as one contiguous chunk
      simple <- TRUE
      nreturn <- fflen
    }else if(is.ff(i)){
      return(ffindexget(x, i))
    }else{
      simple <- FALSE
      # note: do not ask inherits(index, "hi") directly, because this would evaluate and could create large objects in RAM
      # calling as.hi(substitute()) will first try to parse, this will fail on hi input and then only evaluate hi
      # this is a bit indirect but can substantially save memory
      index <- as.hi(substitute(i), maxindex=fflen, vw=vw, pack=pack, names=ffnam, envir=parent.frame())
      nreturn <- poslength(index)
    }
  #}else{
  #  nreturn <- 0:0
  #}
  if (nreturn){
		open(x, assert=TRUE)
    if (simple){
      # start position is 1, or the first element of vw plus one if a vw is set
      ret <- .Call(C_get_contiguous, .ffmode[vmode], attr(x, "physical"), if (is.null(vw)) 1L else vw[1]+1L, nreturn, PACKAGE="ff")
      if (!is.null(ffnam))
        setattr(ret, "names", ffnam)  #names(ret) <- ffnam
    }else{
      ret <- .Call(C_get_vector, .ffmode[vmode], attr(x, "physical"), index, nreturn, PACKAGE="ff")
      # NOTE(review): presumably restores the order requested by the caller -- confirm in unsort.hi
      ret <- unsort.hi(ret,index)
      if (!is.null(ffnam))
        setattr(ret, "names", ffnam[as.integer(index)])  #names(ret) <- ffnam[as.integer(index)]
    }
  }else{
    # nothing addressed: return an empty ram vector of the matching mode
    ret <- vector(mode=.rammode[vmode], length=0)
    if (!is.null(ffnam))
      setattr(ret, "names", character())  #names(ret) <- character()
  }
  if (!is.null(fflev)){
    # for vmodes flagged in .vunsigned the returned codes are shifted by one before attaching the levels
    if (.vunsigned[vmode])
      ret <- ret + 1L
    setattr(ret, "levels", fflev)  #levels(ret) <- fflev
  }
  ramattribs <- attr(attr(x, "virtual"), "ramattribs")
  if (!is.null(ramattribs)){
    setattributes(ret, ramattribs) #attributes(ret) <- c(attributes(ret), ramattribs)
  }
  setattr(ret, "class",attr(attr(x, "virtual"), "ramclass"))  #class(ret) <- attr(attr(x, "virtual"), "ramclass")
  # NOTE(review): vmode-specific hook from the .vset table, called for its side effect on ret -- confirm what it sets
  .vset[[vmode]](ret)

  return(ret)
}



"[<-.ff" <- function(
  x
, i
, add   = FALSE
, pack  = FALSE
, value
)
{
  # Assignment for ff vectors: x[i] <- value (or x[i, add=TRUE] <- value).
  #   x      ff object; must be open-able and not readonly
  #   i      index expression; missing addresses all elements; an ff index
  #          is delegated to ffindexset()
  #   add    if TRUE the C 'addset' routine is called instead of 'set'
  #   pack   passed on to hi()/as.hi()
  #   value  replacement, coerced to the vmode of x (or to factor codes)
  # Returns x with its 'physical' attribute replaced by the result of the C call.
  # Refuses to work while na.count or is.sorted are maintained on x.

  open(x, assert=TRUE)
  if (is.readonly(x))
    stop("ff is readonly")
  phys <- physical(x)
  if (!is.null(phys$na.count))
    stop("use swap instead to maintain na.count (or deactivate na.count(x)<-NULL for assigning)")
  if (!is.null(phys$is.sorted))
    stop("deactivate is.sorted(x)<-NULL for assigning)")

  vm      <- vmode(x)
  window  <- vw(x)
  n_total <- length(x)
  lev     <- levels(x)
  nam     <- names(x)

  if (missing(i)){
    n_set <- n_total
    # for a zero-length ff no index is built; we don't warn here since R does not either
    if (n_total)
      index <- hi(from=1, to=n_total, maxindex=n_total, vw=window, pack=pack)
  }else if (is.ff(i)){
    return(ffindexset(x, i, value))
  }else{
    index <- as.hi(substitute(i), maxindex=n_total, vw=window, pack=pack, names=nam, envir=parent.frame())
    n_set <- poslength(index)
  }

  # nothing addressed: leave x untouched
  if (!n_set)
    return(x)

  # TRUE if the index carries a reordering ($ix) or is flagged as reversed ($re)
  reordered <- !is.null(index$ix) || index$re
  vm <- vmode(x)  # vmode is looked up again here, as in the original flow

  value <- if (is.null(lev)) as.vmode(value, vm) else ram2ffcode(value, lev, vm)
  n_value <- length(value)
  if (n_value < 1)
    stop("no value for replacement")

  if (n_value > 1){
    if (n_set != n_value){
      if (n_set %% n_value)
        warning("number of elements to replace is not multiple of values for replacement")
      # if possible recycling is left to the C-side, but with ix or re the
      # values must be recycled here so that they can be reordered below
      if (reordered){
        value <- rep(value, length.out=n_set)
        n_value <- n_set
      }
    }
    # put the values into the order in which the index positions are processed
    if (!is.null(index$ix)){
      value <- value[index$ix]
    }else if (index$re){
      value <- rev(value)
    }
  }

  if (add){
    attr(x, "physical") <- .Call(C_addset_vector, .ffmode[vm], attr(x, "physical"), index, n_set, value, PACKAGE="ff")
  }else{
    attr(x, "physical") <- .Call(C_set_vector, .ffmode[vm], attr(x, "physical"), index, n_set, value, PACKAGE="ff")
  }
  x
}


# swap method for ff arrays: hands 'value' to the C getset routines for the cells
# of 'x' selected by '...' and returns what those routines deliver, as a ram object.
# NOTE(review): per the comment above swap.default the returned content is presumably
# the old values (add=FALSE) resp. old+value (add=TRUE) -- confirm against the C code.
#   x      ff array; is opened here and must not be readonly
#   value  replacement values; coerced to the vmode of x, or to factor codes if x has levels
#   ...    index expressions: none (whole array), one (vector/matrix index) or one per dimension
#   bydim  if not a standard dimorder, 'value' is rearranged with vector2array() and the
#          returned array with array2vector()
#   drop   if TRUE, drop() is applied to the returned array
#   add    if TRUE the C_addgetset_* routines are called instead of C_getset_*
#   pack   passed on to hi() / as.hi()
# If na.count(x) is not NA it is updated from the NAs in the returned and the written values.
swap.ff_array <- function(
  x
, value
, ...
, bydim = NULL
, drop  = getOption("ffdrop")
, add   = FALSE
, pack  = FALSE
)
{
	open(x, assert=TRUE)
  if( is.readonly(x) ) stop("ff is readonly")

  vmode <- vmode(x)
  vw <- vw(x)
  fflen <- length(x)
  fflev <- levels(x)
  ffdim <- dim(x)
  ffdimnam <- dimnames(x)
  ffdimord <- dimorder(x)
  ndim <- length(ffdim)

  # the index expressions are taken unevaluated from the call and handed to as.hi()
  arguments <- match.call(expand.dots=FALSE)[["..."]]
  narguments <- length(arguments)

  # --- step 1: translate the index expressions into hi objects ---
  # Outcome: index (one hi, or a list with one hi per dimension), indexdim (extent of the
  # selection per dimension, NULL for the 'simple' vector-style case), nreturn (number of
  # selected cells), simple (vector-style access) and ixre (some index has $ix or $re set)
  if (narguments==0 || (narguments==1 && is.language(arguments[[1]]) && arguments[[1]]=="")){  # ad[]
    # no index at all: every dimension is selected completely
    index <- lapply(seq_len(ndim), function(d){
      if (ffdim[[d]])
        hi(from=1, to=ffdim[[d]], maxindex=ffdim[[d]], vw=vw[,d], pack=pack)
      else
        as.hi(NULL, maxindex=ffdim[[d]], vw=vw[,d], pack=pack)
    })
    indexdim <- ffdim
    nreturn <- fflen
    simple <- FALSE
    ixre <- FALSE
    drop <- FALSE  # R always had: no subsetting no drop
  }else if (narguments==1){                                     # ad[cbind()] ad[1:n] 1d[1:n]
    index <- as.hi(arguments[[1]], maxindex=fflen, vw=vw, dim=ffdim, dimorder=ffdimord, pack=pack, envir=parent.frame())
    nreturn <- poslength(index)
    ixre <- !is.null(index$ix) || index$re
    if (!is.null(index$dim) || ndim>1){ # ad[cbind()] ad[1:n]
      # a single index into a multi-dimensional array is handled by the vector routines
      indexdim <- NULL
      simple <- TRUE
    }else{ # 1d[1:n]
      # a one-dimensional array goes through the array routines with a one-element index list
      index <- list(index)
      indexdim <- nreturn
      simple <- FALSE
    }
  }else if (narguments==ndim){
    # one index expression per dimension; an empty argument selects the whole dimension
    ixre <- FALSE
    envir <- parent.frame()
    index <- lapply(seq_len(ndim), function(d){
      if (is.language(arguments[[d]]) && arguments[[d]]==""){
        if (ffdim[[d]])
          hi(from=1, to=ffdim[[d]], maxindex=ffdim[[d]], vw=vw[,d], pack=pack)
        else
          as.hi(NULL, maxindex=ffdim[[d]], vw=vw[,d], pack=pack)
      }else{
        lret <- as.hi(arguments[[d]], maxindex=ffdim[[d]], vw=vw[,d], pack=pack, names=ffdimnam[[d]], envir=envir)
        # remember in the enclosing function frame that at least one index is reordered/reversed
        if (!is.null(lret$ix) || lret$re)
          ixre <<- TRUE
        lret
      }
    })
    indexdim <- sapply(index,poslength)
    nreturn <- as.integer(prod(indexdim))
    simple <- FALSE
    # no selected dimension has extent 1, so drop is switched off
    if (all(indexdim != 1))
      drop <- FALSE
  }else{
    stop("wrong number of dimensions")
  }

  # --- step 2: prepare the values to be written ---
  # nc is the current na.count of x; all na.count bookkeeping below is skipped when it is NA
  nc <- na.count(x)
  if (is.null(fflev))
    value <- as.vmode(value, vmode)
  else
    value <- ram2ffcode(value, fflev, vmode)
  nvalue <- length(value)
  if (nvalue>1){
    if (!dimorderStandard(bydim)){
      value <- vector2array(value, dim=indexdim, dimorder=bydim)
    }
    if (nreturn!=nvalue){
      # value will be recycled: rb is the length of the incomplete last cycle
      rb <- nreturn%%nvalue
      # new.nc = number of NAs that get written, counting full cycles plus the partial one
      if (!is.na(nc))
        new.nc <- nreturn%/%nvalue * sum(is.na(value)) + if (rb) sum(is.na(value[seq_len(rb)])) else 0L
      if (rb)
        warning("number of elements to replace is not multiple of values for replacement")
      if (ixre){
        # if possible we try to recycle on the C-side, but in case of ix or re we need to recycle already here
        value <- rep(value, length.out=nreturn)
        nvalue <- nreturn
      }
    }else{
      if (!is.na(nc))
        new.nc <- sum(is.na(value))
    }
    # bring the values into the order in which the (sorted) index positions are processed
    if (simple){
      if (!is.null(index$ix))
        value <- value[index$ix]
      else if (index$re)
        value <- rev(value)
    }else{
      if (ixre){
        # per dimension: the stored reordering, a reversed sequence, or the identity
        value <- array(value, dim=indexdim)
        ix <- lapply(index, function(i){
                if (is.null(i$ix)){
                  if (i$re)
                    rev(seq_along(i))
                  else
                    seq_along(i)
                }else{
                  i$ix
                }
              })
        value <- do.call("[", c(list(value), ix))
      }else{
        # ix is passed to unsort.ahi() further down whenever nvalue != 1
        ix <- NULL
      }
    }
  }else{
    if(nvalue<1)
      stop("no value for replacement")
    # a single value is written to all nreturn cells
    if (!is.na(nc))
      new.nc <- nreturn * is.na(value)
  }

  # --- step 3: call the C routines ---
  if (simple){
    if (add){
      if (fflen && nreturn){
        ret <- .Call(C_addgetset_vector, .ffmode[vmode], attr(x, "physical"), index, nreturn, value, PACKAGE="ff")
      }else{
        ret <- vector(mode=.rammode[vmode], length=0)
      }
    }else{
      if (fflen && nreturn){
        ret <- .Call(C_getset_vector, .ffmode[vmode], attr(x, "physical"), index, nreturn, value, PACKAGE="ff")
      }else{
        ret <- vector(mode=.rammode[vmode], length=0)
      }
    }
    ret <- unsort.hi(ret, index)
  }else{
    # NOTE(review): with a virtual window the dimensions are replaced by colSums(vw),
    # presumably the full extent of the underlying array -- confirm against vw()
    if (!is.null(vw))
      ffdim <- as.integer(colSums(vw))
    # cumulative products of the array dims taken in dimorder, starting with 1
    cumffdim <- c(1L,cumprod(ffdim[ffdimord])[-ndim])
    # the same for the selection; nreturn is reset to the total size of the selection
    cumindexdim <- cumprod(indexdim)
    nreturn <- cumindexdim[ndim]
    cumindexdim <- c(1,cumindexdim[-ndim])[ffdimord]

    if (add){
      if (fflen && nreturn){
        ret <- .Call(C_addgetset_array, .ffmode[vmode], attr(x, "physical"), index[ffdimord], as.integer(indexdim[ffdimord]), as.integer(ffdim[ffdimord]), as.integer(ndim), as.integer(nreturn), as.integer(cumindexdim), as.integer(cumffdim), value, PACKAGE="ff")
      }else{
        ret <- vector(mode=.rammode[vmode], length=0)
      }
    }else{
      if (fflen && nreturn){
        ret <- .Call(C_getset_array, .ffmode[vmode], attr(x, "physical"), index[ffdimord], as.integer(indexdim[ffdimord]), as.integer(ffdim[ffdimord]), as.integer(ndim), as.integer(nreturn), as.integer(cumindexdim), as.integer(cumffdim), value, PACKAGE="ff")
      }else{
        ret <- vector(mode=.rammode[vmode], length=0)
      }
    }
    # --- step 4: shape the returned values like the selection ---
    #ret <- array(ret, dim=indexdim)
    setattr(ret, "dim", indexdim)   #dim(ret) <- indexdim
    # 'ix' only exists when nvalue>1 (see step 2), hence the two call variants
    if (nvalue==1)
      ret <- unsort.ahi(ret, index, ixre)
    else
      ret <- unsort.ahi(ret, index, ixre, ix)
    do.bydim <- !dimorderStandard(bydim)
    if (do.bydim){
      ret <- array2vector(ret, dim=indexdim, dimorder=bydim)
      setattr(ret, "dim", indexdim[bydim])   #dim(ret) <- indexdim[bydim]
    }
    if (!is.null(ffdimnam)){
      # select the dimnames belonging to the selected positions, keeping the names of the dimnames list
      dimnam <- seq_len(ndim)
      setattr(dimnam, "names", names(ffdimnam))  #names(dimnam) <- names(ffdimnam)
      dimnam <- lapply(dimnam, function(d){ffdimnam[[d]][as.integer(index[[d]])]})
      if (do.bydim)
        setattr(ret, "dimnames",  dimnam[bydim]) #dimnames(ret) <- dimnam[bydim]
      else
        setattr(ret, "dimnames",  dimnam) #dimnames(ret) <- dimnam
    }
    if (drop){
      ret <- drop(ret)
      # a result with a single remaining dimension is returned as a plain vector
      if (length(dim(ret))==1)
        ret <- as.vector(ret)
    }
  }
  # --- step 5: maintain na.count: subtract the NAs found in the returned values, add those written ---
  if (!is.na(nc)){
    old.nc <- sum(is.na(ret))
    na.count(x) <- nc - old.nc + new.nc
  }
    
  # --- step 6: decorate the result with levels, ram attributes and ram class ---
  if (!is.null(fflev)){
    # for unsigned vmodes the codes are shifted by one before the levels are attached
    if (.vunsigned[vmode])
      ret <- ret + 1L
    setattr(ret, "levels", fflev) #levels(ret) <- fflev
  }
  ramattribs <- attr(attr(x, "virtual"), "ramattribs")
  if (!is.null(ramattribs)){
    setattributes(ret, ramattribs)  #attributes(ret) <- c(attributes(ret), ramattribs)
  }
  setattr(ret, "class",attr(attr(x, "virtual"), "ramclass"))  #class(ret) <- attr(attr(x, "virtual"), "ramclass")
  .vset[[vmode]](ret)
  return(ret)
}


"[.ff_array" <- function(
  x
, ...
, bydim = NULL
, drop  = getOption("ffdrop")
, pack  = FALSE
)
{
  # Read access for ff arrays: x[], x[i], x[cbind(...)] or x[i1, ..., ik].
  #   x      ff array; is opened here
  #   ...    index expressions: none, one (vector/matrix index) or one per dimension
  #   bydim  if not a standard dimorder the result is rearranged with array2vector()
  #   drop   if TRUE, drop() is applied to the resulting array
  #   pack   passed on to hi() / as.hi()
  # Returns a ram object carrying dim/dimnames (array-style access only), levels,
  # 'ramattribs' and 'ramclass' of x.

  open(x, assert=TRUE)
  vm      <- vmode(x)
  window  <- vw(x)
  n_total <- length(x)
  lev     <- levels(x)
  adim    <- dim(x)
  adimord <- dimorder(x)
  adimnam <- dimnames(x)
  ndim    <- length(adim)

  # index expressions are taken unevaluated from the call
  subs  <- match.call(expand.dots=FALSE)[["..."]]
  nsubs <- length(subs)

  # hi object selecting dimension d completely
  whole_dim <- function(d){
    if (adim[[d]])
      hi(from=1, to=adim[[d]], maxindex=adim[[d]], vw=window[,d], pack=pack)
    else
      as.hi(NULL, maxindex=adim[[d]], vw=window[,d], pack=pack)
  }

  if (nsubs==0 || (nsubs==1 && is.language(subs[[1]]) && subs[[1]]=="")){  # ad[]
    index <- lapply(seq_len(ndim), whole_dim)
    indexdim <- adim
    n_get <- n_total
    simple <- FALSE
    ixre <- FALSE
    drop <- FALSE  # R always had: no subsetting no drop
  }else if (nsubs==1){                                     # ad[cbind()] ad[1:n] 1d[1:n]
    index <- as.hi(subs[[1]], maxindex=n_total, vw=window, dim=adim, dimorder=adimord, pack=pack, envir=parent.frame())
    n_get <- poslength(index)
    ixre <- !is.null(index$ix) || index$re
    # ad[cbind()] and ad[1:n] use the vector routine, 1d[1:n] the array routine
    simple <- !is.null(index$dim) || ndim>1
    if (simple){
      indexdim <- NULL
    }else{
      index <- list(index)
      indexdim <- n_get
    }
  }else if (nsubs==ndim){
    ixre <- FALSE
    caller <- parent.frame()
    index <- lapply(seq_len(ndim), function(d){
      # an empty argument selects the whole dimension
      if (is.language(subs[[d]]) && subs[[d]]=="")
        return(whole_dim(d))
      h <- as.hi(subs[[d]], maxindex=adim[[d]], vw=window[,d], pack=pack, names=adimnam[[d]], envir=caller)
      # flag in the function frame that some index is reordered or reversed
      if (!is.null(h$ix) || h$re)
        ixre <<- TRUE
      h
    })
    indexdim <- sapply(index,poslength)
    n_get <- as.integer(prod(indexdim))
    simple <- FALSE
    if (all(indexdim != 1))
      drop <- FALSE
  }else{
    stop("wrong number of dimensions")
  }

  if (simple){
    if (n_total && n_get){
      ret <- .Call(C_get_vector, .ffmode[vm], attr(x, "physical"), index, n_get, PACKAGE="ff")
      ret <- unsort.hi(ret, index)
    }else{
      ret <- vector(mode=.rammode[vm], length=0)
    }
  }else{
    if (!is.null(window))
      adim <- as.integer(colSums(window))
    # cumulative products of the array dims in dimorder and of the selection dims
    cumffdim <- c(1,cumprod(adim[adimord])[-ndim])
    cumindexdim <- cumprod(indexdim)
    n_get <- cumindexdim[ndim]
    cumindexdim <- c(1,cumindexdim[-ndim])[adimord]

    if (n_total && n_get){
      ret <- .Call(C_get_array, .ffmode[vm], attr(x, "physical"), index[adimord], as.integer(indexdim[adimord]), as.integer(adim[adimord]), as.integer(ndim), as.integer(n_get), as.integer(cumindexdim), as.integer(cumffdim), PACKAGE="ff")
    }else{
      ret <- vector(mode=.rammode[vm], length=0)
    }

    # shape the values like the selection and undo the sorting of the index
    setattr(ret, "dim", indexdim)
    ret <- unsort.ahi(ret, index, ixre)

    do.bydim <- !dimorderStandard(bydim)
    if (do.bydim){
      ret <- array2vector(ret, dim=indexdim, dimorder=bydim)
      setattr(ret, "dim", indexdim[bydim])
    }

    if (!is.null(adimnam)){
      # dimnames of the selected positions, keeping the names of the dimnames list
      dimnam <- seq_len(ndim)
      setattr(dimnam, "names", names(adimnam))
      dimnam <- lapply(dimnam, function(d){adimnam[[d]][as.integer(index[[d]])]})
      setattr(ret, "dimnames", if (do.bydim) dimnam[bydim] else dimnam)
    }

    if (drop){
      ret <- drop(ret)
      # a single remaining dimension is returned as a plain vector
      if (length(dim(ret))==1)
        ret <- as.vector(ret)
    }
  }

  if (!is.null(lev)){
    # unsigned vmodes get their codes shifted by one before the levels are attached
    if (.vunsigned[vm])
      ret <- ret + 1L
    setattr(ret, "levels", lev)
  }

  virt <- attr(x, "virtual")
  ram_extra <- attr(virt, "ramattribs")
  if (!is.null(ram_extra))
    setattributes(ret, ram_extra)
  setattr(ret, "class", attr(virt, "ramclass"))
  .vset[[vm]](ret)

  ret
}


# Assignment method for ff arrays: x[...] <- value (or x[..., add=TRUE] <- value).
#   x      ff array; is opened here and must not be readonly
#   ...    index expressions: none (whole array), one (vector/matrix index) or one per dimension
#   bydim  if not a standard dimorder, 'value' is rearranged with vector2array() before writing
#   add    if TRUE the C_addset_* routines are called instead of C_set_*
#   pack   passed on to hi() / as.hi()
#   value  replacement values; coerced to the vmode of x, or to factor codes if x has levels
# Returns x; when something is written its 'physical' attribute is replaced by the result
# of the C call. Stops if na.count or is.sorted are maintained in the physical attributes.
"[<-.ff_array" <- function(
  x
, ...
, bydim = NULL
, add   = FALSE
, pack  = FALSE
, value
)
{
	open(x, assert=TRUE)
  if( is.readonly(x) ) stop("ff is readonly")
  physical <- physical(x)
  if(!is.null(physical$na.count)) stop("use swap instead to maintain na.count (or deactivate na.count(x)<-NULL for assigning)")
  if(!is.null(physical$is.sorted)) stop("deactivate is.sorted(x)<-NULL for assigning)")

  vmode <- vmode(x)
  vw <- vw(x)
  fflen <- length(x)
  fflev <- levels(x)
  ffdim <- dim(x)
  ffdimord <- dimorder(x)
  ffdimnam <- dimnames(x)
  ndim <- length(ffdim)

  # the index expressions are taken unevaluated from the call and handed to as.hi()
  arguments <- match.call(expand.dots=FALSE)[["..."]]
  narguments <- length(arguments)
  # --- step 1: translate the index expressions into hi objects ---
  # Outcome: index (one hi, or a list with one hi per dimension), indexdim (extent of the
  # selection per dimension, NULL for the 'simple' vector-style case), nreturn (number of
  # addressed cells), simple (vector-style access) and ixre (some index has $ix or $re set)
  if (narguments==0 || (narguments==1 && is.language(arguments[[1]]) && arguments[[1]]=="")){  # ad[]
    # no index at all: every dimension is addressed completely
    index <- lapply(seq_len(ndim), function(d){
      if (ffdim[[d]])
        hi(from=1, to=ffdim[[d]], maxindex=ffdim[[d]], vw=vw[,d], pack=pack)
      else
        as.hi(NULL, maxindex=ffdim[[d]], vw=vw[,d], pack=pack)
    })
    indexdim <- ffdim
    nreturn <- fflen
    simple <- FALSE
    ixre <- FALSE
  }else if (narguments==1){                                     # ad[cbind()] ad[1:n] 1d[1:n]
    index <- as.hi(arguments[[1]], maxindex=fflen, vw=vw, dim=ffdim, dimorder=ffdimord, pack=pack, envir=parent.frame())
    nreturn <- poslength(index)
    ixre <- !is.null(index$ix) || index$re
    if (!is.null(index$dim) || ndim>1){ # ad[cbind()] ad[1:n]
      # a single index into a multi-dimensional array is handled by the vector routines
      indexdim <- NULL
      simple <- TRUE
    }else{ # 1d[1:n]
      # a one-dimensional array goes through the array routines with a one-element index list
      index <- list(index)
      indexdim <- nreturn
      simple <- FALSE
    }
  }else if (narguments==ndim){
    # one index expression per dimension; an empty argument addresses the whole dimension
    ixre <- FALSE
    envir <- parent.frame()
    index <- lapply(seq_len(ndim), function(d){
      if (is.language(arguments[[d]]) && arguments[[d]]==""){
        if (ffdim[[d]])
          hi(from=1, to=ffdim[[d]], maxindex=ffdim[[d]], vw=vw[,d], pack=pack)
        else
          as.hi(NULL, maxindex=ffdim[[d]], vw=vw[,d], pack=pack)
      }else{
        lret <- as.hi(arguments[[d]], maxindex=ffdim[[d]], vw=vw[,d], pack=pack, names=ffdimnam[[d]], envir=envir)
        # remember in the enclosing function frame that at least one index is reordered/reversed
        if (!is.null(lret$ix) || lret$re)
          ixre <<- TRUE
        lret
      }
    })
    indexdim <- sapply(index,poslength)
    nreturn <- as.integer(prod(indexdim))
    simple <- FALSE
  }else{
    stop("wrong number of dimensions")
  }

  # --- step 2: write, but only if the array is non-empty and something is addressed ---
  # (note that 'value' is neither coerced nor checked when nothing is written)
  if (fflen && nreturn){
    if (is.null(fflev))
      value <- as.vmode(value, vmode)
    else
      value <- ram2ffcode(value, fflev, vmode)
    nvalue <- length(value)
    if (nvalue>1){
      if (!dimorderStandard(bydim)){
        value <- vector2array(value, dim=indexdim, dimorder=bydim)
      }
      if (nreturn!=nvalue){
        if (nreturn%%nvalue)
          warning("number of elements to replace is not multiple of values for replacement")
        if (ixre){
          # if possible we try to recycle on the C-side, but in case of ix or re we need to recycle already here
          value <- rep(value, length.out=nreturn)
          nvalue <- nreturn
        }
      }
      # bring the values into the order in which the (sorted) index positions are processed
      if (simple){
        if (!is.null(index$ix))
          value <- value[index$ix]
        else if (index$re)
          value <- rev(value)
      }else{
        if (ixre){
          # per dimension: the stored reordering, a reversed sequence, or the identity
          value <- array(value, dim=indexdim)
          ix <- lapply(index, function(i){
                  if (is.null(i$ix)){
                    if (i$re)
                      rev(seq_along(i))
                    else
                      seq_along(i)
                  }else{
                    i$ix
                  }
                })
          value <- do.call("[", c(list(value), ix))
        }else{
          ix <- NULL
        }
      }
    }else if(nvalue<1){
      stop("no value for replacement")
    }
    if (simple){
      if (add)
        attr(x, "physical") <- .Call(C_addset_vector, .ffmode[vmode], attr(x, "physical"), index, nreturn, value, PACKAGE="ff")
      else
        attr(x, "physical") <- .Call(C_set_vector, .ffmode[vmode], attr(x, "physical"), index, nreturn, value, PACKAGE="ff")
    }else{
      # NOTE(review): with a virtual window the dimensions are replaced by colSums(vw),
      # presumably the full extent of the underlying array -- confirm against vw()
      if (!is.null(vw))
        ffdim <- as.integer(colSums(vw))
      # cumulative products of the array dims taken in dimorder, starting with 1
      cumffdim <- c(1,cumprod(ffdim[ffdimord])[-ndim])
      # the same for the selection; nreturn is reset to the total size of the selection
      cumindexdim <- cumprod(indexdim)
      nreturn <- cumindexdim[ndim]
      cumindexdim <- c(1,cumindexdim[-ndim])[ffdimord]

      if (add)
        attr(x, "physical") <- .Call(C_addset_array, .ffmode[vmode], attr(x, "physical"), index[ffdimord], as.integer(indexdim[ffdimord]), as.integer(ffdim[ffdimord]), as.integer(ndim), as.integer(nreturn), as.integer(cumindexdim), as.integer(cumffdim), value, PACKAGE="ff")
      else
        attr(x, "physical") <- .Call(C_set_array, .ffmode[vmode], attr(x, "physical"), index[ffdimord], as.integer(indexdim[ffdimord]), as.integer(ffdim[ffdimord]), as.integer(ndim), as.integer(nreturn), as.integer(cumindexdim), as.integer(cumffdim), value, PACKAGE="ff")
    }
  }

  return(x)
}

#! \name{add}
#! \alias{add}
#! \alias{add.ff}
#! \alias{add.default}
#! \title{ Incrementing an ff or ram object }
#! \description{
#!   Yet another assignment interface in order to allow to formulate \code{x[index,...,add=TRUE]<-value}
#!   in a way which works transparently, not only for ff, but also for ram objects: \code{add(x, value, index, ...)}.
#! }
#! \usage{
#! add(x, \dots)
#! \method{add}{ff}(x, value, \dots)
#! \method{add}{default}(x, value, \dots)
#! }
#! \arguments{
#!   \item{x}{ an ff or ram object }
#!   \item{value}{ the amount to increment, possibly recycled }
#!   \item{\dots}{ further arguments -- especially index information -- passed to \code{\link{[<-}} or \code{\link[=Extract.ff]{[<-.ff}} }
#! }
#! \value{
#!   invisible()
#! }
#! \author{ Jens Oehlschlägel }
#! \note{
#!   Note that \code{add.default} changes the object in its parent frame and thus violates R's usual functional programming logic.
#!   Duplicated index positions should be avoided, because ff and ram objects behave differently:
#!   \preformatted{
#!   add.ff(x, 1, c(3,3))
#!   # will increment x at position 3 TWICE by 1, while
#!   add.default(x, 1, c(3,3))
#!   # will increment x at position 3 just ONCE by 1
#!   }
#! }
#! \seealso{ \code{\link{swap}}, \code{\link{[.ff}}, \code{\link{LimWarn}} }
#! \examples{
#!    message("incrementing parts of a vector")
#!    x <- ff(0, length=12)
#!    y <- rep(0, 12)
#!    add(x, 1, 1:6)
#!    add(y, 1, 1:6)
#!    x
#!    y
#!
#!    message("incrementing parts of a matrix")
#!    x <- ff(0, dim=3:4)
#!    y <- array(0, dim=3:4)
#!    add(x, 1, 1:2, 1:2)
#!    add(y, 1, 1:2, 1:2)
#!    x
#!    y
#!
#!    message("BEWARE that ff and ram methods differ in treatment of duplicated index positions")
#!    add(x, 1, c(3,3))
#!    add(y, 1, c(3,3))
#!    x
#!    y
#!
#!    rm(x); gc()
#! }
#! \keyword{ IO }
#! \keyword{ data }

# for compatibility with the compatibility generic 'add' which unifies += (almost) for ff and ram
add.ff <- function(
  x
, value
, ... # dummy to keep R CMD check quiet
){
  # Rewrites the call add(x, value, idx...) into the assignment
  #   x[idx..., add=TRUE] <- value
  # and evaluates that assignment in the frame of the caller.
  # Returns NULL invisibly.
  target <- match.call(expand.dots=TRUE)
  increment <- target$value
  # switch on incrementing, take 'value' out of the argument list
  # and turn the function call into a subscript expression
  target$add <- TRUE
  target$value <- NULL
  target[[1]] <- as.name("[")
  eval.parent(call("<-", target, increment))
  invisible()
}
add.default <- function(
  x
, value
, ... # dummy to keep R CMD check quiet
){
  # Ram counterpart of add.ff: rewrites the call add(x, value, idx...) into
  #   x[idx...] <- x[idx...] + value
  # and evaluates that assignment in the frame of the caller, i.e. the object
  # named by 'x' is changed there. Returns NULL invisibly.
  target <- match.call(expand.dots=TRUE)
  increment <- target$value
  # drop 'value' and any 'add' argument, then turn the remaining call into
  # the subscript expression x[idx...] with an unnamed first argument
  target$value <- NULL
  target$add <- NULL
  target[[1]] <- as.name("[")
  names(target)[2] <- ""
  eval.parent(call("<-", target, call("+", target, increment)))
  invisible()
}



# Attention: changes its argument x
# add=FALSE writes value and returns oldvalue (BEFORE overwriting)
# add=TRUE  writes and returns oldvalue+value (AFTER overwriting)
swap.default <- function(
  x
, value
, ... # dummy to keep R CMD check quiet
, add=FALSE
){
  # Ram counterpart of the ff swap methods. The call swap(x, value, idx...) is
  # rewritten into the subscript expression x[idx...], which is read and
  # assigned in the frame of the caller (so the object named by 'x' changes there).
  #   add=FALSE: x[idx...] is read first, then overwritten with value;
  #              the values read BEFORE overwriting are returned
  #   add=TRUE : x[idx...] <- x[idx...] + value is done first;
  #              the values read AFTER that assignment are returned
  target <- match.call(expand.dots=TRUE)
  newterm <- target$value
  target[[1]] <- as.name("[")
  names(target)[2] <- ""
  target$value <- NULL
  target$add <- NULL
  if (!add){
    ret <- eval.parent(target)
    eval.parent(call("<-", target, value=newterm))
  }else{
    eval.parent(call("<-", target, call("+", target, newterm)))
    ret <- eval.parent(target)
  }
  #since we do not enforce the semantics of vmodes (overflow in quads etc.)
  #we do not restore the vmode:
  #setattr(ret, 'vmode', vmode(x))
  ret
}


#! \name{LimWarn}
#! \alias{LimWarn}
#! \title{ ff Limitations and Warnings }
#! \description{
#!   This help page lists the currently known limitations of package ff,
#!   as well as differences between ff and ram methods.
#! }
#! \section{Automatic file removal}{
#!   Remember that not giving parameter \code{ff(filename=)} will result in a temporary file in \code{fftempdir} with 'delete' finalizer,
#!   while giving parameter \code{ff(filename=)} will result in a permanent file with 'close' finalizer.
#!   Do avoid setting \code{setwd(getOption("fftempdir"))}!
#!   Make sure you really understand the implications of automatic unlinking of getOption("fftempdir") \code{\link{.onUnload}},
#!   of finalizer choice and of finalizing behaviour at the end of R sessions as defaulted in getOption("fffinonexit").
#!   \bold{Otherwise you might experience 'unexpected' losses of files and data.}
#! }
#! \section{Size of objects}{
#!   ff objects can have length zero and are limited to \code{.Machine$integer.max} elements. We have not yet ported the R code to support 64bit double indices (in essence 52 bits integer) although the C++ back-end has been prepared for this.
#!   Furthermore filesize limitations of the OS apply, see \code{\link{ff}}.
#! }
#! \section{Side effects}{
#!   In contrast to standard R expressions, ff expressions violate the functional programming logic and are called for their side effects.
#!   This is also true for ram compatibility functions \code{\link{swap.default}}, and \code{\link{add.default}}.
#! }
#! \section{Hybrid copying semantics}{
#!   If you modify a copy of an ff object, changes of data (\code{\link[=Extract.ff]{[<-}}) and of \code{\link[=Extract.ff]{physical}} attributes
#!   will be shared, but changes in \code{\link[=Extract.ff]{virtual}} and class attributes will not.
#! }
#! \section{Limits of compatibility between ff and ram objects}{
#!   If it's not too big, you can move an ff object completely into R's RAM through \code{\link{as.ram}}.
#!   However, you should watch out for three limitations:
#!   \enumerate{
#!     \item Ram objects don't have hybrid copying semantics; changes to a copy of a ram object will never change the original ram object
#!     \item Assigning values to a ram object can easily upgrade to a higher \code{\link{storage.mode}}. This will create conflicts with the
#!           \code{\link{vmode}} of the ram object, which goes undetected until you try to write back to disk through \code{\link{as.ff}}.
#!     \item Writing back to disk with \code{\link{as.ff}} under the same filename requires that the original ff object has been deleted
#!           (or at least closed if you specify parameter \code{overwrite=TRUE}).
#!   }
#! }
#! \section{Index expressions}{
#!   ff index expressions do not allow zeros and NAs, see \code{\link{[.ff}} and see \code{\link{as.hi}}
#! }
#! \section{Availability of bydim parameter}{
#!   Parameter \code{bydim} is only available in ff access methods, see \code{\link{[.ff}}
#! }
#! \section{Availability of add parameter}{
#!   Parameter \code{add} is only available in ff access methods, see \code{\link{[.ff}}
#! }
#! \section{Compatibility of swap and add}{
#!   If index expressions contain duplicated positions, the ff and ram methods for \code{\link{swap}}
#!   and \code{\link{add}} will behave differently, see \code{\link{swap}}.
#! }
#! \section{Definition of [[ and [[<-}{
#!   You should consider the behaviour of \code{\link{[[.ff}} and 
#!   \code{\link{[[<-.ff}} as undefined and not use them in programming.
#!   Currently they are shortcuts to \code{\link{get.ff}} and \code{\link{set.ff}}, 
#!   which unlike \code{\link[=Extract.ff]{[.ff}} and \code{\link[=Extract.ff]{[<-.ff}}
#!   do not support \code{\link{factor}} and \code{\link{POSIXct}}, 
#!   nor \code{\link{dimorder}} or virtual windows \code{\link{vw}}.
#!   In contrast to the standard methods, \code{\link{[[.ff}} and 
#!   \code{\link{[[<-.ff}} only accept positive integer index positions.
#!   The definition of \code{\link{[[.ff}} and \code{\link{[[<-.ff}} may be 
#!   changed in the future.
#! }
#! \section{Multiple vector interpretation in arrays }{
#!   R objects have always standard \code{\link{dimorder} seq_along(dim)}.
#!   In case of non-standard dimorder (see \code{\link{dimorderStandard}})
#!   the vector sequence of array elements in R and in the ff file differs.
#!   To access array elements in file order, you can use \code{\link{getset.ff}}, \code{\link{readwrite.ff}}
#!   or copy the ff object and set \code{dim(ff)<-NULL} to get a vector view into the ff object
#!   (using \code{[} dispatches the vector method \code{\link{[.ff}}).
#!   To access the array elements in R standard dimorder you simply use \code{[} which dispatches
#!   to \code{\link{[.ff_array}}. Note that in this case \code{\link{as.hi}} will unpack the complete index, see next section.
#! }
#! \section{RAM expansion of index expressions}{
#!   Some index expressions do not consume RAM due to the \code{\link{hi}} representation.
#!   For example \code{1:n} will almost consume no RAM however large n.
#!   However, some index expressions are expanded and require \code{\link[=Extract.ff]{maxindex}(i) * .rambytes["integer"]} bytes,
#!   either because the sorted sequence of index positions cannot be rle-packed efficiently
#!   or because \code{\link{hiparse}} cannot yet parse such expression and falls back to evaluating/expanding the index expression.
#!   If the index positions are not sorted, the index will be expanded and a second vector is needed to store the information for re-ordering,
#!   thus the index requires \code{2 * \link[=Extract.ff]{maxindex}(i) * .rambytes["integer"]} bytes.
#! }
#! \section{RAM expansion when recycling assignment values}{
#!   Some assignment expressions do not consume RAM for recycling. For example \code{x[1:n] <- 1:k}
#!   will not consume RAM however large is n compared to k, when x has standard \code{\link{dimorder}}.
#!   However, if \code{length(value)>1}, assignment expressions with non-ascending index positions trigger recycling the value R-side to the full index length.
#!   This will happen if \code{\link{dimorder}} does not match parameter \code{bydim} or if the index is not sorted in ascending order.
#! }
#! \section{Byteorder incompatibility}{
#!   Note that ff files cannot be transferred between systems with different byteorder.
#! }
#! \keyword{ IO }
#! \keyword{ data }
#! \keyword{ package }
truecluster/ff documentation built on Jan. 16, 2025, 5:19 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
truecluster/ff
Memory-Efficient Storage of Large Data on Disk and Fast Access Functions

R/ff.R
In truecluster/ff: Memory-Efficient Storage of Large Data on Disk and Fast Access Functions

R Package Documentation

Browse R Packages

We want your feedback!

truecluster/ff Memory-Efficient Storage of Large Data on Disk and Fast Access Functions

R/ff.R In truecluster/ff: Memory-Efficient Storage of Large Data on Disk and Fast Access Functions

R Package Documentation

Browse R Packages

We want your feedback!

truecluster/ff
Memory-Efficient Storage of Large Data on Disk and Fast Access Functions

R/ff.R
In truecluster/ff: Memory-Efficient Storage of Large Data on Disk and Fast Access Functions