R/RMVL.R

Defines functions mvl_index_lapply print.MVL_INDEX `[[.MVL_OBJECT` `[.MVL_OBJECT` mvl2R names.MVL_OBJECT length.MVL_OBJECT dim.MVL_OBJECT print.MVL_OBJECT mvl_type_name print.MVL names.MVL `$.MVL` `[.MVL` `[[.MVL_OFFSET` `[.MVL_OFFSET` mvl_add_directory_entries mvl_read_object make_mvl_object mvl_object_stats mvl_read_metadata mvl_flatten_string mvl_fused_write_objects mvl_write_serialized_object mvl_write_object mvl_inherits mvl_class mvl_write_object_metadata mvl_write_object_metadata1 mvl_merge mvl_indexed_copy mvl_extent_index_lapply mvl_write_extent_index mvl_compute_repeats mvl_group_lapply mvl_group mvl_find_matches mvl_neighbors_lapply mvl_get_neighbors mvl_write_spatial_groups mvl_write_spatial_index1 mvl_get_groups mvl_write_groups mvl_write_hash_vectors mvl_hash_vectors mvl_order_vectors mvl_write_string mvl_rewrite_vector mvl_start_write_vector mvl_fused_write_vector mvl_write_vector mvl_status mvl_xlength mvl_get_vectors mvl_get_directory mvl_close mvl_remap mvl_open

Documented in dim.MVL_OBJECT length.MVL_OBJECT mvl2R mvl_add_directory_entries mvl_class mvl_close mvl_compute_repeats mvl_extent_index_lapply mvl_find_matches mvl_fused_write_objects mvl_get_groups mvl_get_neighbors mvl_group mvl_group_lapply mvl_hash_vectors mvl_indexed_copy mvl_index_lapply mvl_inherits mvl_merge mvl_neighbors_lapply mvl_object_stats mvl_open mvl_order_vectors mvl_remap mvl_rewrite_vector mvl_start_write_vector mvl_status mvl_write_extent_index mvl_write_groups mvl_write_hash_vectors mvl_write_object mvl_write_serialized_object mvl_write_spatial_groups mvl_write_spatial_index1 mvl_xlength names.MVL names.MVL_OBJECT print.MVL print.MVL_OBJECT

#' @useDynLib RMVL , .registration=TRUE

MVL_SMALL_LENGTH<-1024

#' Open an MVL file
#'
#' Open an MVL format file for reading and/or writing.
#'
#' MVL stands for "Mapped vector library" and is a file format designed for efficient memory mapped access. 
#' An MVL file can be much larger than physical memory of the machine.
#'
#' \code{mvl_open} returns a handle that can be used to access MVL files. Files opened read-only are memory mapped and do not use a file descriptor, and thus are not
#' subject to limits on the number of open files.
#' Files opened for writing data do use a file descriptor.
#' Once opened for read access the data can be accessed using usual R semantics for lists, data.frames and arrays.
#'
#' @param filename  path to file.
#' @param append specify TRUE when you intend to write data into the file
#' @param create when TRUE create file if it did not exist
#' @return handle to opened MVL file
#' @seealso \code{\link{mvl_close}}, \code{\link{mvl_remap}}
#' @examples
#' \dontrun{
#' M1<-mvl_open("test1.mvl", append=TRUE, create=TRUE)
#' mvl_write_object(M1, data.frame(x=1:2, y=rnorm(2)), "test_frame")
#' mvl_close(M1)
#'
#' M2<-mvl_open("test1.mvl")
#' print(names(M2))
#' print(M2["test_frame"])
#' mvl_close(M2)
#'
#' M3<-mvl_open("test2.mvl", append=TRUE, create=TRUE)
#' L<-list()
#' df<-data.frame(x=1:1e6, y=rnorm(1e6), s=rep(c("a", "b"), 5e5))
#' L[["x"]]<-mvl_write_object(M3, df, drop.rownames=TRUE)
#' L[["description"]]<-"Example of large data frame"
#' mvl_write_object(M3, L, "test_object")
#' mvl_close(M3)
#'
#' M4<-mvl_open("test2.mvl")
#' print(names(M4))
#' L<-M4["test_object"]
#' print(L)
#' print(L[["x"]][1:20,])
#' mvl_object_stats(L[["x"]])
#' # If you need to get the whole x, one can use mvl2R(L[["x"]])
#' mvl_close(M4)
#' }
#' @export
#'
mvl_open<-function(filename, append=FALSE, create=FALSE) {
	MVLHANDLE<-list(handle=.Call(mmap_library, path.expand(as.character(filename)), as.integer(ifelse(append, 1, 0)+ifelse(create, 2, 0))))
	class(MVLHANDLE)<-"MVL"
	MVLHANDLE[["directory"]]<-mvl_get_directory(MVLHANDLE)
	return(MVLHANDLE)
	}

	
#' Enlarge memory map to include recently loaded data.
#'
#' This function operates on MVL files opened for writing. When writing new data to the MVL file that data is appended at the end and past the end of previously mapped data. 
#' Calling \code{mvl_remap()} updates the memory mapping to include all the data written before mvl_remap() was called.
#' The MVL file directory is also updated to include recently added entries.  Old handles can still be used, but will not include updated directory information.
#' MVL_OBJECT's previously obtained from this handle continue to be valid.
#'
#'
#'
#' \code{mvl_remap} returns a handle with updated directory.
#'
#' @param MVLHANDLE handle to opened MVL file as generated by \code{mvl_open()} or \code{mvl_remap()}
#' @param append specify FALSE when you do not intend to write the file.
#' @return handle to MVL file, with updated directory.
#' @seealso \code{\link{mvl_open}}, \code{\link{mvl_close}}
#' @examples
#' \dontrun{
#' Mtmp<-mvl_open("tmp_a.mvl", append=TRUE, create=TRUE)
#' mvl_write_object(Mtmp, runif(100), "vec1")
#' Mtmp<-mvl_remap(Mtmp)
#' print(Mtmp["vec1"])
#' }
#' @export
#'
mvl_remap<-function(MVLHANDLE, append=TRUE) {
	if(!inherits(MVLHANDLE, "MVL")) stop("not an MVL object")
	.Call(remap_library, MVLHANDLE[["handle"]], as.integer(ifelse(append, 1, 0)))
	MVLHANDLE[["directory"]]<-mvl_get_directory(MVLHANDLE)
	return(MVLHANDLE)
	}
	
	
#' Close MVL file
#'
#' Closes MVL file releasing all resources.
#
#' For read-only files the memory is unmapped, reducing the virtual memory footprint.
#' For files opened for writing the directory is written out, so it is important to call \code{mvl_close} or the newly written file will be corrupt.
#' After \code{mvl_close()} all previously obtained MVL_OBJECT's with this handle become invalid.
#'
#' @param MVLHANDLE handle to opened MVL file as generated by mvl_open()
#' @return None
#' @seealso \code{\link{mvl_open}}, \code{\link{mvl_remap}}
#' @export
#'
mvl_close<-function(MVLHANDLE) {
	if(!inherits(MVLHANDLE, "MVL")) stop("not an MVL object")
	.Call(close_library, unclass(MVLHANDLE)[["handle"]])

	return(invisible(NULL))
	}
	
mvl_get_directory<-function(MVLHANDLE) {
	if(!inherits(MVLHANDLE, "MVL")) stop("not an MVL object")
	return(.Call(get_directory, unclass(MVLHANDLE)[["handle"]]))
	}

mvl_get_vectors<-function(MVLHANDLE, offsets, raw=FALSE) {
	if(!inherits(MVLHANDLE, "MVL")) stop("not an MVL object")
	if(!inherits(offsets, "MVL_OFFSET"))stop("not an MVL offset")
	if(raw)
		return(.Call(read_vectors_raw, unclass(MVLHANDLE)[["handle"]], offsets))
		else
		return(.Call(read_vectors, unclass(MVLHANDLE)[["handle"]], offsets))
	}
#' Return length of MVL or R vector as a numeric value
#' 
#' Internally this calls R function xlength() rather than length(). This allows to obtain length of larger vectors. For MVL vectors this returns the length of the vector.
#' @param x  any R object
#' @return length of object as as numeric value
#' @export
#'
mvl_xlength<-function(x) {
	if(inherits(x, "MVL_OBJECT"))return(unclass(x)[["length"]])
	return(.Call(mvl_xlength_int, x))
	}
	
	
#' Return status of MVL package
#'
#' @return list of status values
#' @export
mvl_status<-function() {
	L<-.Call(get_status)
	L[["mvl_small_length"]]<-MVL_SMALL_LENGTH
	return(L)
	}
	
mvl_write_vector<-function(MVLHANDLE, x, metadata.offset=NULL) {
	if(!inherits(MVLHANDLE, "MVL")) stop("not an MVL object")
	if(!is.null(metadata.offset) && !inherits(metadata.offset, "MVL_OFFSET"))stop("not an MVL offset")
	if(inherits(x, "factor"))x<-as.character(x)
	type<-attr(x, "MVL_TYPE", exact=TRUE)
	if(is.null(type)) {
		if(inherits(x, "MVL_OBJECT")) {
			type<-unclass(x)[["type"]]
			if(type==100)stop("Only vector like MVL_OBJECTs are supported")
			if(type==102)type<-10000
			if(type>0)
				return(.Call(fused_write_vector, unclass(MVLHANDLE)[["handle"]], as.integer(type), list(x), metadata.offset)) 
				else 
				stop("Malformed MVL_OBJECT")
			} else {
			if(inherits(x, "MVL_OFFSET")) type<-100
				else
				type<-switch(typeof(x), double=5, integer=2, character=10000, logical=1, raw=1, -1)
			}
		}
	if(type>0) {
		return(.Call(write_vector, unclass(MVLHANDLE)[["handle"]], as.integer(type), x, metadata.offset)) 
		}
	stop("Could not write vector with class ", class(x))
	}

mvl_fused_write_vector<-function(MVLHANDLE, L, metadata.offset=NULL) {
	if(!inherits(MVLHANDLE, "MVL")) stop("not an MVL object")
	if(!is.null(metadata.offset) && !inherits(metadata.offset, "MVL_OFFSET"))stop("not an MVL offset")
	
	if(length(L)>0) {
		for(i in 1:length(L)) {
			if(inherits(L[[i]], "factor"))L[[i]]<-as.character(L[[i]])
			}
		type<-attr(L[[1]], "MVL_TYPE", exact=TRUE)
		if(is.null(type) && inherits(L[[1]], "MVL_OBJECT")) {
			type<-mvl_object_stats(L[[1]])[["type"]]
			if(type==102)type<-10000
			}
		if(is.null(type)) {
			# TODO: for now this is meant to work with primitive vectors
			if(inherits(L[[1]], "MVL_OFFSET")) type<-100
				else
				type<-switch(typeof(L[[1]]), double=5, integer=2, character=10000, logical=1, raw=1, -1)
			}
		} else {
		type<-5
		}
	if(type>0) {
		return(.Call(fused_write_vector, unclass(MVLHANDLE)[["handle"]], as.integer(type), L, metadata.offset)) 
		}
	stop("Could not write vector")
	}

#' Piecewise output of very long numeric and integer vectors
#'
#' While \code{mvl_fused_write_objects} can be used to create very large vectors and data frames of arbitrary type, it requires
#' piecewise data to be written first into an MVL file. Functions \code{mvl_start_write_vector()} and \code{mvl_rewrite_vector()} provide a way to create very long vectors in one pass.
#' Only numeric and integer vectors are supported.
#'
#' One convenient use is to compute \code{f(x,y,z,...)} with very long vector arguments by iterating over indices. The iteration can be done using fixed blocks of indices, or by using groups of indices computed with other MVL functions. 
#'
#' It is generally recommended to call \code{mvl_rewrite_vector()} with large blocks to improve I/O performance and reduce number of writes to underlying media.
#'
#' @param MVLHANDLE handle to opened MVL file as generated by mvl_open()
#' @param x an integer or numeric vector
#' @param expected.length the length of vector to create. Use double to pass large values
#' @param name if specified add a named entry to MVL file directory
#' 
#' @seealso mvl_fused_write_objects
#'
#' @examples
#' \dontrun{
#' Mtmp<-mvl_open("tmp_a.mvl", append=TRUE, create=TRUE)
#' offset<-mvl_start_write_object(Mtmp, runif(10), expected.length=1000, "vec1")
#' Mtmp<-mvl_remap(Mtmp)
#' mvl_rewrite_vector(Mtmp[offset], 50, rnorm(20))
#' }
#' 
#'
#' @export
mvl_start_write_vector<-function(MVLHANDLE, x, expected.length=NULL, name=NULL) {
	if(!inherits(MVLHANDLE, "MVL")) stop("not an MVL object")
	metadata.offset<-mvl_write_object_metadata(MVLHANDLE, x, drop.rownames=TRUE)
	
	if(inherits(x, "factor"))x<-as.character(x)
	type<-attr(x, "MVL_TYPE", exact=TRUE)
	if(is.null(type)) {
		if(inherits(x, "MVL_OBJECT")) {
			stop("start_write_vector with MVL_OBJECTS is not supported yet")
			type<-unclass(x)[["type"]]
			if(type==100)stop("Only vector like MVL_OBJECTs are supported")
			if(type==102)type<-10000
			if(type>0)
				return(.Call(fused_write_vector, unclass(MVLHANDLE)[["handle"]], as.integer(type), list(x), metadata.offset)) 
				else 
				stop("Malformed MVL_OBJECT")
			} else {
			if(inherits(x, "MVL_OFFSET")) type<-100
				else
				type<-switch(typeof(x), double=5, integer=2, character=10000, logical=1, raw=1, -1)
			}
		}
	if(type>0) {
		offset<-.Call(start_write_vector, unclass(MVLHANDLE)[["handle"]], as.integer(type), expected.length, x, metadata.offset)
		if(!is.null(name))mvl_add_directory_entries(MVLHANDLE, name, offset)	
		return(invisible(offset))
		}
	stop("Could not write vector with class ", class(x))
	}

#' @param obj an MVL vector object to modify
#' @param offset the offset into MVL vector (starting with 1) to write x
#' @param x an integer or numeric vector
#' @rdname mvl_start_write_vector
#' @export
mvl_rewrite_vector<-function(obj, offset, x) {
	return(.Call(rewrite_vector, obj, offset, x))
	}
	
mvl_write_string<-function(MVLHANDLE, x, metadata.offset=NULL) {
	if(!inherits(MVLHANDLE, "MVL")) stop("not an MVL object")
	if(!is.null(metadata.offset) && !inherits(metadata.offset, "MVL_OFFSET"))stop("not an MVL offset")
	x<-as.character(x)
	if(length(x)!=1)stop("requires a single string as argument")
	return(.Call(write_vector, unclass(MVLHANDLE)[["handle"]], as.integer(10001), x, metadata.offset)) 
	}

#' Return permutation sorting vector entries
#'
#' This function is similar to R  order() function, but operates on MVL_OBJECTS. 
#'
#' @param L  list of vector like MVL_OBJECTs 
#' @param indices  list of indices into objects to sort. If absent or NULL it is assumed to be from 1 to length of the object.
#' @param decreasing whether to sort in ascending or decreasing order. This parameter is provided for compatibility with \code{order()} function
#' @param sort_function specifies desired sort order
#' @return sorted indices
#' @seealso \code{\link{mvl_hash_vectors}}, \code{\link{mvl_find_matches}}, \code{\link{mvl_group}}, \code{\link{mvl_find_matches}}, \code{\link{mvl_indexed_copy}}, \code{\link{mvl_merge}}
#'  
#' @examples
#' \dontrun{
#' Mtmp<-mvl_open("tmp_a.mvl", append=TRUE, create=TRUE)
#' mvl_write_object(Mtmp, runif(100), "vec1")
#' Mtmp<-mvl_remap(Mtmp)
#' permutation1<-mvl_order_vectors(list(Mtmp["vec1", ref=TRUE]))
#' }
#' @export
#'
mvl_order_vectors<-function(L, indices=NULL, decreasing=FALSE, sort_function=ifelse(decreasing, 2, 1)) {
	return(.Call(order_vectors, L, indices, as.integer(sort_function))) 
	}

#' Return hash values for each row
#'
#' This function is passed a list of MVL vectors which are interpreted in data.frame fashion. For each row, i.e. set of vector values with the same index
#' we compute a hash value. Identical rows produce identical hash values. The hash values have good entropy and can be used to map row values into random numbers.
#'
#' @param L  list of vector like MVL_OBJECTs 
#' @param indices  list of indices into objects to sort. If absent or NULL it is assumed to be from 1 to the length of the object.
#' @return hash values in numeric format, with 52 valid bits. Each value is uniform between 1 and 2.
#' @seealso \code{\link{mvl_order_vectors}}, \code{\link{mvl_find_matches}}, \code{\link{mvl_group}}, \code{\link{mvl_find_matches}}, \code{\link{mvl_indexed_copy}}, \code{\link{mvl_merge}}, \code{\link{mvl_write_hash_vectors}}
#'  
#' @examples
#' \dontrun{
#' Mtmp<-mvl_open("tmp_a.mvl", append=TRUE, create=TRUE)
#' mvl_write_object(Mtmp, runif(100), "vec1")
#' Mtmp<-mvl_remap(Mtmp)
#' hash1<-mvl_hash_vectors(list(Mtmp["vec1", ref=TRUE]))
#' }
#' @export
#'
mvl_hash_vectors<-function(L, indices=NULL) {
	return(.Call(hash_vectors, L, indices)) 
	}

#' Write hash values for each row
#'
#' This function is passed a list of MVL vectors which are interpreted in data.frame fashion. For each row, i.e. set of vector values with the same index
#' we compute a 64-bit hash value. Identical rows produce identical hash values. The hash values are written into 64-bit integer vector. 
#' This function is meant for use with data that is too large to handle comfortably.
#'
#' @param MVLHANDLE a handle to MVL file produced by mvl_open()
#' @param L  list of vector like MVL_OBJECTs 
#' @param name if specified add a named entry to MVL file directory
#' @return an object of class MVL_OFFSET that describes an offset into this MVL file. MVL offsets are vectors and can be concatenated. They can be written to MVL file directly, or as part of another object such as list.
#' @seealso \code{\link{mvl_order_vectors}}, \code{\link{mvl_find_matches}}, \code{\link{mvl_group}}, \code{\link{mvl_find_matches}}, \code{\link{mvl_indexed_copy}}, \code{\link{mvl_merge}}, \code{\link{mvl_hash_vectors}}
#'  
#' @examples
#' \dontrun{
#' Mtmp<-mvl_open("tmp_a.mvl", append=TRUE, create=TRUE)
#' mvl_write_object(Mtmp, runif(100), "vec1")
#' Mtmp<-mvl_remap(Mtmp)
#' mvl_write_hash_vectors(Mtmp, list(Mtmp["vec1", ref=TRUE]), "vec1_hash")
#' Mtmp<-mvl_remap(Mtmp)
#' print(length(Mtmp["vec1_hash"]))
#' }
#' @export
#'
mvl_write_hash_vectors<-function(MVLHANDLE, L, name=NULL) {
	if(!inherits(MVLHANDLE, "MVL")) stop("not an MVL object")
	offset<-.Call(write_hash_vectors, unclass(MVLHANDLE)[["handle"]], L)
	if(!is.null(name))mvl_add_directory_entries(MVLHANDLE, name, offset)	
	return(invisible(offset))
	}

#' Write group information for each row
#'
#' This function is passed a list of MVL vectors which are interpreted in data.frame fashion. These rows 
#' are split into groups so that identical rows are guaranteed to belong to the same group. This is done internally based on 20-bit hash values.
#' This function is convenient to use as a way to partition very large datasets before applying \code{mvl_group} or \code{mvl_find_matches}. 
#' The groups can be obtained by using \code{mvl_get_groups}
#'
#' @param MVLHANDLE a handle to MVL file produced by mvl_open()
#' @param L  list of vector like MVL_OBJECTs 
#' @param name if specified add a named entry to MVL file directory
#' @return an object of class MVL_OFFSET that describes an offset into this MVL file. MVL offsets are vectors and can be concatenated. They can be written to MVL file directly, or as part of another object such as list.
#' @seealso \code{\link{mvl_order_vectors}}, \code{\link{mvl_find_matches}}, \code{\link{mvl_group}}, \code{\link{mvl_find_matches}}, \code{\link{mvl_indexed_copy}}, \code{\link{mvl_merge}}, \code{\link{mvl_hash_vectors}}, \code{\link{mvl_get_groups}}
#'  
#' @examples
#' \dontrun{
#' Mtmp<-mvl_open("tmp_a.mvl", append=TRUE, create=TRUE)
#' mvl_write_object(Mtmp, data.frame(x=runif(100), y=1:100), "df1")
#' Mtmp<-mvl_remap(Mtmp)
#' mvl_write_groups(Mtmp, list(Mtmp$df1[,"x",ref=TRUE], Mtmp$df1[,"y", ref=TRUE]), "df1_groups")
#' Mtmp<-mvl_remap(Mtmp)
#' print(mvl_get_groups(Mtmp["df1_groups", ref=TRUE]["prev", ref=TRUE], Mtmp$df1_groups$first[1:5]))
#' }
#' @export
#'
mvl_write_groups<-function(MVLHANDLE, L, name=NULL) {
	if(!inherits(MVLHANDLE, "MVL")) stop("not an MVL object")
	offset<-.Call(write_groups, MVLHANDLE[["handle"]], L)
	if(!is.null(name))mvl_add_directory_entries(MVLHANDLE, name, offset)	
	return(invisible(offset))
	}

#' Retrieve indices belonging to one or more groups
#'
#' This function is passed the \code{prev} vector computed by \code{mvl_write_groups} and one or more indices from the \code{first} vector.
#'
#' @param prev  MVL_OBJECT \code{prev} computed by \code{mvl_write_groups} 
#' @param first_indices  indices from \code{first} vector computed by \code{mvl_write_groups} 
#' @return a vector of indices
#' @seealso \code{\link{mvl_group}}
#'  
#' @examples
#' \dontrun{
#' Mtmp<-mvl_open("tmp_a.mvl", append=TRUE, create=TRUE)
#' mvl_write_object(Mtmp, data.frame(x=runif(100), y=1:100), "df1")
#' Mtmp<-mvl_remap(Mtmp)
#' mvl_write_groups(Mtmp, list(Mtmp$df1[,"x",ref=TRUE], Mtmp$df1[,"y", ref=TRUE]), "df1_groups")
#' Mtmp<-mvl_remap(Mtmp)
#' print(mvl_get_groups(Mtmp["df1_groups", ref=TRUE]["prev", ref=TRUE], Mtmp$df1_groups$first[1:5]))
#' }
#' @export
#'
mvl_get_groups<-function(prev, first_indices) {
	return(.Call(get_groups, prev, first_indices))
	}

#' Write spatial group information for each row
#'
#' This function is passed a list of MVL vectors which are interpreted in data.frame fashion. These rows 
#' are split into groups so that identical rows are guaranteed to belong to the same group. This is done using partition into equal sized bins.
#' This function is meant for constructing spatial indexes.
#'
#' @param MVLHANDLE a handle to MVL file produced by mvl_open()
#' @param L  list of vector like MVL_OBJECTs 
#' @param bits a vector of bit values to use for each member of L
#' @param name if specified add a named entry to MVL file directory
#' @return an object of class MVL_OFFSET that describes an offset into this MVL file. MVL offsets are vectors and can be concatenated. They can be written to MVL file directly, or as part of another object such as list.
#' @seealso \code{\link{mvl_order_vectors}}, \code{\link{mvl_find_matches}}, \code{\link{mvl_group}}, \code{\link{mvl_find_matches}}, \code{\link{mvl_indexed_copy}}, \code{\link{mvl_merge}}, \code{\link{mvl_hash_vectors}}, \code{\link{mvl_get_groups}}
#'  
#' @examples
#' \dontrun{
#' Mtmp<-mvl_open("tmp_a.mvl", append=TRUE, create=TRUE)
#' mvl_write_object(Mtmp, data.frame(x=runif(100), y=1:100), "df1")
#' Mtmp<-mvl_remap(Mtmp)
#' mvl_write_spatial_index1(Mtmp, list(Mtmp$df1[,"x",ref=TRUE], Mtmp$df1[,"y", ref=TRUE]),
#'                                                              c(2, 3), "df1_sp_groups")
#' Mtmp<-mvl_remap(Mtmp)
#' print(mvl_get_neighbors(Mtmp["df1_sp_groups", ref=TRUE], list(c(0.5, 0.6), c(2, 3))))
#' }
#' @export
#'
mvl_write_spatial_index1<-function(MVLHANDLE, L, bits, name=NULL) {
	if(!inherits(MVLHANDLE, "MVL")) stop("not an MVL object")
	if(length(bits)==1)bits<-rep(bits, length(L))
	offset<-.Call(write_spatial_groups, unclass(MVLHANDLE)[["handle"]], L, as.integer(bits))
	if(!is.null(name))mvl_add_directory_entries(MVLHANDLE, name, offset)	
	return(invisible(offset))
	}

#' Write spatial group information for each row
#'
#' Please use mvl_write_spatial_index1() instead.
#'
#' @param MVLHANDLE a handle to MVL file produced by mvl_open()
#' @param L  list of vector like MVL_OBJECTs 
#' @param bits a vector of bit values to use for each member of L
#' @param name if specified add a named entry to MVL file directory
#' @return an object of class MVL_OFFSET that describes an offset into this MVL file. MVL offsets are vectors and can be concatenated. They can be written to MVL file directly, or as part of another object such as list.
#' @seealso \code{\link{mvl_order_vectors}}, \code{\link{mvl_find_matches}}, \code{\link{mvl_group}}, \code{\link{mvl_find_matches}}, \code{\link{mvl_indexed_copy}}, \code{\link{mvl_merge}}, \code{\link{mvl_hash_vectors}}, \code{\link{mvl_get_groups}}
#' @export
#'
mvl_write_spatial_groups<-function(MVLHANDLE, L, bits, name=NULL) {
	if(!inherits(MVLHANDLE, "MVL")) stop("not an MVL object")
	if(length(bits)==1)bits<-rep(bits, length(L))
	offset<-.Call(write_spatial_groups, unclass(MVLHANDLE)[["handle"]], L, as.integer(bits))
	if(!is.null(name))mvl_add_directory_entries(MVLHANDLE, name, offset)	
	return(invisible(offset))
	}
	
#' Retrieve indices of nearby rows.
#'
#' This function is passed the index computed by \code{mvl_write_spatial_index1} and a list of vectors, which rows are interpreted as points.
#' For each row, the function returns a vector of indices describing rows that are close to it.
#'
#' @param spatial_index  MVL_OBJECT computed by \code{mvl_write_spatial_index1} 
#' @param data_list  a list of vectors of equal length. They can be MVL_OBJECTs or R vectors. 
#' @return a list of vectors of indices
#' @seealso \code{\link{mvl_write_spatial_index1}}, \code{\link{mvl_index_lapply}}
#'  
#' @examples
#' \dontrun{
#' Mtmp<-mvl_open("tmp_a.mvl", append=TRUE, create=TRUE)
#' mvl_write_object(Mtmp, data.frame(x=runif(100), y=1:100), "df1")
#' Mtmp<-mvl_remap(Mtmp)
#' mvl_write_spatial_index1(Mtmp, list(Mtmp$df1[,"x",ref=TRUE], Mtmp$df1[,"y", ref=TRUE]),
#'                                                               c(2, 3), "df1_sp_groups")
#' Mtmp<-mvl_remap(Mtmp)
#' print(mvl_get_neighbors(Mtmp["df1_sp_groups", ref=TRUE], list(c(0.5, 0.6), c(2, 3))))
#' }
#' @export
#'
mvl_get_neighbors<-function(spatial_index, data_list) {
	return(.Call(get_neighbors, spatial_index, data_list))
	}
	
	
#' Apply function to indices of nearby rows
#'
#' Please use generic function \code{mvl_index_lapply()} instead.
#'
#' This function is passed the index computed by \code{mvl_write_spatial_index1} and a list of vectors, which rows are interpreted as points.
#' For each row, we call the function \code{fn(i, idx)}, where \code{i} gives the index of query row, and \code{idx} gives the indices of nearby rows.
#'
#' @param spatial_index  MVL_OBJECT computed by \code{mvl_write_spatial_index1} 
#' @param data_list  a list of vectors of equal length. They can be MVL_OBJECTs or R vectors. 
#' @param fn a function of two arguments - and index into \code{data_list} and a corresponding list of indices
#' @return a list of results of function \code{fn}
#' @seealso \code{\link{mvl_group}}
#'  
#' @examples
#' \dontrun{
#' Mtmp<-mvl_open("tmp_a.mvl", append=TRUE, create=TRUE)
#' mvl_write_object(Mtmp, data.frame(x=runif(100), y=1:100), "df1")
#' Mtmp<-mvl_remap(Mtmp)
#' mvl_write_spatial_index1(Mtmp, list(Mtmp$df1[,"x",ref=TRUE], Mtmp$df1[,"y", ref=TRUE]),
#'                                                                c(2, 3), "df1_sp_groups")
#' Mtmp<-mvl_remap(Mtmp)
#' mvl_neighbors_lapply(Mtmp["df1_sp_groups", ref=TRUE], list(c(0.5, 0.6), c(2, 3)),
#'                                            function(i, idx) { return(list(i, idx))})
#' }
#' @export
#'
mvl_neighbors_lapply<-function(spatial_index, data_list, fn) {
	L<-.Call(neighbors_lapply, spatial_index, data_list, fn, new.env())
	return(L)
	}
	
#' Find matching rows
#'
#' This function is passed two lists of MVL vectors which are interpreted in data.frame fashion. 
#' The indices of pairwise matches are returned in order of the arguments ("index1" and "index2"). 
#' In addition we return indices describing stretches with "index1" value constant ( stretch_index1[i] to stretch_index1[i+1]-1)
#'
#' @param L1  list of vector like MVL_OBJECTs 
#' @param indices1  list of indices into objects to sort. If absent or NULL it is assumed to be from 1 to the length of the object.
#' @param L2  list of vector like MVL_OBJECTs 
#' @param indices2  list of indices into objects to sort. If absent or NULL it is assumed to be from 1 to the length of the object.
#' @return A list of matches and match stretches
#' @seealso \code{\link{mvl_hash_vectors}}, \code{\link{mvl_order_vectors}}, \code{\link{mvl_group}}, \code{\link{mvl_find_matches}}, \code{\link{mvl_indexed_copy}}, \code{\link{mvl_merge}}
#'  
#' @examples
#' \dontrun{
#' Mtmp<-mvl_open("tmp_a.mvl", append=TRUE, create=TRUE)
#' mvl_write_object(Mtmp, data.frame(x=rep(c("a", "b"), 50), y=1:100), "df1")
#' mvl_write_object(Mtmp, data.frame(x=rep(c("b", "c"), 50), y=21:120), "df2")
#' Mtmp<-mvl_remap(Mtmp)
#' L<-mvl_find_matches(list(Mtmp$df1[,"x",ref=TRUE], Mtmp$df1[,"y", ref=TRUE]), 
#'                         list(Mtmp$df2[,"x",ref=TRUE], Mtmp$df2[,"y", ref=TRUE]))
#' }
#' @export
#'
mvl_find_matches<-function(L1, L2, indices1=NULL, indices2=NULL) {
	L<-.Call(find_matches, L1, indices1, L2, indices2)
	names(L)<-c("stretch_index1", "index1", "index2")
	return(L) 
	}

#' Group identical rows
#'
#' This function groups identical rows. The result is formatted as two vectors \code{stretch_index} and \code{index}
#' Vector \code{index} contains stretches of indices with identical rows. Vector \code{stretch_index} describes stretches as stretch_index[i] to stretch_index[i+1]-1
#' This allows fast iteration over indices without creating excessive numbers of R objects when group sizes are small.
#' 
#'
#' @param L  list of vector like MVL_OBJECTs 
#' @param indices  list of indices into objects to group. If absent or NULL it is assumed to be from 1 to the length of the object.
#' @return A list of groups and group stretches
#' @seealso \code{\link{mvl_group_lapply}}, \code{\link{mvl_hash_vectors}}, \code{\link{mvl_find_matches}}, \code{\link{mvl_order_vectors}}, \code{\link{mvl_find_matches}}, \code{\link{mvl_indexed_copy}}, \code{\link{mvl_merge}}
#'  
#' @examples
#' \dontrun{
#' Mtmp<-mvl_open("tmp_a.mvl", append=TRUE, create=TRUE)
#' mvl_write_object(Mtmp, data.frame(x=rep(c("a", "b"), 50), y=(1:100)/5), "df1")
#' Mtmp<-mvl_remap(Mtmp)
#' df1<-Mtmp["df1", ref=TRUE]
#' G<-mvl_group(list(df1[,"x",ref=TRUE], df1[,"y", ref=TRUE]))
#' mvl_group_lapply(G, function(idx) { return(sum(df1[idx, "y"]))})
#' }
#' @export
#'
mvl_group<-function(L, indices=NULL) {
	L<-.Call(group_vectors, L, indices)
	names(L)<-c("stretch_index", "index")
	return(L) 
	}

#' Apply function to index stretches
#'
#' Iteratively call function \code{fn(idx)} over index stretches previously computed with \code{mvl_group}
#'
#' @param G a list of groups and group stretches produced by \code{mvl_group}
#' @param fn a function of one argument - list of indices
#' @return a list of results of function \code{fn}
#' @seealso \code{\link{mvl_group}}
#'  
#' @examples
#' \dontrun{
#' Mtmp<-mvl_open("tmp_a.mvl", append=TRUE, create=TRUE)
#' mvl_write_object(Mtmp, data.frame(x=rep(c("a", "b"), 50), y=(1:100)/5), "df1")
#' Mtmp<-mvl_remap(Mtmp)
#' df1<-Mtmp$df1
#' G<-mvl_group(list(df1[,"x",ref=TRUE], df1[,"y", ref=TRUE]))
#' mvl_group_lapply(G, function(idx) { return(sum(df1[idx, "y"]))})
#' }
#' @export
#'
mvl_group_lapply<-function(G, fn) {
	L<-.Call(group_lapply, mvl2R(G$stretch_index), mvl2R(G$index), fn, new.env())
	return(L) 
	}
	
#' Find stretches of repeated rows among vectors 
#'
#' This function is passed a list of vector like MVL_OBJECTs which are considered as columns in a table.
#' It returns a vector V starting with 1 and ending with number of rows plus 1, so that stretches of repeated rows can be found as V[i]:V[i+1]
#'
#' @param L  list of vector like MVL_OBJECTs 
#' @return partition describing repeated rows
#'
#' @export
#'
mvl_compute_repeats<-function(L) {
	ans<-.Call(compute_repeats, L)
	return(invisible(ans))
	}
	
#' Compute and write extent index
#'
#' This function computes a hash-based index that allows to find indices of rows which hashes match query values.
#' While it can be applied to arbitrary data, it is optimized for the common case when vectors contain stretches of repeated values
#' describing row groups to be processed. This is particularly relevant for R because vectorized processing of row batches is the only practical way to scan very large tables using pure-R code. 
#'
#' \code{mvl_write_extent_index()} creates the index in memory and then writes it out. The memory usage is proportional to the number of 
#' repeat stretches. Sorting tables improves performance, but is not a requirement.
#'
#' @param MVLHANDLE a handle to MVL file produced by mvl_open()
#' @param L  list of vector like MVL_OBJECTs 
#' @param name if specified add a named entry to MVL file directory
#' @return an object of class MVL_OFFSET that describes an offset into this MVL file. MVL offsets are vectors and can be concatenated. They can be written to MVL file directly, or as part of another object such as list.
#' @seealso \code{\link{mvl_order_vectors}}, \code{\link{mvl_index_lapply}}, \code{\link{mvl_find_matches}}, \code{\link{mvl_group}}, \code{\link{mvl_find_matches}}, \code{\link{mvl_indexed_copy}}, \code{\link{mvl_merge}}, \code{\link{mvl_hash_vectors}}, \code{\link{mvl_get_groups}}
#'  
#' @examples
#' \dontrun{
#' Mtmp<-mvl_open("tmp_a.mvl", append=TRUE, create=TRUE)
#' mvl_write_object(Mtmp, data.frame(x=runif(100), y=(1:100) %% 10), "df1")
#' Mtmp<-mvl_remap(Mtmp)
#' mvl_write_extent_index(Mtmp, list(Mtmp$df1[,"y",ref=TRUE]), "df1_extent_index_y")
#' Mtmp<-mvl_remap(Mtmp)
#' mvl_index_lapply(Mtmp["df1_extent_index_y", ref=TRUE], list(c(2, 3)),
#'                                            function(i, idx) { return(list(i, idx))})
#' # Example of full scan
#' mvl_index_lapply(Mtmp["df1_extent_index_y", ref=TRUE], ,
#'                                            function(i, idx) { return(list(i, idx))})
#' }
#' @export
#'
mvl_write_extent_index<-function(MVLHANDLE, L, name=NULL) {
	if(!inherits(MVLHANDLE, "MVL")) stop("not an MVL object")
	offset<-.Call(write_extent_index, MVLHANDLE[["handle"]], L)
	if(!is.null(name))mvl_add_directory_entries(MVLHANDLE, name, offset)
	return(invisible(offset))
	}
	
#' Apply function to indices of rows with matching hashes
#'
#' Please use generic function \code{mvl_index_lapply()} instead.
#'
#' This function is passed the index computed by \code{mvl_write_extent_index()} and a list of vectors, which rows are used to compute 64-bit hashes.
#' For each row, we call the function \code{fn(i, idx)}, where \code{i} gives the index of query row, and \code{idx} gives the indices of with matching hashes.
#'
#' 64-bit hashes have very few collisions, nevertheless the user is advised to double check that the values actually match.
#' 
#' The hash computation is type dependent, so \code{1} stored as an integer will produce a different hash than when stored as floating point. This function accounts for this by internally converting to types the index was generated with.
#'
#' @param extent_index  MVL_OBJECT computed by \code{mvl_write_extent_index()} 
#' @param data_list  a list of vectors of equal length. They can be MVL_OBJECTs or R vectors. If missing, scan the entire table one hash at a time.
#' @param fn a function of two arguments - and index into \code{data_list} and a corresponding list of indices
#' @return a list of results of function \code{fn}
#' @seealso \code{\link{mvl_index_lapply}}, \code{\link{mvl_group}}
#'  
#' @examples
#' \dontrun{
#' Mtmp<-mvl_open("tmp_a.mvl", append=TRUE, create=TRUE)
#' mvl_write_object(Mtmp, data.frame(x=runif(100), y=(1:100) %% 10), "df1")
#' Mtmp<-mvl_remap(Mtmp)
#' mvl_write_extent_index(Mtmp, list(Mtmp$df1[,"y",ref=TRUE]), "df1_extent_index_y")
#' Mtmp<-mvl_remap(Mtmp)
#' mvl_extent_index_lapply(Mtmp["df1_extent_index_y", ref=TRUE], list(c(2, 3)),
#'                                            function(i, idx) { return(list(i, idx))})
#' # Example of full scan
#' mvl_extent_index_lapply(Mtmp["df1_extent_index_y", ref=TRUE], ,
#'                                            function(i, idx) { return(list(i, idx))})
#' }
#' @export
#'
mvl_extent_index_lapply<-function(extent_index, data_list, fn) {
	if(missing(data_list))
		L<-.Call(extent_index_scan, extent_index, fn, new.env())
		else
		L<-.Call(extent_index_lapply, extent_index, data_list, fn, new.env())
	return(L)
	}
	
#' Index copy vector
#'
#' This function creates new MVL vectors and data frames by copying only rows or values specified by given indices. 
#' The vector indices can be an R integer or numeric vector, a logical vector of the size matching to the object being copied, 
#' or a suitable vector stored in MVL file.
#' 
#'
#' @param MVLHANDLE a handle to MVL file produced by mvl_open()
#' @param x a vector-like MVL_OBJECT or a data.frame stored in MVL file
#' @param indices  a vector of indices into x
#' @param name if specified add a named entry to MVL file directory
#' @param only.columns if x is MVL_OBJECT with class data.frame copy only columns specified in this character or integer vector
#' @return an object of class MVL_OFFSET that describes an offset into this MVL file. MVL offsets are vectors and can be concatenated. They can be written to MVL file directly, or as part of another object such as list.
#' @seealso \code{\link{mvl_hash_vectors}}, \code{\link{mvl_find_matches}}, \code{\link{mvl_group}}, \code{\link{mvl_find_matches}}, \code{\link{mvl_order_vectors}}, \code{\link{mvl_merge}}, \code{\link{mvl_write_object}}, \code{\link{mvl_fused_write_objects}}
#'  
#' @examples
#' \dontrun{
#' Mtmp<-mvl_open("tmp_a.mvl", append=TRUE, create=TRUE)
#' mvl_write_object(Mtmp, runif(100), "vec1")
#' Mtmp<-mvl_remap(Mtmp)
#' permutation1<-mvl_order_vectors(list(Mtmp["vec1", ref=TRUE]))
#' mvl_indexed_copy(Mtmp, Mtmp["vec1", ref=TRUE], permutation1, name="vec1_sorted")
#' Mtmp<-mvl_remap(Mtmp)
#' print(Mtmp$vec1_sorted)
#' }
#' @export
#'
mvl_indexed_copy<-function(MVLHANDLE, x, indices, name=NULL, only.columns=NULL) {
	if(!inherits(MVLHANDLE, "MVL")) stop("not an MVL object")
	
	if(mvl_inherits(x, "data.frame")) {
		if(is.null(only.columns)) {
			j<-1:(dim(x)[2])
			} else {
			if(is.integer(only.columns) || is.numeric(only.columns)) {
				j<-only.columns
				} else {
				j<-match(only.columns, names(x))
				if(any(is.na(j))) {
					stop("Unknown columns ", paste(only.columns[is.na(j)], collapse=" "))
					}
				}
			}
		if(length(j)<1) {
			stop("No columns selected")
			}
		L<-list()
		for(k in 1:length(j)) {
			L[[k]]<-mvl_indexed_copy(MVLHANDLE, x[,j[k]], indices)
			}
		L<-unlist(L)
		class(L)<-"MVL_OFFSET"
		
		metadata.offset<-mvl_write_object_metadata(MVLHANDLE, NULL, class.override="data.frame", names.override=names(x)[j], dim.override=c(mvl_xlength(indices), dim(x)[2]))
		
		offset<-mvl_write_vector(MVLHANDLE, L, metadata.offset)
		if(!is.null(name))mvl_add_directory_entries(MVLHANDLE, name, offset)
		return(invisible(offset))
		}
		
	if(!inherits(x, "MVL_OBJECT")) {
		offset<-mvl_write_object(MVLHANDLE, x[mvl2R(indices)], name=name)
		return(invisible(offset))
		}
	
	cl<-NULL
	m<-unclass(x)[["metadata"]]
	if(!is.null(m) && !is.null(m[["class"]]))cl<-m[["class"]]
	if(!is.null(cl))
		metadata.offset<-mvl_write_object_metadata(MVLHANDLE, NULL, class.override=cl)
		else
		metadata.offset<-NULL
	
	offset<-.Call(indexed_copy_vector, MVLHANDLE[["handle"]], x, indices, metadata.offset)
	if(!is.null(name))mvl_add_directory_entries(MVLHANDLE, name, offset)
	return(invisible(offset))
	}
	
	
#' Merge two MVL data frames and write the result
#'
#' @param MVLHANDLE a handle to MVL file produced by \code{mvl_open()}
#' @param df1 a data.frame stored in MVL file
#' @param df2 a data.frame stored in MVL file
#' @param name if specified add a named entry to MVL file directory
#' @param by  list of columns to use as key
#' @param by.x  list of columns to use as key for \code{df1}
#' @param by.y  list of columns to use as key for \code{df1}
#' @param suffixes  rename columns with identical names using these suffixes
#' @param only.columns.x only copy these columns from df1
#' @param only.columns.y only copy these columns from df2
#' @return an object of class MVL_OFFSET that describes an offset into this MVL file. MVL offsets are vectors and can be concatenated. They can be written to MVL file directly, or as part of another object such as list.
#' @seealso \code{\link{mvl_hash_vectors}}, \code{\link{mvl_find_matches}}, \code{\link{mvl_group}}, \code{\link{mvl_find_matches}}, \code{\link{mvl_indexed_copy}}, \code{\link{mvl_order_vectors}}, \code{\link{mvl_fused_write_objects}}
#'  
#' @examples
#' \dontrun{
#' Mtmp<-mvl_open("tmp_a.mvl", append=TRUE, create=TRUE)
#' mvl_write_object(Mtmp, data.frame(x=rep(c("a", "b"), 50), y=1:100), "df1")
#' mvl_write_object(Mtmp, data.frame(x=rep(c("b", "c"), 50), y=runif(100), z=21:120), "df2")
#' Mtmp<-mvl_remap(Mtmp)
#' mvl_merge(Mtmp, Mtmp$df1, Mtmp$df2, by.x="y", by.y="z", only.columns.y=c("x"), name="df_merged")
#' Mtmp<-mvl_remap(Mtmp)
#' print(Mtmp$df_merged[1:10,])
#' }
#' @export
#'
mvl_merge<-function(MVLHANDLE, df1, df2, name=NULL, by=NULL, by.x=by, by.y=by, suffixes=c(".x", ".y"), only.columns.x=NULL, only.columns.y=NULL)
{
if(is.null(by.x) || is.null(by.y))stop("You need to specify which columns to merge using by, or by.x and by.y")

L1<-lapply(by.x, function(x){return(df1[,x,ref=TRUE])})
L2<-lapply(by.y, function(x){return(df2[,x,ref=TRUE])})

cols1<-names(df1)
if(!is.null(only.columns.x)) {
	cols1<-cols1[cols1 %in% only.columns.x]
	}

cols1<-cols1[!(cols1 %in% by.x)]
cols1<-c(by.x, cols1)

cols2<-names(df2)
if(!is.null(only.columns.y)) {
	cols2<-cols2[cols2 %in% only.columns.y]
	}
cols2<-cols2[!(cols2 %in% by.y)]

rename.cols<-intersect(cols1, cols2)

merge_plan<-mvl_find_matches(L1, L2)

L<-list()

if(length(cols1)>0)
for(i in 1:length(cols1)) {
	L[[length(L)+1]]<-mvl_indexed_copy(MVLHANDLE, df1[,cols1[[i]],ref=TRUE], merge_plan[["index1"]])
	}

if(length(cols2)>0)
for(i in 1:length(cols2)) {
	L[[length(L)+1]]<-mvl_indexed_copy(MVLHANDLE, df2[,cols2[[i]],ref=TRUE], merge_plan[["index2"]])
	}
	
Fr<-cols1 %in% rename.cols
if(any(Fr)) {
	cols1[Fr]<-paste(cols1[Fr], suffixes[[1]], sep="")
	}
Fr<-cols2 %in% rename.cols
if(any(Fr)) {
	cols2[Fr]<-paste(cols2[Fr], suffixes[[2]], sep="")
	}
n<-as.character(c(cols1, cols2))

L<-unlist(L)
class(L)<-"MVL_OFFSET"

m<-mvl_write_object_metadata(MVLHANDLE, NULL, dim.override=c(mvl_xlength(merge_plan[["index1"]]), length(n)), names.override=n, class.override="data.frame")

offset<-mvl_write_vector(MVLHANDLE, L, m)
if(!is.null(name))mvl_add_directory_entries(MVLHANDLE, name, offset)
return(invisible(offset))
}
	
mvl_write_object_metadata1<-function(MVLHANDLE, x, drop.rownames=FALSE, dim.override=NULL, class.override=NULL, names.override=NULL, rownames.override=NULL) {
	n<-mvl_write_string(MVLHANDLE, "MVL_LAYOUT")
	o<-mvl_write_string(MVLHANDLE, "R")
	
	metadata_overrides<-attr(x, "MVL_METADATA")
	
	if(!is.null(metadata_overrides)) {
		if(is.null(dim.override))dim.override<-metadata_overrides[["dim"]]
		if(is.null(class.override))class.override<-metadata_overrides[["class"]]
		if(is.null(names.override))names.override<-metadata_overrides[["names"]]
		if(is.null(rownames.override))rownames.override<-metadata_overrides[["rownames"]]
		}
	
	if(!is.null(dim(x)) || !is.null(dim.override)) {
		n<-c(n, mvl_write_string(MVLHANDLE, "dim"))
		if(!is.null(dim.override))
			o<-c(o, mvl_write_vector(MVLHANDLE, dim.override))
			else
			o<-c(o, mvl_write_vector(MVLHANDLE, dim(x)))
		}
	if(!is.null(class.override) || ( !is.null(class(x)) && !(mvl_inherits(x,  c("raw", "numeric", "integer"))))) {
		n<-c(n, mvl_write_string(MVLHANDLE, "class"))
		if(!is.null(class.override))
			o<-c(o, mvl_write_vector(MVLHANDLE, as.character(class.override)))
			else
			o<-c(o, mvl_write_vector(MVLHANDLE, mvl_class(x)))
		}
	if(!is.null(names(x)) || !is.null(names.override)) {
		n<-c(n, mvl_write_string(MVLHANDLE, "names"))
		if(!is.null(names.override))
			o<-c(o, mvl_write_vector(MVLHANDLE, names.override))
			else
			o<-c(o, mvl_write_vector(MVLHANDLE, names(x)))
		}
	if(!drop.rownames && (!is.null(rownames(x)) || !is.null(rownames.override))) {
		n<-c(n, mvl_write_string(MVLHANDLE, "rownames"))
		if(!is.null(rownames.override))
			o<-c(o, mvl_write_vector(MVLHANDLE, rownames.override))
			else
			o<-c(o, mvl_write_vector(MVLHANDLE, rownames(x)))
		}
	if(is.null(n))return(NULL)
	ofs<-c(n, o)
	class(ofs)<-"MVL_OFFSET"
	return(mvl_write_vector(MVLHANDLE, ofs))
	}

mvl_write_object_metadata<-function(MVLHANDLE, x, drop.rownames=FALSE, dim.override=NULL, class.override=NULL, names.override=NULL, rownames.override=NULL) {

	metadata_overrides<-c(list(), attr(x, "MVL_METADATA"))
	
	if(!is.null(dim.override))metadata_overrides[["dim"]]<-dim.override
	if(!is.null(class.override))metadata_overrides[["class"]]<-class.override
	if(!is.null(names.override))metadata_overrides[["names"]]<-names.override
	if(!is.null(rownames.override))metadata_overrides[["rownames"]]<-rownames.override

	if(inherits(x, "MVL_OBJECT"))
		a<-unclass(x)[["metadata"]]
		else
		a<-attributes(x)
		
	if(length(a)>0) {
		metadata_overrides<-c(metadata_overrides, a[!(names(a) %in% c("MVL_METADATA", "MVL_LAYOUT", names(metadata_overrides)))])
		}
		
	if(is.null(metadata_overrides[["dim"]]) && length(dim(x))>1)metadata_overrides[["dim"]]<-dim(x)
	
	if(is.null(metadata_overrides[["class"]])) {
		if(inherits(x, c("factor", "logical")))metadata_overrides[["class"]]<-class(x)
			else
		if(length(metadata_overrides[["dim"]])>1)metadata_overrides[["class"]]<-class(x)
		}
		
	
	if(length(metadata_overrides)>0) {
		#print(metadata_overrides)
		
		nn<-names(metadata_overrides)
		
		n<-sapply(names(metadata_overrides), function(x) { return(mvl_write_string(MVLHANDLE, x))})
		o<-sapply(metadata_overrides, function(x) { return(mvl_write_object(MVLHANDLE, x)) })
		} else return(NULL)
	
	
	#if(is.null(n))return(NULL)
	ofs<-c(mvl_write_string(MVLHANDLE, "MVL_LAYOUT"), n, mvl_write_string(MVLHANDLE, "R"), o)
	class(ofs)<-"MVL_OFFSET"
	return(mvl_write_vector(MVLHANDLE, ofs))
	}
	
#' Return underlying R class of object
#'
#' This function returns the equivalent R class of underlying MVL object, i.e. the class it would have if converted into a regular R object.
#' For non-MVL objects the function simply calls the usual R class(), so it can be used instead of class() for code that operates on both usual R objects and MVL objects.
#'
#' @param x  any object
#' @return character string giving object class
#'  
#' @export
#'
mvl_class<-function(x) {
	if(!inherits(x, "MVL_OBJECT"))return(class(x))
	m<-unclass(x)[["metadata"]]
	if(is.null(m) || is.null(m[["class"]])) {
		st<-mvl_object_stats(x)
		if(st[["type"]] %in% c(1,2))return("integer")
			else
		if(st[["type"]] %in% c(3,4,5))return("numeric")
			else
		if(st[["type"]] %in% c(102))return("character")
		return("MVL_OBJECT")
		}
	return(m[["class"]])
	}

#' Check inheritance of R or MVL objects
#'
#' This function works just like the usual R \code{inherits()}, except that for MVL_OBJECTS it used the class value stored in the MVL file.
#' For non-MVL objects the function simply calls the usual R \code{inherit()}, so it can be used instead of \code{inherit()} for code that operates on both usual R objects and MVL objects.
#'
#' @param x  any object
#' @param clstr classes to match against
#' @param which when TRUE return a boolean array indicating of which classes named in \code{clstr} are inherited by x. When FALSE return a single boolean indicating inheritance of any class named in \code{clstr}.
#' @return character string giving object class
#'  
#' @export
#'
mvl_inherits<-function(x, clstr, which=FALSE) {
	if(!inherits(x, "MVL_OBJECT"))return(inherits(x, clstr, which=which))
	cl<-mvl_class(x)
	inh<-inherits(x, clstr, which=TRUE) | ( clstr %in% cl)
	if(which)return(inh)
		else return(any(inh))
	}
	
#' Write R object into MVL file
#'
#' @param MVLHANDLE a handle to MVL file produced by mvl_open()
#' @param x a suitable R object (vector, array, list, data.frame) or a vector-like MVL_OBJECT
#' @param name if specified add a named entry to MVL file directory
#' @param drop.rownames set to TRUE to prevent rownames from being written
#' @return an object of class MVL_OFFSET that describes an offset into this MVL file. MVL offsets are vectors and can be concatenated. They can be written to MVL file directly, or as part of another object such as list.
#' @seealso \code{\link{mvl_indexed_copy}}, \code{\link{mvl_merge}}
#'  
#' @examples
#' \dontrun{
#' Mtmp<-mvl_open("tmp_a.mvl", append=TRUE, create=TRUE)
#' mvl_write_object(Mtmp, runif(100), "vec1")
#' L<-list()
#' L[["x"]]<-mvl_write_object(Mtmp, 1:5)
#' L[["y"]]<-mvl_write_object(Mtmp, c("a", "b"))
#' L[["df"]]<-mvl_write_object(Mtmp, data.frame(x=1:100, z=runif(100)))
#' mvl_write_object(Mtmp, L, "L")
#' Mtmp<-mvl_remap(Mtmp)
#' print(Mtmp$L)
#' }
#' @export
#'
mvl_write_object<-function(MVLHANDLE, x, name=NULL, drop.rownames=FALSE) {
	#cat("Writing", class(x), typeof(x), "\n")
	metadata<-mvl_write_object_metadata(MVLHANDLE, x, drop.rownames=drop.rownames)
	
	if(inherits(x, c("numeric", "character", "integer", "factor", "raw", "array", "matrix", "logical", "MVL_OBJECT", "Date", "MVL_R_SERIALIZED"))) {
		offset<-mvl_write_vector(MVLHANDLE, x, metadata)
		if(!is.null(name))mvl_add_directory_entries(MVLHANDLE, name, offset)
		return(invisible(offset))
		}
	if(inherits(x, c("list", "data.frame"))) {
		v<-unlist(lapply(x, function(x){return(mvl_write_object(MVLHANDLE, x))}))
		class(v)<-"MVL_OFFSET"
		offset<-mvl_write_vector(MVLHANDLE, v, metadata)
		if(!is.null(name))mvl_add_directory_entries(MVLHANDLE, name, offset)
		return(invisible(offset))
		}
	if(inherits(x, "MVL_OFFSET")) {
		# Already written
		return(invisible(x))
		}
	stop("Could not write object with class ", class(x))
	}
	
#' Write R object in serialized form
#'
#' This function packages the object into a raw vector before writing it out. The raw vector is tagged with 
#' special class that assures the object is automatically converted back to R representation when reading.
#' Serialized objects can only be read completely.
#' 
#' This function can be used in rare cases when it is important to store a complete R object, but it is not
#' important for it to be accessible by other programs, and it is not important to conserve memory or bandwidth.
#' 
#' @param MVLHANDLE a handle to MVL file produced by mvl_open()
#' @param x a suitable R object (vector, array, list, data.frame) or a vector-like MVL_OBJECT
#' @param name if specified add a named entry to MVL file directory
#' @return an object of class MVL_OFFSET that describes an offset into this MVL file. MVL offsets are vectors and can be concatenated. They can be written to MVL file directly, or as part of another object such as list.
#' @seealso \code{\link{mvl_write_object}}
#' @examples
#' \dontrun{
#' Mtmp<-mvl_open("tmp_a.mvl", append=TRUE, create=TRUE)
#' mvl_write_serialized_object(Mtmp, lm(rnorm(100)~runif(100)), "LM1")
#' Mtmp<-mvl_remap(Mtmp)
#' print(mvl2R(Mtmp$LM1))
#' }
#' @export
#'
mvl_write_serialized_object<-function(MVLHANDLE, x, name=NULL) {
	offset<-mvl_write_object(MVLHANDLE, structure(serialize(x, NULL), class="MVL_R_SERIALIZED"))
	if(!is.null(name))mvl_add_directory_entries(MVLHANDLE, name, offset)
	return(invisible(offset))
	}
	
#' Concatenate objects and write result into MVL file. 
#'
#' This function can concatenate a mixture of R and MVL objects. For vectors it is the equivalent of \code{c()}. For array and matrices it works as \code{cbind()}
#' For data frames it works as \code{rbind}, but row names are always dropped.
#'
#' @param MVLHANDLE a handle to MVL file produced by \code{mvl_open()}
#' @param L a list of suitable R objects (vector, array, data.frame) or equivalent MVL objects.
#' @param name if specified add a named entry to MVL file directory
#' @param drop.rownames set to TRUE to prevent rownames from being written
#' @return any object of class MVL_OFFSET that describes an offset into this MVL file. MVL offsets are vectors and can be concatenated. They can be written to MVL file directly, or as part of another object such as list.
#'  
#' @examples
#' \dontrun{
#' Mtmp<-mvl_open("tmp_a.mvl", append=TRUE, create=TRUE)
#' mvl_write_object(Mtmp, runif(100), "vec1")
#' mvl_write_object(Mtmp, runif(100), "vec2")
#' Mtmp<-mvl_remap(Mtmp)
#' mvl_fused_write_objects(Mtmp, list(Mtmp["vec1", ref=TRUE], Mtmp["vec2", ref=TRUE], runif(3)),
#'                                                                                   name="vec3")
#' }
#' @export
#'
mvl_fused_write_objects<-function(MVLHANDLE, L, name=NULL, drop.rownames=TRUE) {
	#cat("Writing", class(x), typeof(x), "\n")
	if(length(L)<1)stop("No objects to concatenate")
	if(!drop.rownames)stop("Cannot write out row names")
	
	cl<-mvl_class(L[[1]])
		
	dims<-lapply(L, dim)
	dim1<-unlist(lapply(dims, function(x) { return(length(x)<2)}))
	if(any(dim1) && any(!dim1))stop("Cannot concatenate: some objects have more than one dimension some not")
	
	lengths<-lapply(L, length)
	if(is.null(dims[[1]]))dims<-lengths
	
	if(length(L)>1) {
		kd<-length(dims[[1]])
		if(kd>1) {
			if(any(cl %in% c("data.frame"))) idx<-2:kd
				else idx<-1:(kd-1)
			for(i in 2:length(L)) {
				if(any(dims[[i]][idx]!=dims[[1]][idx]))stop("Cannot concatenate: inconsistent dimensions for objects 1 and ", i, ": ", paste(dims[[1]], collapse=","), " ", paste(dims[[i]], collapse=","))
				}
			}
		}
	
	
	if(any(cl %in% c("numeric", "character", "integer", "factor", "raw", "array", "matrix", "logical", "Date"))) {
		dims_new<-dim(L[[1]])
		if(!is.null(dims_new)) {
			if(length(dims_new)>1) {
				dims_new<-c(dims_new[1:(length(dims_new)-1)], sum(unlist(lapply(dims, function(x){return(x[[length(dims_new)]])}))))
				} else {
				dims_new<-sum(unlist(dims))
				}
			}
		metadata<-mvl_write_object_metadata(MVLHANDLE, L[[1]], drop.rownames=drop.rownames, dim.override=dims_new)
		offset<-mvl_fused_write_vector(MVLHANDLE, L, metadata)
		if(!is.null(name))mvl_add_directory_entries(MVLHANDLE, name, offset)
		return(invisible(offset))
		}
	if(any(cl %in% c("data.frame"))) {
		dims_new<-dim(L[[1]])
		if(!is.null(dims_new)) {
			if(length(dims_new)>1) {
				dims_new<-c(sum(unlist(lapply(dims, function(x){return(x[[1]])}))), dims_new[2:length(dims_new)])
				} else {
				dims_new<-sum(unlist(dims))
				}
			}
		metadata<-mvl_write_object_metadata(MVLHANDLE, L[[1]], drop.rownames=drop.rownames, dim.override=dims_new)
		v<-list()
		for(i in 1:length(L[[1]])) {
			Lcol<-lapply(L, function(x){
				if(!inherits(x, "MVL_OBJECT")) {
					if(inherits(x[[i]], "factor"))return(as.character(x[[i]]))
					return(x[[i]])
					}
				return(x[,i,ref=TRUE]) 
				})
			v[[i]]<-mvl_fused_write_objects(MVLHANDLE, Lcol)
			}
		v<-unlist(v)
		class(v)<-"MVL_OFFSET"
		offset<-mvl_write_vector(MVLHANDLE, v, metadata)
		if(!is.null(name))mvl_add_directory_entries(MVLHANDLE, name, offset)
		return(invisible(offset))
		}
	stop("Could not perform fused write of ", length(L), " objects with class ", paste(cl, collapse=" "))
	}
	
mvl_flatten_string<-function(v) {
	if(is.character(v))return(v)
	return(unlist(lapply(v, function(x){return(x[[1]])})))
	}

mvl_read_metadata<-function(MVLHANDLE, metadata_offset, recurse=FALSE) {
	metadata<-mvl_read_object(MVLHANDLE, metadata_offset, recurse=recurse)
	if(!is.null(metadata)) {
		n<-metadata[1:(length(metadata)/2)]
		metadata<-metadata[(length(metadata)/2+1):length(metadata)]
		names(metadata)<-unlist(n)
		}
	return(metadata)
	}
	
#' Return MVL object properties
#'
#' Provide detailed information on stored MVL object without retrieving it
#'
#' This function is given either an MVL handle and an offset in MVL file to examine, or just a single parameter of class MVL_OBJECT that contains
#'  both handle and offset
#'
#' This function returns a list of object parameters describing total number of elements, element type (as used by libMVL) and a pointer to the underlying data.
#' The pointer is passed via a cast to double to preserve its 64-bit value and can be used with custom C code, for example by using package inline.
#'
#' @param MVLHANDLE either a handle provided by mvl_open() or an MVL object such as produced by indexing operators
#' @param offset offset to the object which properties are to be retrieved
#' @param scan scan vector element to obtain additional statistics
#' @return list with object properties
#' @export
#'
mvl_object_stats<-function(MVLHANDLE, offset=NULL, scan=FALSE) {
	if(!inherits(MVLHANDLE, "MVL") && !inherits(MVLHANDLE, "MVL_OBJECT")) stop("not an MVL object")
	mh<-unclass(MVLHANDLE)
	if(is.null(offset) && inherits(MVLHANDLE, "MVL_OBJECT"))offset<-mh[["offset"]]
	if(!inherits(offset, "MVL_OFFSET"))stop("not an MVL offset")
	
	
	L<-list(handle=mh[["handle"]], 
		offset=offset, 
		metadata_offset=.Call(read_metadata, mh[["handle"]], offset),
		length=.Call(read_lengths, mh[["handle"]], offset), 
		type=.Call(read_types, mh[["handle"]], offset), 
		data_pointer=.Call(get_vector_data_ptr, mh[["handle"]], offset)
		)
		
	if(scan) {
		vstats<-.Call(compute_vector_stats, mh[["handle"]], offset)
		names(vstats)<-c("max", "min", "center", "scale", "average_repeat_length", "nrepeat")
		L<-c(L, vstats)
		}
	
	return(L)
	}
	
make_mvl_object<-function(MVLHANDLE, offset) {
	mh<-unclass(MVLHANDLE)
	L<-list(handle=mh[["handle"]], offset=offset, length=.Call(read_lengths, mh[["handle"]], offset), type=.Call(read_types, mh[["handle"]], offset), metadata_offset=.Call(read_metadata, mh[["handle"]], offset))
	
	L[["metadata"]]<-mvl_read_metadata(MVLHANDLE, L[["metadata_offset"]])
	
	L[["values_fixup"]]<-0
	
	object_class<-L[["metadata"]][["class"]]
	if(any(object_class=="data.frame")) {
		L[["bracket_dispatch"]]<-1
		} else 
	if(any(object_class=="logical")) {
		L[["values_fixup"]]<-1
		if(any(object_class %in% c("array", "matrix"))) {
			L[["bracket_dispatch"]]<-2
			} else {
			L[["bracket_dispatch"]]<-3
			}
		} else
	if(any(object_class %in% c("array", "matrix"))) {
		L[["bracket_dispatch"]]<-2
		} else
	if(any(object_class=="MVL_INDEX")) {
		L[["bracket_dispatch"]]<-3
		} else 
	if(is.null(object_class) && length(L[["metadata"]][["dim"]])>1) {
		L[["bracket_dispatch"]]<-2
		} else
	if(is.null(object_class) || any(object_class=="list")) {
		L[["bracket_dispatch"]]<-3
		} else
		L[["bracket_dispatch"]]<-0
		
	class(L)<-"MVL_OBJECT"
	return(L)
	}
	
mvl_read_object<-function(MVLHANDLE, offset, idx=NULL, recurse=FALSE, raw=FALSE, ref=FALSE) {
	if(!inherits(MVLHANDLE, "MVL") && !inherits(MVLHANDLE, "MVL_OBJECT")) stop("not an MVL object")
	if(!inherits(offset, "MVL_OFFSET"))stop("not an MVL offset")
	if(is.na(offset))return(NA)
	if(offset==0)return(NULL)
	mh<-unclass(MVLHANDLE)
	metadata_offset<-.Call(read_metadata, mh[["handle"]], offset)
	metadata<-mvl_read_metadata(MVLHANDLE, metadata_offset, recurse=recurse)
	cl<-metadata[["class"]]
	
	if(any(metadata[["MVL_LAYOUT"]]=="R") && !recurse && !is.null(cl) && any(cl %in% c("data.frame", "MVL_INDEX"))) {
		return(make_mvl_object(MVLHANDLE, offset))
		}
		
	if(is.null(idx)) {
		if(raw) 
			vec<-.Call(read_vectors_raw, mh[["handle"]], offset)[[1]]
			else
			vec<-.Call(read_vectors, mh[["handle"]], offset)[[1]]
		} else {
		if(raw)
			vec<-.Call(read_vectors_idx_raw2, mh[["handle"]], offset, idx[[1]])[[1]]
			else
			vec<-.Call(read_vectors_idx3, mh[["handle"]], offset, idx[[1]])[[1]]
		}
	if(inherits(vec, "MVL_OFFSET")) {
		lengths<-.Call(read_lengths, mh[["handle"]], vec)
		if(recurse) {
			vec<-lapply(vec, function(x){class(x)<-"MVL_OFFSET" ; return(mvl_read_object(MVLHANDLE, x, recurse=TRUE, raw=raw))})
		 } else {
			Fsmall<-lengths<MVL_SMALL_LENGTH & !ref
			vec[Fsmall]<-lapply(vec[Fsmall], function(x){class(x)<-"MVL_OFFSET" ; return(mvl_read_object(MVLHANDLE, x, recurse=FALSE, raw=raw))})
			vec[!Fsmall]<-lapply(vec[!Fsmall], function(x) {class(x)<-"MVL_OFFSET"; return(make_mvl_object(MVLHANDLE, x)) } )
			}
		}
#	attr(vec, "metadata")<-metadata
	if(any(metadata[["MVL_LAYOUT"]]=="R")) {
		if(!is.null(cl)) {
			if(any(cl=="MVL_R_SERIALIZED")) {
				return(unserialize(vec))
				}
	
			if(any(cl=="factor") || any(cl=="character")) {
				vec<-mvl_flatten_string(vec)
				if(cl=="factor")vec<-as.factor(vec)
				}
			if(!any(cl %in% c("data.frame")) && !is.null(metadata[["dim"]]))dim(vec)<-metadata[["dim"]]
			if(any(cl=="logical")) {
				F<-vec==255
				vec<-as.logical(vec)
				vec[F]<-NA
				}
			#if(cl=="data.frame" && any(unlist(lapply(vec, class))=="MVL_OBJECT"))cl<-"MVL_OBJECT"
			class(vec)<-cl
			}
		if(!is.null(metadata[["names"]]))names(vec)<-mvl_flatten_string(metadata[["names"]])
		rn<-metadata[["rownames"]]
		if(!is.null(rn) && !inherits(rn, "MVL_OBJECT"))rownames(vec)<-mvl_flatten_string(metadata[["rownames"]])
			else
		if(!is.null(cl) && any(cl=="data.frame"))rownames(vec)<-1:(metadata[["dim"]][1])
		}
	return(vec)
	}
	
#' Add entries to MVL directory
#' 
#' Add one or more entries to MVL directory
#'
#' This function is used to expand MVL directory. The offsets must be created by calling \code{mvl_write_object} on the same handle.
#' Note that \code{mvl_write_object} has an optional parameter \code{name} that will add an entry when specified.
#' Thus this function is meant for special circumstances, such as creating multiple entries in the directory that point to the same offset
#'
#' @param MVLHANDLE handle to open MVL file created by \code{mvl_open}
#' @param tag a vector of one or more character tags
#' @param offsets a vector of MVL_OFFSET objects, one per tag, created by \code{mvl_write_object}
#'
#' @export
mvl_add_directory_entries<-function(MVLHANDLE, tag, offsets) {
	if(!inherits(MVLHANDLE, "MVL")) stop("not an MVL object")
	if(!inherits(offsets, "MVL_OFFSET"))stop("not an MVL offset")
	return(.Call(add_directory_entries, unclass(MVLHANDLE)[["handle"]], as.character(tag), offsets))
	}
	
#' @export
`[.MVL_OFFSET`<-function(x, y) {
	z<-unclass(x)[y]
	class(z)<-"MVL_OFFSET"
	return(z)
	}

#' @export
`[[.MVL_OFFSET`<-function(x, y) {
	z<-unclass(x)[[y]]
	class(z)<-"MVL_OFFSET"
	return(z)
	}
	
# We are exporting plain function as well, so one can list its source code from command line
#' MVL handle subscription operator
#'
#' Retrieve objects stored in mappable vector library
#'
#' See \code{mvl_open} for example.
#'
#' @param MVLHANDLE - handle to opened MVL file as generated by \code{mvl_open}
#' @param y - name of object to retrieve
#' @param raw - request to return data in raw format when it does not map exactly to R data types. 
#' @param ref - always return an MVL_OBJECT
#' @param drop - whether to drop dimensionality, such as when a sublist contains only one element
#' @return Stored object
#' @export [.MVL
#' @export
`[.MVL`<-function(MVLHANDLE, y, raw=FALSE, ref=FALSE, drop=TRUE) {
	if(!inherits(MVLHANDLE, "MVL")) stop("not an MVL object")
	
	if(length(y)==0)return(NULL)
	
	if(is.factor(y))y<-as.character(y)
	
	if(is.character(y)) {
#		y<-MVLHANDLE[["directory"]][[y]]
		y<-.Call(find_directory_entries, MVLHANDLE[["handle"]], y)
		}
		
	if(inherits(y, "MVL_OFFSET")) {
		L<-lapply(y, function(offset) {
			if(offset==0)return(NULL)
			class(offset)<-"MVL_OFFSET"
			obj<-make_mvl_object(MVLHANDLE, offset)
			obj2<-unclass(obj)
		
			if(!ref && obj2[["length"]]<MVL_SMALL_LENGTH)obj<-mvl_read_object(MVLHANDLE, obj2[["offset"]], recurse=FALSE, raw=raw)
			return(obj)
			})
			
		if(drop && (length(L)==1))return(L[[1]])
			else return(L)
		}
	stop("Cannot process ", y, " class=", class(y))
	}
	
#' MVL handle subscription operator
#'
#' Retrieve objects stored in the library. Unlike for R lists the match on name is always exact. 
#'
#' @param MVLHANDLE - handle to opened MVL file as generated by \code{mvl_open}
#' @param name - name of object to retrieve
#' @return Stored object
#' @export $.MVL
#' @export
`$.MVL`<-function(MVLHANDLE, name) {
	return(MVLHANDLE[name])
	}
	
#' Print MVL directory
#' 
#' @param x handle to MVL file as created by \code{mvl_open}
#' @return character vector of names present in the directory
#'
#' @export
names.MVL<-function(x) {
	if(!inherits(x, "MVL")) stop("not an MVL object")
	return(names(x[["directory"]]))
	}
	
#' Print MVL 
#' 
#' @param x handle to MVL file as created by \code{mvl_open}
#' @param \ldots not used.
#' @return invisible(MVLHANDLE)
#'
#' @export
print.MVL<-function(x, ...) {
	if(!inherits(x, "MVL")) stop("not an MVL object")
	x2<-unclass(x)
	if(length(x2[["directory"]])< MVL_SMALL_LENGTH)
		cat("MVL(handle ", x2[["handle"]], " directory with ", length(x2[["directory"]]), " entries, [c(\"", paste0(names(x2[["directory"]]), collapse="\", \""), "\")])\n", sep="")
		else
		cat("MVL(handle ", x2[["handle"]], " directory with ", length(x2[["directory"]]), " entries, [c(\"", paste0(names(x2[["directory"]])[1:MVL_SMALL_LENGTH], collapse="\", \""), "\")])\n", sep="")
	invisible(x)
	}
	
MVL_TYPE_NAME<-list("UINT8", "INT32", "INT64", "FLOAT", "DOUBLE")
MVL_TYPE_NAME[[100]]<-"OFFSET64"
MVL_TYPE_NAME[[101]]<-"CSTRING"
MVL_TYPE_NAME[[102]]<-"STRVEC"
	
mvl_type_name<-function(x) {
	y<-lapply(MVL_TYPE_NAME[x], function(xx){if(is.null(xx))return(NA); return(xx)})
	return(unlist(y))
	}
	
#' Print MVL object
#
#' This is a convenience function for displaying MVL_OBJECTs.
#' 
#' @param x MVL_OBJECT as retrieved by subscription operators
#' @param \ldots not used.
#' @param small_length do not list more than this number of columns in data frames
#' @return invisible(obj)
#'
#' @export
print.MVL_OBJECT<-function(x, ..., small_length=10) {
	obj<-unclass(x)
	object_class<-obj[["metadata"]][["class"]]
	if(is.null(object_class) || (object_class %in% c("numeric", "integer", "character"))) {
		tname<-mvl_type_name(obj[["type"]])
		len<-obj[["length"]]
		cat("MVL_OBJECT(", tname, " length ", len, ")\n", sep="")
		} else
	if(object_class %in% c("data.frame", "array", "matrix")) {
		od<-obj[["metadata"]][["dim"]]
		if(is.null(od))od<-obj[["length"]]
		
		nm<-obj[["metadata"]][["names"]]
		if(length(nm)<1 || length(od)!=2) 
			cat("MVL_OBJECT(", mvl_type_name(obj[["type"]]), " ", object_class, " ", paste0(od, collapse="x"), ")\n", sep="")
			else
		if(length(nm)< small_length) 
			cat("MVL_OBJECT(", mvl_type_name(obj[["type"]]), " ", object_class, " ", paste0(od, collapse="x"), " [,c(\"", paste0(nm, collapse="\", \""), "\")] )\n", sep="")
			else
			cat("MVL_OBJECT(", mvl_type_name(obj[["type"]]), " ", object_class, " ", paste0(od, collapse="x"), " [,c(\"", paste0(nm[1:small_length], collapse="\", \""), "\", ...)] )\n", sep="")
		} else
	if(any(object_class=="MVL_INDEX")) {
		print.MVL_INDEX(x, ...)
		} else {
		cat("MVL_OBJECT(", mvl_type_name(obj[["type"]]), " ", object_class, ")\n", sep="")
		}
	invisible(obj)
	}
	
#' Obtain dimensions of MVL object
#' 
#' @param x MVL_OBJECT as retrieved by subscription operators
#' @return object dimensions, or NULL if not present
#'
#' @export
dim.MVL_OBJECT<-function(x) {
	x2<-unclass(x)
	if(is.null(x2[["metadata"]]))return(x2[["length"]])
	return(x2[["metadata"]][["dim"]])
	}
	
#' Obtain length of MVL object
#' 
#' @param x MVL_OBJECT as retrieved by subscription operators
#' @return object length as stored in MVL file. This is the total length of object for arrays, and number of columns for data frames.
#'
#' @export
length.MVL_OBJECT<-function(x) {
	return(unclass(x)[["length"]])
	}
	
	
#' Retrieve MVL object names
#' 
#' @param x MVL_OBJECT as retrieved by subscription operators
#' @return character vector of names
#'
#' @export
names.MVL_OBJECT<-function(x) {
	m<-unclass(x)[["metadata"]]
	if(is.null(m))return(NULL)
	return(mvl2R(m[["names"]]))
	}
	
#' Make sure the object is fully converted to its R representation
#'
#' If the object is stored in MVL file, we return its pure R representation. 
#' Otherwise, we return the object itself.
#'
#' @param obj - MVL object retrieved by subscription of MVL library or other objects
#' @param raw - request to return data in raw format when it does not map exactly to R data types. 
#' @return Stored object
#' @export
mvl2R<-function(obj, raw=FALSE) {
	if(inherits(obj, "MVL_OBJECT")) {
		return(mvl_read_object(obj, unclass(obj)[["offset"]], recurse=TRUE, raw=raw))
		}
	return(obj)
	}
	
# We are exporting plain function as well, so one can list its source code from command line
#' MVL object subscription operator
#'
#' Retrieve objects stored in mappable vector library. Large nested objects are returned as instances of MVL_OBJECT to delay access until needed.
#'
#' See \code{mvl_open} for example.
#'
#' @param obj - MVL object retrieved by subscription of MVL library or other objects
#' @param i - optional index.
#' @param \ldots optional additional indices for multidimensional arrays and data frames
#' @param drop - whether to drop dimensionality, such as done with R array or data frames
#' @param raw - request to return data in raw format when it does not map exactly to R data types. 
#' @param recurse - force recursive conversion to pure R objects. 
#' @param ref - always return an MVL_OBJECT
#' @return Stored object
#' @export [.MVL_OBJECT
#' @export
`[.MVL_OBJECT`<-function(obj, i, ..., drop=TRUE, raw=FALSE, recurse=FALSE, ref=FALSE) {
	obj2<-unclass(obj)
	if(missing(i) && ...length()==0) {
		return(mvl_read_object(obj, obj2[["offset"]], recurse=!ref, raw=raw, ref=ref))
		}
	#cat("obj class ", obj[["metadata"]][["class"]], "\n")
	if(obj2[["bracket_dispatch"]]==1) {
		if(...length()>1)stop("Object", obj, "has only two dimensions")
		n<-obj2[["metadata"]][["names"]]
		if(...length()<1 || missing(..1)) {
			j<-1:length(n)
			} else {
			j<-..1
			if(is.logical(j)) {
				j<-which(j)
				} else
			if(is.character(j) || is.factor(j)) {
				if(is.factor(j))j<-as.character(j)
				j0<-match(j, n)
				if(any(is.na(j0)))
					stop("Unknown columns ", paste(j[is.na(j0)], collapse=" "))
				j<-j0
				}
			n<-n[j]
			}
		if(missing(i)) {
			if(length(j)==1 && drop) {
				ofs<-.Call(read_vectors_idx3, obj2[["handle"]], obj2[["offset"]], j)[[1]]
				
				L<-make_mvl_object(obj, ofs)
				
				if(!ref && (length(L)<MVL_SMALL_LENGTH || recurse) )L<-mvl_read_object(L, unclass(L)[["offset"]], recurse=recurse, ref=ref, raw=raw)
				
				return(L)
				}
			d<-obj2[["metadata"]][["dim"]]
			if(is.null(d))d<-length(obj)
			i<-1:(d[1])
			}
		if(is.logical(i)) {
			i<-which(i)
			}
		if(raw)
			ofs<-.Call(read_vectors_idx_raw2, obj2[["handle"]], obj2[["offset"]], j)[[1]]
			else
			ofs<-.Call(read_vectors_idx3, obj2[["handle"]], obj2[["offset"]], j)[[1]]
			
		df<-lapply(ofs, function(x){class(x)<-"MVL_OFFSET" ; return(mvl_read_object(obj, x, idx=list(i)))})
				
		names(df)<-n
		class(df)<-"data.frame"
		if(dim(df)[2]==1 && !is.null(drop) && drop)return(df[,1])
		
		if(length(i)>0) {
			rn<-obj2[["metadata"]][["rownames"]]
			if(inherits(rn, "MVL_OBJECT")) {
				rn<-mvl_flatten_string(rn[i, recurse=TRUE])
				rownames(df)<-rn
				} else {
				rownames(df)<-1:(length(i))
				}
			}
		return(df)
		}
	if(obj2[["bracket_dispatch"]]==2) {
		od<-obj2[["metadata"]][["dim"]]
		if(is.null(od))od<-obj2[["length"]]
		
		if(missing(i)) {
			d<-od[1]
			idx<-1:(od[1])
			} else {
			if(is.logical(i))i<-which(i)
			d<-length(i)
			idx<-i
			}
		idx<-idx-1
		mult<-1
		
		if(...length()+1!=length(od))stop("Array dimension is ", length(od), " but ", ...length()+1, " indices given")
		
		if(...length()>0) {
			for(j in 1:...length()) {
				ii<-NULL
				try({ii<-...elt(j)}, silent=TRUE)
				if(is.null(ii)) {
					d<-c(d, od[j+1])
					ii<-1:od[j+1]
					} else {
					if(is.logical(ii))ii<-which(ii)
					d<-c(d, length(ii))
					}
				mult<-mult*od[j]
				idx<-outer(idx, (ii-1)*mult, FUN="+")
				}
			}
		if(raw)
			vec<-.Call(read_vectors_idx_raw2, obj2[["handle"]], obj2[["offset"]], idx+1)[[1]]
			else
			vec<-.Call(read_vectors_idx3, obj2[["handle"]], obj2[["offset"]], idx+1)[[1]]
		
		if(obj2[["values_fixup"]]==1 && !raw) {
			F<-vec==255
			vec<-as.logical(vec)
			vec[F]<-NA
			}
		
		if(is.null(drop) || drop==TRUE) {
			d<-d[d!=1]
			if(length(d)>0)dim(vec)<-d
			} else
			dim(vec)<-d
		return(vec)
		}
	if(obj2[["bracket_dispatch"]]==3) {
		if(...length()==0) {
	# 		if(is.logical(i)) {
	# 			i<-which(i)
	# 			}
			nn<-obj2[["metadata"]][["names"]]
			
			if(is.factor(i))i<-as.character(i)
			if(is.character(i)) {
				if(is.null(nn))stop("Object has no names")
				i<-match(i, nn)
				}
	#		if(is.numeric(i)) 
				{
				if(raw)
					vec<-.Call(read_vectors_idx_raw2, obj2[["handle"]], obj2[["offset"]], i)[[1]]
					else
					vec<-.Call(read_vectors_idx3, obj2[["handle"]], obj2[["offset"]], i)[[1]]
	
				if(obj2[["values_fixup"]]==1 && !raw) {
					F<-vec==255
					vec<-as.logical(vec)
					vec[F]<-NA
					}

				if(inherits(vec, "MVL_OFFSET")) {
					if(length(vec)==1 && ref) {
						vec<-make_mvl_object(obj, vec)
						} else
					if(recurse) {
						vec<-lapply(vec, function(x) {class(x)<-"MVL_OFFSET" ; return(mvl_read_object(obj, x, recurse=recurse, ref=ref, raw=raw)) })
						} else {
						lengths<-.Call(read_lengths, obj2[["handle"]], vec)
						vec<-lapply(1:length(vec), function(i) {
							x<-vec[i] 
							class(x)<-"MVL_OFFSET"
							if(lengths[i]<MVL_SMALL_LENGTH) {
								return(mvl_read_object(obj, x, recurse=recurse, ref=ref, raw=raw))
								} else {
								return(make_mvl_object(obj, x))
								}
							})
						}
					}
				if(!is.null(nn))names(vec)<-nn[i]
#				if(drop && length(vec)==1)vec<-unlist(vec)
				return(vec)
				}
			} else {
			stop("Too many indices")
			}
		stop("Cannot process ", obj)
		}
	stop("Cannot process ", obj)
	}
	
# We are exporting plain function as well, so one can list its source code from command line
#' MVL object subscription operator
#'
#' Retrieve objects stored in mappable vector library. Large nested objects are returned as instances of MVL_OBJECT to delay access until needed.
#'
#' See \code{mvl_open} for example.
#'
#' @param obj - MVL object retrieved by subscription of MVL library or other objects
#' @param i - index.
#' @param raw - request to return data in raw format when it does not map exactly to R data types. 
#' @param recurse - force recursive conversion to pure R objects. 
#' @param ref - always return an MVL_OBJECT
#' @return Stored object
#' @export [[.MVL_OBJECT
#' @export
`[[.MVL_OBJECT`<-function(obj, i, raw=FALSE, recurse=FALSE, ref=FALSE) {
	obj2<-unclass(obj)
	if(length(i)!=1)stop("You can only select one element in vector index")
	if(is.factor(i))i<-as.character(i)
	if(is.character(i)) {
		nn<-obj2[["metadata"]][["names"]]
		if(is.null(nn))stop("Object has no names")
		#i<-which.max(obj2[["metadata"]][["names"]]==i)
		i<-match(i, nn)
		}
	if(is.na(i)) {
		# R behaviour is mixed in this situation
		# For lists R returns empty list, but (1:5)[[NA]] throws an exception
		# It would not be unreasonable to think that vec[[NA]] should be NA
		# On the other hand, subscripting with NA is inefficient, and throwing an exception
		# forces to filter out NAs first
		# For now, throw an exception
		stop("NA subscript is out of bounds")
		return(NA)
		}
	if(raw)
		vec<-.Call(read_vectors_idx_raw2, obj2[["handle"]], obj2[["offset"]], i)[[1]]
		else
		vec<-.Call(read_vectors_idx3, obj2[["handle"]], obj2[["offset"]], i)[[1]]

	if(obj2[["values_fixup"]]==1) {
		F<-vec==255
		vec<-as.logical(vec)
		vec[F]<-NA
		}

	if(inherits(vec, "MVL_OFFSET")) {
		if(length(vec)==1 && ref) {
			vec<-make_mvl_object(obj, vec)
			} else
		if(recurse) {
			vec<-lapply(vec, function(x) {class(x)<-"MVL_OFFSET" ; return(mvl_read_object(obj, x, recurse=recurse, ref=ref, raw=raw)) })
			} else {
			lengths<-.Call(read_lengths, obj2[["handle"]], vec)
			if(lengths[1]<MVL_SMALL_LENGTH) {
					return(mvl_read_object(obj, vec, recurse=recurse, ref=ref, raw=raw))
					} else {
					return(make_mvl_object(obj, vec))
					}
			}
		}
	
	return(vec)
	}
	
# #' Print summary information of MVL_INDEX
# #' 
# #' @param obj MVL_INDEX object
# #' @param \ldots not used.
# #' @return invisible(obj)
# #'
# #' @export
print.MVL_INDEX<-function(obj, ...) {
	obj2<-unclass(obj)
	obj2$metadata$class<-"MVL_OBJECT"
	class(obj2)<-"MVL_OBJECT"
	index_type<-obj2["index_type"]
	if(index_type==1) {
		vec_types<-unlist(mvl2R(obj2["vec_types"]))
		cat("MVL_INDEX(extent index using ", length(vec_types), " column",ifelse(length(vec_types)>1, "s", ""),": ", paste(unlist(lapply(vec_types, mvl_type_name)), collapse=","), ")\n", sep="")
		return(invisible(obj))
		}
	if(index_type==2) {
		vec_bits<-unlist(mvl2R(obj2["bits"]))
		cat("MVL_INDEX(spatial_index1 using ", length(vec_bits), " column",ifelse(length(vec_bits)>1, "s", ""),")\n", sep="")
		return(invisible(obj))
		}
	cat("MVL_INDEX(unknown index type)\n")
	return(invisible(obj))
	}
	
#' Apply function to indices of nearby rows
#'
#' This function is passed the index computed by \code{mvl_write_spatial_index1} or \code{mvl_write_extent_index} and a list of vectors, which are interpreted in a data frame fashion, or an R data.frame.
#' For each row we retrieve that set of indices that matches it and call function fn(i, idx) with index i of row being processed and vector idx listing matched indices.
#'
#' The notion of "matched indices" is specific to the type of index being used.
#'
#' For an index created with \code{mvl_write_spatial_index1} we return the indices of nearby rows. The user should apply an additional cut to narrow down to actual indices needed.
#'
#' For an index created with \code{mvl_write_extent_index} we return the indices of rows with identical hashes. Even though 64-bit hashes produce very few collisions, it is recommended to apply additional cut to ensure that only the exactly matching rows are returned.
#'
#' @param index  MVL_OBJECT computed by \code{mvl_write_spatial_index1} or \code{mvl_write_extent_index} 
#' @param data_list  a list of vectors of equal length. They can be MVL_OBJECTs or R vectors, or a data.fame.
#' @param fn a function of two arguments - and index into \code{data_list} and a corresponding list of indices
#' @return a list of results of function \code{fn}
#' @seealso \code{\link{mvl_group}}
#'  
#' @examples
#' \dontrun{
#' Mtmp<-mvl_open("tmp_a.mvl", append=TRUE, create=TRUE)
#' mvl_write_object(Mtmp, data.frame(x=runif(100), y=1:100), "df1")
#' Mtmp<-mvl_remap(Mtmp)
#' mvl_write_spatial_index1(Mtmp, list(Mtmp$df1[,"x",ref=TRUE], Mtmp$df1[,"y", ref=TRUE]),
#'                                                                c(2, 3), "df1_sp_groups")
#' Mtmp<-mvl_remap(Mtmp)
#' mvl_index_lapply(Mtmp["df1_sp_groups", ref=TRUE], list(c(0.5, 0.6), c(2, 3)),
#'                                            function(i, idx) { return(list(i, idx))})
#' }
#' @export
#'
mvl_index_lapply<-function(index, data_list, fn) {
	index_type<-mvl2R(index["index_type"])
	if(index_type==1) {
		if(missing(data_list))
			L<-.Call(extent_index_scan, index, fn, new.env())
			else
			L<-.Call(extent_index_lapply, index, data_list, fn, new.env())
		return(L)
		}
	if(index_type==2) {
		if(missing(data_list))stop("Spatial index1 does not support full index scan")
		L<-.Call(neighbors_lapply, index, data_list, fn, new.env())
		return(L)
		}
	stop("Unrecognized index of type ", index_type)
	}
	
.onUnload <- function (libpath) {
  library.dynam.unload("RMVL", libpath)
}

Try the RMVL package in your browser

Any scripts or data that you put into this service are public.

RMVL documentation built on Nov. 2, 2023, 6:09 p.m.