Nothing
#' @title Create or load 'lazyarray' instance
#' @description If path is missing, create a new array. If path exists and
#' meta file is complete, load existing file, otherwise create new meta file
#' and import from existing data.
#' @author Zhengjia Wang
#' @seealso \code{\link{create_lazyarray}}, \code{\link{load_lazyarray}}
#' @param path path to a local drive where array data is stored
#' @param file_names partition names without prefix nor extension; see details
#' @param storage_format data type, choices are \code{"double"},
#' \code{"integer"}, \code{"character"}, and \code{"complex"}; see details
#' @param dim integer vector, dimension of array, see \code{\link{dim}}
#' @param dimnames list of vectors, names of each dimension, see \code{\link{dimnames}}
#' @param multipart whether to split array into multiple partitions, default is true
#' @param prefix character prefix of array partition
#' @param multipart_mode 1, or 2, mode of partition, see \code{\link{create_lazyarray}}
#' @param compress_level 0 to 100, level of compression. 0 means
#' no compression, 100 means maximum compression. For persistent data,
#' it's recommended to set 100. Default is 50.
#' @param meta_name header file name, default is \code{"lazyarray.meta"}
#' @param read_only whether created array is read-only
#' @param quiet whether to suppress messages, default is false
#' @param ... ignored
#'
#' @details There are three cases and \code{lazyarray} behaves differently
#' in each case. Case 1: if \code{path} does not exist, then the function calls
#' \code{\link{create_lazyarray}} to create a blank array instance. Case 2:
#' if \code{path} exists and it contains \code{meta_name}, then load existing
#' instance with given read/write access. In this case, parameters other than
#' \code{read_only}, \code{path}, \code{meta_name} will be ignored. Case 3: if
#' \code{path} exists but \code{meta_name} is missing, then \code{lazyarray}
#' will try to create arrays from existing data files.
#'
#' If \code{lazyarray} enters case 3, then \code{file_names} will be used to
#' locate partition files. Under multi-part mode (\code{multipart=TRUE}),
#' \code{file_names} is default to 1, 2, ..., \code{dim[length(dim)]}. These
#' correspond to \code{'1.fst'}, \code{'2.fst'}, etc. under \code{path} folder.
#' You may specify your own \code{file_names} if irregular names are used;
#' the file name of each partition will be \code{<prefix><file_name>.fst}.
#' For example, \code{file_names=c('A', 'B')} and
#' \code{prefix="file-"} means the partitions will be stored as
#' \code{"file-A.fst"} and \code{"file-B.fst"}. It's fine if some files are
#' missing, the corresponding partition will be filled with \code{NA} when
#' trying to obtain values from those partition. However, the length of
#' \code{file_names} must equal the last dimension when
#' \code{multipart=TRUE}. If \code{multipart=FALSE}, \code{file_names} should
#' have length 1 and the corresponding file is the data file.
#'
#' It's worth noting that, to import from existing partition files generated by
#' other packages such as \code{'fst'}, the partition files must be homogeneous,
#' meaning the stored data length, dimension, and storage type must be the same.
#' Because \code{'fstcore'} package stores data in data frame internally,
#' the column name must be 'V1', 'V2', etc. for non-complex elements or
#' 'V1R', 'V1I', ... for complex numbers (real and imaginary data are stored
#' in different columns).
#'
#' @examples
#'
#' path <- tempfile()
#'
#' # ---------------- case 1: Create new array ------------------
#' arr <- lazyarray(path, storage_format = 'double', dim = c(2,3,4),
#' meta_name = 'lazyarray.meta')
#' arr[] <- 1:24
#'
#' # Subset and get the first partition
#' arr[,,1]
#'
#' # Partition file path (total 4 partitions)
#' arr$get_partition_fpath()
#'
#' # Removing array doesn't clear the data
#' rm(arr); gc()
#'
#' # ---------------- Case 2: Load from existing directory ----------------
#' ## Important!!! Run case 1 first
#' # Load from existing path, no need to specify other params
#' arr <- lazyarray(path, meta_name = 'lazyarray.meta', read_only = TRUE)
#'
#' arr[,,1]
#'
#' # ---------------- Case 3: Import from existing data ----------------
#' ## Important!!! Run case 1 first
#'
#' # path exists, but meta is missing, all other params are required
#' # Notice the partition count increased from 4 to 5, and storage type converts
#' # from double to character
#' arr <- lazyarray(path = path, meta_name = 'lazyarray-character.meta',
#' file_names = c(1,2,3,4,'additional'),
#' storage_format = 'character', dim = c(2,3,5),
#' quiet = TRUE, read_only = FALSE)
#'
#' # partition names
#' arr$get_partition_fpath(1:4, full_path = FALSE)
#' arr$get_partition_fpath(5, full_path = FALSE)
#'
#' # The first dimension still exist and valid
#' arr[,,1]
#'
#' # The additional partition is all NA
#' arr[,,5]
#'
#' # Set data to 5th partition
#' arr[,,5] <- rep(0, 6)
#'
#' # -------- Advanced usage: create fst data and import manually --------
#'
#' # Clear existing files
#' path <- tempfile()
#' unlink(path, recursive = TRUE)
#' dir.create(path, recursive = TRUE)
#'
#' # Create array of dimension 2x3x4, but 3rd partition is missing
#' # without using lazyarray package
#'
#' # Column names must be V1 or V1R, V1I (complex)
#' fst::write_fst(data.frame(V1 = 1:6), path = file.path(path, 'part-1.fst'))
#' fst::write_fst(data.frame(V1 = 7:12), path = file.path(path, 'part-B.fst'))
#' fst::write_fst(data.frame(V1 = 19:24), path = file.path(path, 'part-d.fst'))
#'
#' # Import via lazyarray
#' arr <- lazyarray(path, meta_name = 'test-int.meta',
#' storage_format = 'integer',
#' dim = c(2,3,4), prefix = 'part-',
#' file_names = c('1', 'B', 'C', 'd'),
#' quiet = TRUE)
#'
#' arr[]
#'
#' # Complex case
#' fst::write_fst(data.frame(V1R = 1:6, V1I = 1:6),
#' path = file.path(path, 'cplx-1.fst'))
#' fst::write_fst(data.frame(V1R = 7:12, V1I = 100:105),
#' path = file.path(path, 'cplx-2.fst'))
#' fst::write_fst(data.frame(V1R = 19:24, V1I = rep(0,6)),
#' path = file.path(path, 'cplx-4.fst'))
#' arr <- lazyarray(path, meta_name = 'test-cplx.meta',
#' storage_format = 'complex',
#' dim = c(2,3,4), prefix = 'cplx-',
#' file_names = 1:4, quiet = TRUE)
#'
#' arr[]
#'
#' @export
lazyarray <- function(
  path, storage_format, dim, dimnames = NULL,
  multipart = TRUE, prefix = "",
  multipart_mode = 1, compress_level = 50L,
  file_names = list('', seq_len(dim[[length(dim)]]))[[multipart + 1]],
  meta_name = 'lazyarray.meta',
  read_only = FALSE, quiet = FALSE, ...
){
  if(file.exists(path) && !dir.exists(path)){
    stop('lazyarray path must be a directory path, but a file was found.')
  }

  # ---- Case 1: directory does not exist, create a new array ----
  if(!dir.exists(path)){
    arr <- create_lazyarray(
      path = path, storage_format = storage_format, dim = dim,
      dimnames = dimnames, compress_level = compress_level, prefix = prefix,
      multipart = multipart, multipart_mode = multipart_mode,
      file_names = file_names, meta_name = meta_name)
    if(read_only){
      # Re-open the freshly created array with read-only access
      arr <- load_lazyarray(path = path, read_only = TRUE,
                            meta_name = meta_name)
    }
    return(arr)
  }

  # ---- Case 2: directory and meta file both exist, load as-is ----
  if(file.exists(file.path(path, meta_name))){
    arr <- load_lazyarray(path = path, read_only = read_only,
                          meta_name = meta_name)
    return(arr)
  }

  # ---- Case 3: directory exists without meta file, import data files ----
  if(!quiet){
    message('meta file not found, create one with existing files')
  }

  # Read the header of one fst file; any failure is reported against `label`.
  # `fst_path` is a promise, so it is evaluated inside the tryCatch.
  read_fst_header <- function(fst_path, label){
    tryCatch({
      header <- cpp_fst_meta_orig(fst_path)
      if(inherits(header, 'fst_error')){ stop(header) }
      header
    }, error = function(e){
      stop('Cannot open array file(s). \n ', label)
    })
  }

  if(multipart){
    # One file name is needed per slice of the last dimension
    if(length(file_names) != dim[[length(dim)]]){
      stop('path exists, but cannot find meta file. Please specify file_names\n',
           ' See ', sQuote('?lazyarray'), ' for more details')
    }
    path <- normalizePath(path)
    part_files <- file.path(path, sprintf('%s%s.fst', prefix, file_names))
    part_files <- part_files[file.exists(part_files)]

    if(length(part_files) > 0){
      # Collect (columns, rows) of every existing partition; vapply guarantees
      # a 2 x n matrix regardless of how many files are present
      shapes <- vapply(part_files, function(f){
        header <- read_fst_header(normalizePath(f), f)
        c(header$nrOfCols, header$nrOfRows)
      }, FUN.VALUE = numeric(2), USE.NAMES = FALSE)
      shapes <- unique(t(shapes))
      if(length(shapes) != 2){
        stop('All existing files must be homogeneous')
      }
      n_cols <- shapes[[1]]
      n_rows <- shapes[[2]]

      # Complex data uses two columns (real, imaginary) per stored column
      cols_per_slice <- if(storage_format == 'complex'){ 2 } else { 1 }

      mp_dim <- dim[-length(dim)]
      if(prod(mp_dim) * cols_per_slice != n_cols * n_rows){
        stop('Dimension provided does not match with existing files')
      }
      if(multipart_mode == 1){
        # Mode 1 stores each partition as a single (possibly complex) column
        if(n_cols != cols_per_slice){
          stop('Multipart mode=1, partition dimension should be ',
               paste(mp_dim, collapse = 'x'),
               'x1, but invalid dimension found.')
        }
        part_dimension <- c(mp_dim, 1)
      } else {
        part_dimension <- mp_dim
      }
    } else {
      # No partition file exists yet: derive partition shape from `dim` alone
      if(multipart_mode == 1){
        part_dimension <- dim
        part_dimension[length(dim)] <- 1
      } else if(multipart_mode == 2){
        part_dimension <- dim[-length(dim)]
      } else {
        # Previously failed later with "object 'part_dimension' not found"
        stop('multipart_mode must be 1 or 2')
      }
    }
  } else {
    if(length(file_names) == 0){
      file_names <- ''
    }
    if(length(file_names) != 1){
      stop('path exists, but cannot find meta file. Please specify file_names\n',
           ' See ', sQuote('?lazyarray'), ' for more details')
    }
    path <- normalizePath(path)
    f <- file.path(path, sprintf('%s%s.fst', prefix, file_names))
    meta <- read_fst_header(f, f)

    last_dim <- meta$nrOfCols
    prev_dim <- meta$nrOfRows
    if(storage_format == 'complex'){
      # Real and imaginary parts live in separate columns, so the file holds
      # twice as many columns as the last array dimension
      if(last_dim %% 2 != 0){
        stop('Complex array file must store real and imaginary parts in ',
             'paired columns, but an odd number of columns was found.')
      }
      last_dim <- last_dim / 2
    }
    if(last_dim != dim[length(dim)] || (last_dim * prev_dim != prod(dim))){
      stop(sprintf(
        'Array dimension not match, expected last dimension to be %d and total length %d, but last dim(%d) and length(%d) is given',
        last_dim, prev_dim * last_dim, dim[[length(dim)]], prod(dim)
      ))
    }
    part_dimension <- dim
  }

  # Write the meta file describing the imported data, then open the array
  meta <- list(
    lazyarray_version = 0,
    file_format = 'fst',
    storage_format = storage_format,
    dim = dim,
    dimnames = dimnames,
    partitioned = multipart,
    prefix = prefix,
    part_dimension = part_dimension,
    postfix = '.fst',
    compress_level = compress_level,
    file_names = file_names
  )
  meta_path <- file.path(path, meta_name)
  save_yaml(meta, meta_path)
  ClassLazyArray$new(path = path, read_only = read_only, meta_name = meta_name)
}
#' Automatically remove array data
#' @author Zhengjia Wang
#' @description Remove the files containing array data once no
#' 'lazyarray' instance is using the folder. Requires
#' installation of the \code{dipsaus} package (at least version 0.0.8).
#' @param x 'lazyarray' instance
#' @param onexit passed to \code{\link{reg.finalizer}}
#'
#' @details \code{auto_clear_lazyarray} attempts to remove the entire folder
#' containing array data. However, if some files are not created by the
#' array, only partition data and meta file will be removed, all the
#' artifacts will remain and warning will be displayed. One exception is
#' if all files left in the array directory are \code{*.meta} files,
#' all these meta files will be removed along with the folder.
#'
#' @examples
#'
#' path <- tempfile()
#' arr_dbl <- lazyarray(path, storage_format = 'double',
#' dim = 2:4, meta_name = 'meta-dbl.meta')
#' arr_dbl[] <- 1:24
#' auto_clear_lazyarray(arr_dbl)
#'
#' arr_chr <- lazyarray(path, storage_format = 'character',
#' dim = 2:4, meta_name = 'meta-chr.meta',
#' quiet = TRUE)
#' auto_clear_lazyarray(arr_chr)
#'
#' # remove either one, the directory still exists
#' rm(arr_dbl); invisible(gc(verbose = FALSE))
#'
#' arr_chr[1,1,1]
#'
#' # Remove the other one, and path will be removed
#' rm(arr_chr); invisible(gc(verbose = FALSE))
#'
#' dir.exists(path)
#' arr_check <- lazyarray(path, storage_format = 'character',
#' dim = 2:4, meta_name = 'meta-chr',
#' quiet = TRUE)
#'
#' # data is removed, so there should be no data (NAs)
#' arr_check[]
#'
#' @export
auto_clear_lazyarray <- function(x, onexit = FALSE){
  # The finalizer registry comes from 'dipsaus'; without it nothing is
  # registered and the call is a silent no-op.
  if(!requireNamespace('dipsaus', quietly = TRUE)){
    rm(x, onexit)
    return(invisible())
  }

  # The parent folder of the storage path is used as the registration key
  array_root <- normalizePath(dirname(x$storage_path))

  # Finalizer: receives the object being finalized and removes its data
  remove_array_data <- function(e){
    e$remove_data(force = TRUE)
  }
  dipsaus::shared_finalizer(x, key = array_root, remove_array_data,
                            onexit = onexit)

  # Drop local bindings so the finalizer's enclosing environment does not
  # keep a reference to `x`
  rm(array_root)
  rm(x, onexit)
  invisible()
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.