Description Usage Arguments Details Value See Also Examples
`h5mread` is the result of experimenting with alternative `rhdf5::h5read` implementations.
It should still be considered experimental!
h5mread(filepath, name, starts=NULL, counts=NULL, noreduce=FALSE,
        as.integer=FALSE, as.sparse=FALSE, method=0L)

get_h5mread_returned_type(filepath, name, as.integer=FALSE)
filepath |
The path (as a single string) to the HDF5 file where the dataset to read from is located. |
name |
The name of the dataset in the HDF5 file. |
starts, counts |
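`starts` and `counts` are used to specify the array selection. Each argument can be either `NULL` or a list with one list element per dimension in the dataset. If `starts` and `counts` are both `NULL`, then the entire dataset is read. If `starts` is a list, each list element in it must be a vector of valid positive indices along the corresponding dimension in the dataset. An empty vector (`integer(0)`) is accepted and indicates an empty selection along that dimension. A `NULL` is accepted and indicates a full selection along the dimension, so it has the same meaning as a missing subscript when subsetting an array-like object with `[`. Each list element in `counts` must be `NULL` or a vector of non-negative integers of the same length as the corresponding list element in `starts`. Each value in the vector indicates how many positions to select starting from the associated start value. A `NULL` indicates that a single position is selected for each value along the corresponding dimension. If `counts` is `NULL`, then each index in each `starts` list element indicates a single position selection along the corresponding dimension. Finally note that when `counts` is not `NULL`, the selection described by `starts` and `counts` must be strictly ascending along each dimension. |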
noreduce |
TODO |
as.integer |
TODO |
as.sparse |
TODO |
method |
TODO |
COMING SOON...
An array for `h5mread`.
The type of the array that will be returned by `h5mread` for `get_h5mread_returned_type`.
Equivalent to:

    typeof(h5mread(filepath, name, rep(list(integer(0)), ndim)))

where `ndim` is the number of dimensions (a.k.a. the rank in HDF5 jargon) of the dataset. `get_h5mread_returned_type` is provided for convenience.
- `h5read` in the rhdf5 package.
- `type` in the DelayedArray package.
- `extract_array` in the DelayedArray package.
- The `TENxBrainData` dataset (in the TENxBrainData package).
- `h5mread_from_reshaped` to read data from a virtually reshaped HDF5 dataset.
## ---------------------------------------------------------------------
## BASIC USAGE
## ---------------------------------------------------------------------

## A 50 x 12 matrix of random doubles in (-5, 5), written out as HDF5
## dataset "M0".
m0 <- matrix((runif(600) - 0.5) * 10, ncol=12)
M0 <- writeHDF5Array(m0, name="M0")
m0_path <- path(M0)

## No selection: the whole dataset comes back.
m <- h5mread(m0_path, "M0")
stopifnot(identical(m0, m))

## A NULL element in 'starts' selects everything along that dimension;
## the column indices do not need to be sorted.
cols <- c(3, 12:8)
m <- h5mread(m0_path, "M0", starts=list(NULL, cols))
stopifnot(identical(m0[ , cols], m))

## An integer(0) element in 'starts' selects nothing along that dimension
## (what a NULL subscript does with '[').
m <- h5mread(m0_path, "M0", starts=list(integer(0), cols))
stopifnot(identical(m0[NULL , cols], m))

## With 'as.integer=TRUE' the result matches 'm0' after its storage mode
## has been switched to integer.
rows <- 1:5
m <- h5mread(m0_path, "M0", starts=list(rows, NULL), as.integer=TRUE)
storage.mode(m0) <- "integer"
stopifnot(identical(m0[rows, ], m))

## A 10 x 5 x 7 integer array written as dataset "A0" to the file that
## already holds "M0".
a0 <- array(1:350, c(10, 5, 7))
A0 <- writeHDF5Array(a0, filepath=m0_path, name="A0")
a0_path <- path(A0)
h5ls(a0_path)

## With 'counts', each start opens a run of positions: 4 positions from 2
## and 2 positions from 7 along the first dimension. Along the third
## dimension 'counts' is NULL and the single start (6) selects one slice.
a <- h5mread(a0_path, "A0",
             starts=list(c(2, 7), NULL, 6),
             counts=list(c(4, 2), NULL, NULL))
stopifnot(identical(a0[c(2:5, 7:8), , 6, drop=FALSE], a))

## Load the data in a sparse array representation:
m1 <- matrix(c(5:-2, rep.int(c(0L, 99L), 11)), ncol=6)
M1 <- writeHDF5Array(m1, name="M1", chunkdim=c(3L, 2L))
m1_path <- path(M1)
sel <- list(5:3, NULL)
m <- h5mread(m1_path, "M1", starts=sel)
sas <- h5mread(m1_path, "M1", starts=sel, as.sparse=TRUE)
class(sas)  # SparseArraySeed object (see ?SparseArraySeed)
as(sas, "dgCMatrix")
## The sparse result expands to exactly the dense one.
stopifnot(identical(m, sparse2dense(sas)))
## ---------------------------------------------------------------------
## PERFORMANCE
## ---------------------------------------------------------------------
## Timing comparisons between h5mread() and h5read() on the TENxBrainData
## files obtained through ExperimentHub. The speedups quoted in the
## comments are the ones reported by the original examples.
library(ExperimentHub)
hub <- ExperimentHub()

## With the "sparse" TENxBrainData dataset
## ---------------------------------------
fname0 <- hub[["EH1039"]]
h5ls(fname0)  # all datasets are 1D datasets

## 5000 random positions (with repeats) along the 1D "mm10/data" dataset.
index <- list(77 * sample(34088679, 5000, replace=TRUE))
## h5mread() is about 4x faster than h5read():
system.time(a <- h5mread(fname0, "mm10/data", index))
system.time(b <- h5read(fname0, "mm10/data", index=index))
stopifnot(identical(a, b))

## 7500 random positions (with repeats) along "mm10/barcodes".
index <- list(sample(1306127, 7500, replace=TRUE))
## h5mread() is about 20x faster than h5read():
system.time(a <- h5mread(fname0, "mm10/barcodes", index))
system.time(b <- h5read(fname0, "mm10/barcodes", index=index))
stopifnot(identical(a, b))

## With the "dense" TENxBrainData dataset
## --------------------------------------
fname1 <- hub[["EH1040"]]
h5ls(fname1)  # "counts" is a 2D dataset

## 250 random rows x 250 random columns (no repeats).
index <- list(sample(  27998, 250),
              sample(1306127, 250))
## h5mread() is about 2x faster than h5read():
system.time(a <- h5mread(fname1, "counts", index))
system.time(b <- h5read(fname1, "counts", index=index))
stopifnot(identical(a, b))

## Alternatively 'as.sparse=TRUE' can be used to reduce memory usage:
system.time(sas <- h5mread(fname1, "counts", index, as.sparse=TRUE))
stopifnot(identical(a, sparse2dense(sas)))

## The bigger the selection, the greater the speedup between
## h5read() and h5mread():
## Not run:
## The code below is marked "Not run", so it is guarded with 'if (FALSE)'
## to make sure that sourcing this script really skips it (the h5read()
## call was reported to take about 2m30s).
if (FALSE) {
  index <- list(sample(  27998, 1000),
                sample(1306127, 1000))
  ## h5mread() about 8x faster than h5read() (20s vs 2m30s):
  system.time(a <- h5mread(fname1, "counts", index))
  system.time(b <- h5read(fname1, "counts", index=index))
  stopifnot(identical(a, b))

  ## With 'as.sparse=TRUE' (about the same speed as with 'as.sparse=FALSE'):
  system.time(sas <- h5mread(fname1, "counts", index, as.sparse=TRUE))
  stopifnot(identical(a, sparse2dense(sas)))
}
## End(Not run)
|
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.