MsBackend | R Documentation |
Note that the classes described here are not meant to be used directly by the end-users and the material in this man page is aimed at package developers.
MsBackend
is a virtual class that defines what each different
backend needs to provide. MsBackend
objects provide access to
mass spectrometry data. Such backends can be classified into
in-memory or on-disk backends, depending on where the data, i.e.
spectra (m/z and intensities) and spectra annotation (MS level,
charge, polarity, ...) are stored.
Typically, in-memory backends keep all data in memory ensuring fast data access, while on-disk backends store (parts of) their data on disk and retrieve it on demand.
The Backend functions and implementation notes for new backend classes section documents the API that a backend must implement.
Currently available backends are:
MsBackendMemory
and MsBackendDataFrame
: store all data in memory. The
MsBackendMemory
is optimized for accessing and processing the peak data
(i.e. the numerical matrices with the m/z and intensity values) while the
MsBackendDataFrame
keeps all data in a DataFrame
.
MsBackendMzR
: stores the m/z and intensities on-disk in raw
data files (typically mzML
or mzXML
) and the spectra
annotation information (header) in memory in a DataFrame
. This
backend requires the mzR
package.
MsBackendHdf5Peaks
: stores the m/z and intensities on-disk in custom hdf5
data files and the remaining spectra variables in memory (in a
DataFrame
). This backend requires the rhdf5
package.
See below for more details about individual backends.
## S4 method for signature 'MsBackend'
backendBpparam(object, BPPARAM = bpparam())
## S4 method for signature 'MsBackend'
backendInitialize(object, ...)
## S4 method for signature 'list'
backendMerge(object, ...)
## S4 method for signature 'MsBackend'
backendMerge(object, ...)
## S4 method for signature 'MsBackend'
backendParallelFactor(object, ...)
## S4 method for signature 'MsBackend'
export(object, ...)
## S4 method for signature 'MsBackend'
acquisitionNum(object)
## S4 method for signature 'MsBackend'
peaksData(object, columns = c("mz", "intensity"))
## S4 method for signature 'MsBackend'
peaksVariables(object)
## S4 method for signature 'MsBackend,dataframeOrDataFrameOrmatrix'
cbind2(x, y = data.frame(), ...)
## S4 method for signature 'MsBackend'
centroided(object)
## S4 replacement method for signature 'MsBackend'
centroided(object) <- value
## S4 method for signature 'MsBackend'
collisionEnergy(object)
## S4 replacement method for signature 'MsBackend'
collisionEnergy(object) <- value
## S4 method for signature 'MsBackend'
dataOrigin(object)
## S4 replacement method for signature 'MsBackend'
dataOrigin(object) <- value
## S4 method for signature 'MsBackend'
dataStorage(object)
## S4 replacement method for signature 'MsBackend'
dataStorage(object) <- value
## S4 method for signature 'MsBackend'
dropNaSpectraVariables(object)
## S4 method for signature 'MsBackend,ANY'
extractByIndex(object, i)
## S4 method for signature 'MsBackend,missing'
extractByIndex(object, i)
## S4 method for signature 'MsBackend'
filterAcquisitionNum(object, n, file, ...)
## S4 method for signature 'MsBackend'
filterDataOrigin(object, dataOrigin = character())
## S4 method for signature 'MsBackend'
filterDataStorage(object, dataStorage = character())
## S4 method for signature 'MsBackend'
filterEmptySpectra(object, ...)
## S4 method for signature 'MsBackend'
filterIsolationWindow(object, mz = numeric(), ...)
## S4 method for signature 'MsBackend'
filterMsLevel(object, msLevel = integer())
## S4 method for signature 'MsBackend'
filterPolarity(object, polarity = integer())
## S4 method for signature 'MsBackend'
filterPrecursorMzRange(object, mz = numeric())
## S4 method for signature 'MsBackend'
filterPrecursorMz(object, mz = numeric())
## S4 method for signature 'MsBackend'
filterPrecursorMzValues(object, mz = numeric(), ppm = 20, tolerance = 0)
## S4 method for signature 'MsBackend'
filterPrecursorCharge(object, z = integer())
## S4 method for signature 'MsBackend'
filterPrecursorScan(object, acquisitionNum = integer(), f = dataOrigin(object))
## S4 method for signature 'MsBackend'
filterRanges(
object,
spectraVariables = character(),
ranges = numeric(),
match = c("all", "any")
)
## S4 method for signature 'MsBackend'
filterRt(object, rt = numeric(), msLevel. = uniqueMsLevels(object))
## S4 method for signature 'MsBackend'
filterValues(
object,
spectraVariables = character(),
values = numeric(),
ppm = 0,
tolerance = 0,
match = c("all", "any")
)
## S4 method for signature 'MsBackend'
intensity(object)
## S4 replacement method for signature 'MsBackend'
intensity(object) <- value
## S4 method for signature 'MsBackend'
ionCount(object)
## S4 method for signature 'MsBackend'
isCentroided(object, ...)
## S4 method for signature 'MsBackend'
isEmpty(x)
## S4 method for signature 'MsBackend'
isolationWindowLowerMz(object)
## S4 replacement method for signature 'MsBackend'
isolationWindowLowerMz(object) <- value
## S4 method for signature 'MsBackend'
isolationWindowTargetMz(object)
## S4 replacement method for signature 'MsBackend'
isolationWindowTargetMz(object) <- value
## S4 method for signature 'MsBackend'
isolationWindowUpperMz(object)
## S4 replacement method for signature 'MsBackend'
isolationWindowUpperMz(object) <- value
## S4 method for signature 'MsBackend'
isReadOnly(object)
## S4 method for signature 'MsBackend'
length(x)
## S4 method for signature 'MsBackend'
msLevel(object)
## S4 replacement method for signature 'MsBackend'
msLevel(object) <- value
## S4 method for signature 'MsBackend'
mz(object)
## S4 replacement method for signature 'MsBackend'
mz(object) <- value
## S4 method for signature 'MsBackend'
lengths(x, use.names = FALSE)
## S4 method for signature 'MsBackend'
polarity(object)
## S4 replacement method for signature 'MsBackend'
polarity(object) <- value
## S4 method for signature 'MsBackend'
precScanNum(object)
## S4 method for signature 'MsBackend'
precursorCharge(object)
## S4 method for signature 'MsBackend'
precursorIntensity(object)
## S4 method for signature 'MsBackend'
precursorMz(object)
## S4 replacement method for signature 'MsBackend'
precursorMz(object, ...) <- value
## S4 replacement method for signature 'MsBackend'
peaksData(object) <- value
## S4 method for signature 'MsBackend'
reset(object)
## S4 method for signature 'MsBackend'
rtime(object)
## S4 replacement method for signature 'MsBackend'
rtime(object) <- value
## S4 method for signature 'MsBackend'
scanIndex(object)
## S4 method for signature 'MsBackend'
selectSpectraVariables(object, spectraVariables = spectraVariables(object))
## S4 method for signature 'MsBackend'
smoothed(object)
## S4 replacement method for signature 'MsBackend'
smoothed(object) <- value
## S4 method for signature 'MsBackend'
spectraData(object, columns = spectraVariables(object))
## S4 replacement method for signature 'MsBackend'
spectraData(object) <- value
## S4 method for signature 'MsBackend'
spectraNames(object)
## S4 replacement method for signature 'MsBackend'
spectraNames(object) <- value
## S4 method for signature 'MsBackend'
spectraVariables(object)
## S4 method for signature 'MsBackend,ANY'
split(x, f, drop = FALSE, ...)
## S4 method for signature 'MsBackend'
supportsSetBackend(object, ...)
## S4 method for signature 'MsBackend'
tic(object, initial = TRUE)
## S4 method for signature 'MsBackend'
x[i, j, ..., drop = FALSE]
## S4 method for signature 'MsBackend'
x$name
## S4 replacement method for signature 'MsBackend'
x$name <- value
## S4 method for signature 'MsBackend'
x[[i, j, ...]]
## S4 replacement method for signature 'MsBackend'
x[[i, j, ...]] <- value
## S4 method for signature 'MsBackend'
uniqueMsLevels(object, ...)
## S4 method for signature 'MsBackend'
dataStorageBasePath(object)
## S4 replacement method for signature 'MsBackend'
dataStorageBasePath(object) <- value
MsBackendDataFrame()
## S4 method for signature 'MsBackendDataFrame'
backendInitialize(object, data, peaksVariables = c("mz", "intensity"), ...)
MsBackendHdf5Peaks()
MsBackendMemory()
## S4 method for signature 'MsBackendMemory'
backendInitialize(object, data, peaksVariables = c("mz", "intensity"), ...)
MsBackendMzR()
object |
Object extending |
BPPARAM |
for |
... |
Additional arguments. |
columns |
For |
x |
Object extending |
y |
For |
value |
replacement value for |
i |
For |
n |
for |
file |
For |
dataOrigin |
For |
dataStorage |
For |
mz |
For |
msLevel |
|
polarity |
For |
ppm |
For |
tolerance |
For |
z |
For |
acquisitionNum |
for |
f |
|
spectraVariables |
For |
ranges |
for |
match |
For |
rt |
for |
msLevel. |
same as |
values |
For |
use.names |
For |
drop |
For |
initial |
For |
j |
For |
name |
For |
data |
For |
peaksVariables |
For |
See documentation of respective function.
Backends extending MsBackend
must implement all of its methods (listed
above). Developers of new MsBackend
s should follow the
MsBackendMemory
implementation. To ensure that a new implementation
conforms to the MsBackend
definition, developers should include the test
suites provided by this package in their unit test setup. For that a variable
be
should be created in the package's "testthat.R"
file that represents
a (initialized) instance of the developed backend. Then the path to the
test suites should be defined with
test_suite <- system.file("test_backends", "test_MsBackend", package = "Spectra")
followed by test_dir(test_suite)
to run all test
files in that directory. Individual unit test files could be run with
test_file(file.path(test_suite, "test_spectra_variables.R"), stop_on_failure = TRUE)
(note that without stop_on_failure = TRUE
tests
would fail silently). Adding this code to the package's "testthat.R"
file
ensures that all tests checking the validity of an MsBackend
instance
defined in the Spectra
package are also run on the newly developed backend
class.
The MsBackend
defines the following slots:
@readonly
: logical(1)
whether the backend supports writing/replacing
of m/z or intensity values.
Backends extending MsBackend
must implement all of its methods (listed
above). Developers of new MsBackend
s should follow the
MsBackendDataFrame
implementation.
The MsBackendCached()
backend provides a caching mechanism to allow
read only backends to add or change spectra variables. This
backend shouldn't be used on its own, but is meant to be extended. See
MsBackendCached()
for details.
The MsBackend
defines the following slots:
@readonly
: logical(1)
whether the backend supports writing/replacing
of m/z or intensity values.
New backend classes must extend the base MsBackend
class and will have to
implement some of the following methods (see the MsBackend
vignette for
detailed description and examples):
[
: subset the backend. Only subsetting by element (row/i
) is
allowed. Parameter i
should support integer
indices and logical
and should throw an error if i
is out of bounds. The
MsCoreUtils::i2index
could be used to check the input i
.
For i = integer()
an empty backend should be returned. Implementation
of this method is optional, as the default calls the extractByIndex()
method (which has to be implemented as the main subsetting method).
$
, $<-
: access or set/add a single spectrum variable (column) in the
backend. Using a value
of NULL
should allow deleting the specified
spectra variable. An error should be thrown if the spectra variable is not
available.
[[
, [[<-
: access or set/add a single spectrum variable (column) in the
backend. The default implementation uses $
, thus these methods don't have
to be implemented for new classes extending MsBackend
.
acquisitionNum()
: returns the acquisition number of each
spectrum. Returns an integer
of length equal to the number of
spectra (with NA_integer_
if not available).
backendBpparam()
: return the parallel processing setup supported by
the backend class. This function can be used by any higher
level function to evaluate whether the provided parallel processing
setup (or the default one returned by bpparam()
) is supported
by the backend. Backends not supporting parallel processing (e.g.
because they contain a connection to a database that can not be
shared across processes) should extend this method to return only
SerialParam()
and hence disable parallel processing for (most)
methods and functions. See also backendParallelFactor()
for a
function to provide a preferred splitting of the backend for parallel
processing.
backendInitialize()
: initialises the backend. This method is
supposed to be called right after creating an instance of the
backend class and should prepare the backend (e.g. set the data
for the memory backend or read the spectra header data for the
MsBackendMzR
backend). Parameters can be defined freely for each
backend, depending on what is needed to initialize the backend. It
is however suggested to also support a parameter data
that can be
used to submit the full spectra data as a DataFrame
to the
backend. This would allow the backend to be also usable for the
setBackend()
function from Spectra
. Note that eventually (for
read-only backends) also the supportsSetBackend
method would need
to be implemented to return TRUE
.
The backendInitialize()
method has also to ensure to correctly set
spectra variable dataStorage
.
backendMerge()
: merges (combines) MsBackend
objects into a single
instance. All objects to be merged have to be of the same type (e.g.
MsBackendDataFrame()
).
backendParallelFactor()
: returns a factor
defining an optimal
(preferred) way how the backend can be split for parallel processing
used for all peak data accessor or data manipulation functions.
The default implementation returns a factor of length 0 (factor()
)
providing thus no default splitting. backendParallelFactor()
for
MsBackendMzR
on the other hand returns factor(dataStorage(object))
hence suggesting to split the object by data file.
backendRequiredSpectraVariables()
: returns a character
with spectra
variable names that are mandatory for a specific backend. The default
returns an empty character()
. The implementation for MsBackendMzR
returns c("dataStorage", "scanIndex")
as these two spectra variables
are required to load the MS data on-the-fly. This method needs only to
be implemented if a backend requires specific variables to be defined.
dataOrigin()
: gets a character
of length equal to the number of
spectra in object
with the data origin of each spectrum. This could
e.g. be the mzML file from which the data was read.
dataStorage()
: gets a character
of length equal to the number of
spectra in object
with the data storage of each spectrum. Note that
missing values (NA_character_
) are not supported for dataStorage
.
dataStorageBasePath()
, dataStorageBasePath<-: gets or sets the common *base* path of the directory containing all data files. If supported, the function is expected to return (or accept) a
character of length 1. Most backends (such as for example the
MsBackendMemory) will not support this function and
dataStorageBasePath() will return
NA_character_. For
MsBackendMzR, this function allows to get or change the path to the directory containing the original data files, which is required if e.g. a serialized
MsBackendMzR instance gets copied to another computer or
file system.
dropNaSpectraVariables()
: removes spectra variables (i.e. columns in the
object's spectraData
) that contain only missing values (NA
). Note that
while columns with only NA
s are removed, a spectraData()
call after
dropNaSpectraVariables()
might still show columns containing NA
values
for core spectra variables.
cbind2()
: allows appending multiple new spectra variables to the
backend at once. The values for the new spectra variables have to
be in the same order as the spectra in x
. Replacing existing spectra
variables is not supported through this function. For a more controlled
way of adding spectra variables, the joinSpectraData()
should be used.
centroided()
, centroided<-
: gets or sets the centroiding
information of the spectra. centroided()
returns a logical
vector of length equal to the number of spectra with TRUE
if a
spectrum is centroided, FALSE
if it is in profile mode and NA
if it is undefined. See also isCentroided()
for estimating from
the spectrum data whether the spectrum is centroided. value
for centroided<-
is either a single logical
or a logical
of
length equal to the number of spectra in object
.
collisionEnergy()
, collisionEnergy<-
: gets or sets the
collision energy for all spectra in object
. collisionEnergy()
returns a numeric
with length equal to the number of spectra
(NA_real_
if not present/defined), collisionEnergy<-
takes a
numeric
of length equal to the number of spectra in object
.
export()
: exports data from a Spectra
class to a file. This method is
called by the export,Spectra
method that passes itself as a second
argument to the function. The export,MsBackend
implementation is thus
expected to take a Spectra
class as second argument from which all data
is exported. Taking data from a Spectra
class ensures that also all
eventual data manipulations (cached in the Spectra
's lazy evaluation
queue) are applied prior to export - this would not be possible with only a
MsBackend class. An example implementation is the export()
method
for the MsBackendMzR
backend that supports export of the data in
mzML or mzXML format. See the documentation for the MsBackendMzR
class below for more information.
extractByIndex()
: function to subset a backend to selected elements
defined by the provided index. Similar to [
, this method should allow
extracting (or to subset) the data in any order. In contrast to [
,
however, i
is expected to be an integer
(while [
should also
support logical
and eventually character
). While being apparently
redundant to [
, this method avoids package namespace errors/problems
that can result in implementations of [
being not found by R (which
can happen sometimes in parallel processing using the
BiocParallel::SnowParam()
). This method is used internally by Spectra
to extract/subset its backend. Implementation of this method is mandatory.
filterAcquisitionNum()
: filters the object keeping only spectra matching
the provided acquisition numbers (argument n
). If dataOrigin
or
dataStorage
is also provided, object
is subsetted to the spectra with
an acquisition number equal to n
in spectra with matching dataOrigin
or dataStorage values retaining all other spectra.
filterDataOrigin()
: filters the object retaining spectra matching the
provided dataOrigin
. Parameter dataOrigin
has to be of type
character
and needs to match exactly the data origin value of the
spectra to subset.
filterDataOrigin()
should return the data ordered by the provided
dataOrigin
parameter, i.e. if dataOrigin = c("2", "1")
was provided,
the spectra in the resulting object should be ordered accordingly (first
spectra from data origin "2"
and then from "1"
).
Implementation of this method is optional since a default implementation
for MsBackend
is available.
filterDataStorage()
: filters the object retaining spectra matching the
provided dataStorage
. Parameter dataStorage
has to be of type
character
and needs to match exactly the data storage value of the
spectra to subset.
filterDataStorage()
should return the data ordered by the provided
dataStorage
parameter, i.e. if dataStorage = c("2", "1")
was provided,
the spectra in the resulting object should be ordered accordingly (first
spectra from data storage "2"
and then from "1"
).
Implementation of this method is optional since a default implementation
for MsBackend
is available.
filterEmptySpectra()
: removes empty spectra (i.e. spectra without peaks).
Implementation of this method is optional since a default implementation
for MsBackend
is available.
filterFile()
: retains data of files matching the file index or file name
provided with parameter file
.
filterIsolationWindow()
: retains spectra that contain mz
in their
isolation window m/z range (i.e. with an isolationWindowLowerMz
<=
mz
and isolationWindowUpperMz
>=
mz
).
Implementation of this method is optional since a default implementation
for MsBackend
is available.
filterMsLevel()
: retains spectra of MS level msLevel
.
Implementation of this method is optional since a default implementation
for MsBackend
is available.
filterPolarity()
: retains spectra of polarity polarity
.
Implementation of this method is optional since a default implementation
for MsBackend
is available.
filterPrecursorMzRange()
(previously filterPrecursorMz
): retains
spectra with a precursor m/z within the provided m/z range.
Implementation of this method is optional since a default implementation
for MsBackend
is available.
filterPrecursorMzValues()
: retains spectra with a precursor m/z matching
any of the provided m/z values (given ppm
and tolerance
).
Implementation of this method is optional since a default implementation
for MsBackend
is available.
filterPrecursorCharge()
: retains spectra with the defined precursor
charge(s).
Implementation of this method is optional since a default implementation
for MsBackend
is available.
filterPrecursorScan()
: retains parent (e.g. MS1) and children scans (e.g.
MS2) of acquisition number acquisitionNum
. Parameter f
is supposed to
define the origin of the spectra (i.e. the original data file) to ensure
related spectra from the same file/sample are selected and retained.
Implementation of this method is optional since a default implementation
for MsBackend
is available.
filterRanges()
: allows filtering of the Spectra
object based on user
defined numeric ranges (parameter ranges
) for one or more available
spectra variables in object (spectra variable names can be specified with
parameter spectraVariables
). Spectra for which the value of a spectra
variable is within its defined range are retained. If multiple
ranges/spectra variables are defined, the match
parameter can be used
to specify whether all conditions (match = "all"
; the default) or if
any of the conditions must match (match = "any"
; all spectra for which
values are within any of the provided ranges are retained).
Implementation of this method is optional since a default implementation
for MsBackend
is available.
filterRt()
: retains spectra of MS level msLevel
with retention times
within (>=
) rt[1]
and (<=
) rt[2]
.
Implementation of this method is optional since a default implementation
for MsBackend
is available.
filterValues()
: allows filtering of the Spectra
object based on
similarities of numeric values of one or more spectraVariables(object)
(parameter spectraVariables
) to provided values (parameter values
)
given acceptable differences (parameters tolerance and ppm). If multiple
values/spectra variables are defined, the match
parameter can be used
to specify whether all conditions (match = "all"
; the default) or if
any of the conditions must match (match = "any"
; all spectra for which
values are within any of the provided ranges are retained).
Implementation of this method is optional since a default implementation
for MsBackend
is available.
intensity()
: gets the intensity values from the spectra. Returns
a IRanges::NumericList()
of numeric
vectors (intensity values for each
spectrum). The length of the list
is equal to the number of
spectra
in object
.
intensity<-
: replaces the intensity values. value
has to be a list
(or IRanges::NumericList()
) of length equal to the number of spectra
and the number of values within each list element identical to the
number of peaks in each spectrum (i.e. the lengths(x)
). Note that just
writeable backends support this method.
ionCount()
: returns a numeric
with the sum of intensities for
each spectrum. If the spectrum is empty (see isEmpty()
),
NA_real_
is returned.
isCentroided()
: a heuristic approach assessing if the spectra in
object
are in profile or centroided mode. The function takes
the qtl
th quantile top peaks, then calculates the difference
between adjacent m/z value and returns TRUE
if the first
quartile is greater than k
. (See Spectra:::.peaks_is_centroided
for
the code.)
isEmpty()
: checks whether a spectrum in object
is empty
(i.e. does not contain any peaks). Returns a logical
vector of
length equal to the number of spectra.
isolationWindowLowerMz()
, isolationWindowLowerMz<-
: gets or sets the
lower m/z boundary of the isolation window.
isolationWindowTargetMz()
, isolationWindowTargetMz<-
: gets or sets the
target m/z of the isolation window.
isolationWindowUpperMz()
, isolationWindowUpperMz<-
: gets or sets the
upper m/z boundary of the isolation window.
isReadOnly()
: returns a logical(1)
whether the backend is read
only or does allow also to write/update data.
length()
: returns the number of spectra in the object.
lengths()
: gets the number of peaks (m/z-intensity values) per
spectrum. Returns an integer
vector (length equal to the
number of spectra). For empty spectra, 0
is returned.
msLevel()
: gets the spectra's MS level. Returns an integer
vector (of length equal to the number of spectra) with the MS
level for each spectrum (or NA_integer_
if not available).
msLevel<-
: replaces the spectra's MS level.
mz()
: gets the mass-to-charge ratios (m/z) from the
spectra. Returns a IRanges::NumericList()
of length equal to the
number of spectra, each element a numeric
vector with the m/z values of
one spectrum.
mz<-
: replaces the m/z values. value
has to be a list
of length equal
to the number of spectra and the number of values within each list element
identical to the number of peaks in each spectrum (i.e. the
lengths(x)
). Note that just writeable backends support this method.
polarity()
, polarity<-
: gets or sets the polarity for each
spectrum. polarity()
returns an integer
vector (length equal
to the number of spectra), with 0
and 1
representing negative
and positive polarities, respectively. polarity<-
expects an
integer vector of length 1 or equal to the number of spectra.
precursorCharge()
, precursorIntensity()
, precursorMz()
,
precScanNum()
, precAcquisitionNum()
: get the charge (integer
),
intensity (numeric
), m/z (numeric
), scan index (integer
)
and acquisition number (integer
) of the precursor for MS level
2 and above spectra from the object. Returns a vector of length equal to
the number of spectra in object
. NA
are reported for MS1
spectra or if no precursor information is available.
peaksData()
returns a list
with the spectra's peak data, i.e. m/z and
intensity values or other peak variables. The length of the list is
equal to the number of spectra in object
. Each element of the list has
to be a two-dimensional array (matrix
or data.frame
)
with columns depending on the provided columns
parameter (by default
"mz"
and "intensity"
, but depends on the backend's available
peaksVariables
). For an empty spectrum, a matrix
(data.frame
) with
0 rows and columns according to columns
is returned. The optional
parameter columns
, if supported by the backend, allows to define which
peak variables should be returned in the numeric
peak matrix
. As a
default c("mz", "intensity")
should be used.
peaksData<-
replaces the peak data (m/z and intensity values) of the
backend. This method expects a list
of two dimensional arrays (matrix
or data.frame
) with columns representing the peak variables. All
existing peaks data is expected to be replaced with these new values. The
length of the list
has to match the number of spectra of object
.
Note that only writeable backends need to support this method.
peaksVariables()
: lists the available variables for mass peaks. Default
peak variables are "mz"
and "intensity"
(which all backends need to
support and provide), but some backends might provide additional variables.
All these variables are expected to be returned (if requested) by the
peaksData()
function.
reset()
a backend (if supported). This method will be called on the
backend by the reset,Spectra
method that is supposed to restore the data
to its original state (see reset,Spectra
for more details). The function
returns the reset backend. The default implementation for MsBackend
returns the backend as-is.
rtime()
, rtime<-
: gets or sets the retention times for each
spectrum (in seconds). rtime()
returns a numeric
vector (length equal
to the number of spectra) with the retention time for each spectrum.
rtime<-
expects a numeric vector with length equal to the
number of spectra.
scanIndex()
: returns an integer
vector with the scan index
for each spectrum. This represents the relative index of the
spectrum within each file. Note that this can be different to the
acquisitionNum()
of the spectrum which is the index of the
spectrum as reported in the mzML file.
selectSpectraVariables()
: reduces the information within the backend to
the selected spectra variables. It is suggested to not remove values
for the "dataStorage"
variable, since this might be required for some
backends to work properly (such as the MsBackendMzR
).
smoothed()
,smoothed<-
: gets or sets whether a spectrum is
smoothed. smoothed()
returns a logical
vector of length equal
to the number of spectra. smoothed<-
takes a logical
vector
of length 1 or equal to the number of spectra in object
.
spectraData()
, spectraData<-
: gets or sets general spectrum
metadata (annotation, also called header). spectraData()
returns
a DataFrame
, spectraData<-
expects a DataFrame
with the same number
of rows as there are spectra in object
. Note that spectraData()
has to
return the full data, i.e. also the m/z and intensity values (as a list
or SimpleList
in columns "mz"
and "intensity"
).
spectraNames()
: returns a character
vector with the names of
the spectra in object
or NULL
if not set. spectraNames<-
allows to
set spectra names (if the object is not read-only).
spectraVariables()
: returns a character
vector with the
available spectra variables (columns, fields or attributes)
available in object
. This should return all spectra variables which
are present in object
, also "mz"
and "intensity"
(which are by
default not returned by the spectraVariables,Spectra
method).
split()
: splits the backend into a list
of backends (depending on
parameter f
). The default method for MsBackend
uses split.default()
,
thus backends extending MsBackend
don't necessarily need to implement
this method.
supportsSetBackend()
: whether a MsBackend
supports the Spectra
setBackend()
function. For a MsBackend
to support setBackend()
it
needs to have a parameter called data
in its backendInitialize()
method
that supports receiving all spectra data as a DataFrame
from another
backend and to initialize the backend with this data. In general
read-only backends do not support setBackend()
hence, the default
implementation of supportsSetBackend()
returns !isReadOnly(object)
. If
a read-only backend supports setBackend()
and can be initialized
with a DataFrame
an implementation of this method for that backend could
be defined that returns TRUE
(see also the MsBackend
vignette for
details and examples).
tic()
: gets the total ion current/count (sum of signal of a
spectrum) for all spectra in object
. By default, the value
reported in the original raw data file is returned. For an empty
spectrum, NA_real_
is returned.
uniqueMsLevels()
: gets the unique MS levels of all spectra in object
.
The default implementation calls unique(msLevel(object))
but more
efficient implementations could be defined for specific backends.
Backend classes must support (implement) the [
method to subset the object.
This method should only support subsetting by spectra (rows, i
) and has
to return a MsBackend
class.
Backends extending MsBackend
should also implement the backendMerge()
method to support combining backend instances (only backend classes of the
same type should be merged). Merging should follow the following rules:
The whole spectrum data of the various objects should be merged. The
resulting merged object should contain the union of the individual objects'
spectra variables (columns/fields), with eventually missing variables in
one object being filled with NA
.
MsBackendMemory
and MsBackendDataFrame
:
The MsBackendMemory
and MsBackendDataFrame
objects keep all MS data in
memory and are thus ideal for fast data processing. Due to their large memory
footprint they are however not suited for large scale experiments. The two
backends store the data differently. The MsBackendDataFrame
stores
all data in a DataFrame
and thus supports also S4-classes as
spectra variables. Also, separate access to m/z or intensity values (i.e.
using the mz()
and intensity()
methods) is faster for the
MsBackendDataFrame
. The MsBackendMemory
on the other hand, due to the
way the data is organized internally, provides much faster access to the
full peak data (i.e. the numerical matrices of m/z and intensity values).
Also subsetting and access to any spectra variable (except "mz"
and
"intensity"
) is fastest for the MsBackendMemory
.
Thus, for most use cases, the MsBackendMemory
provides a higher
performance and flexibility than the MsBackendDataFrame
and should thus be
preferred. See also issue
246 for a
performance comparison.
New objects can be created with the MsBackendMemory()
and
MsBackendDataFrame()
functions, respectively. Both backends can be
subsequently initialized with the backendInitialize()
method, taking a
DataFrame
(or data.frame
) with the (full) MS data as first parameter
data
. The second parameter peaksVariables
allows defining which columns
in data
contain peak variables such as the m/z and intensity values of
individual peaks per spectrum. The default for this parameter is
peaksVariables = c("mz", "intensity")
. Note that it is not supported to
provide only one of "mz"
or "intensity"
; if provided, both need to be
present in the data frame. Alternatively, the function also supports a data
frame without m/z and intensity values, in which case a Spectra
without
mass peaks is created.
Suggested columns of this DataFrame
are:
"msLevel"
: integer
with MS levels of the spectra.
"rtime"
: numeric
with retention times of the spectra.
"acquisitionNum"
: integer
with the acquisition number of the spectrum.
"scanIndex"
: integer
with the index of the scan/spectrum within the
mzML/mzXML/CDF file.
"dataOrigin"
: character
defining the data origin.
"dataStorage"
: character
indicating the grouping of spectra, e.g. by
input file. Note that missing values are not supported.
"centroided"
: logical
whether the spectrum is centroided.
"smoothed"
: logical
whether the spectrum was smoothed.
"polarity"
: integer
with the polarity information of the spectra.
"precScanNum"
: integer
specifying the index of the (MS1) spectrum
containing the precursor of a (MS2) spectrum.
"precursorMz"
: numeric
with the m/z value of the precursor.
"precursorIntensity"
: numeric
with the intensity value of the
precursor.
"precursorCharge"
: integer
with the charge of the precursor.
"collisionEnergy"
: numeric
with the collision energy.
"mz"
: IRanges::NumericList()
of numeric
vectors representing the
m/z values for each spectrum.
"intensity"
: IRanges::NumericList()
of numeric
vectors
representing the intensity values for each spectrum.
Additional columns are allowed too.
The peaksData()
function for MsBackendMemory
and MsBackendDataFrame
returns a list
of numeric
matrix
by default (with parameter
columns = c("mz", "intensity")
). If other peak variables are requested,
a list
of data.frame
is returned (ensuring m/z and intensity values
are always numeric
).
MsBackendMzR
, on-disk MS data backend
The MsBackendMzR
keeps only a limited amount of data in memory,
while the spectra data (m/z and intensity values) are fetched from
the raw files on-demand. This backend uses the mzR
package for
data import and retrieval and hence requires that package to be
installed. Also, it can only be used to import and represent data
stored in mzML, mzXML and CDF files.
The MsBackendMzR
backend extends the MsBackendDataFrame
backend using
its DataFrame
to keep spectra variables (except m/z and intensity) in
memory.
New objects can be created with the MsBackendMzR()
function which
can be subsequently filled with data by calling backendInitialize()
passing the file names of the input data files with argument files
.
This backend provides an export()
method to export data from a Spectra
in
mzML or mzXML format. The definition of the function is:
export(object, x, file = tempfile(), format = c("mzML", "mzXML"), copy = FALSE, BPPARAM = bpparam())
The parameters are:
object
: an instance of the MsBackendMzR
class.
x
: the Spectra object to be exported.
file
: character
with the (full) output file name(s). Should be
of length 1 or equal to length(x)
. If a single file is specified, all
spectra are exported to that file. Alternatively it is possible to specify
for each spectrum in x
the name of the file to which it should be
exported (and hence file
has to be of length equal to length(x)
).
format
: character(1)
, either "mzML"
or "mzXML"
defining the output
file format.
copy
: logical(1)
whether general file information should be copied from
the original MS data files. This only works if x
uses a MsBackendMzR
backend and if dataOrigin(x)
contains the original MS data file names.
BPPARAM
: parallel processing settings.
See examples in Spectra or the vignette for more details and examples.
The MsBackendMzR
ignores parameter columns
of the peaksData()
function and always returns m/z and intensity values.
MsBackendHdf5Peaks
, on-disk MS data backend
The MsBackendHdf5Peaks
keeps, similar to the MsBackendMzR
, peak data
(i.e. m/z and intensity values) in custom data files (in HDF5 format) on
disk while the remaining spectra variables are kept in memory. This backend
supports updating and writing of manipulated peak data to the data files.
New objects can be created with the MsBackendHdf5Peaks()
function which
can be subsequently filled with data by calling the object's
backendInitialize()
method passing the desired file names of the HDF5 data
files along with the spectra variables in form of a DataFrame
(see
MsBackendDataFrame
for the expected format). An optional parameter
hdf5path
allows specifying the folder where the HDF5 data files should be
stored. If provided, this is prepended as the path to the submitted file
names (parameter files
).
By default backendInitialize()
will store all peak data into a single HDF5
file whose name has to be provided with the parameter files
. To store peak
data across several HDF5 files data
has to contain a column
"dataStorage"
that defines the grouping of spectra/peaks into files: peaks
for spectra with the same value in "dataStorage"
are saved into the same
HDF5 file. If parameter files
is omitted, the value in dataStorage
is
used as file name (replacing any file ending with ".h5"
). To specify the
file names, the length of files
has to match the number of unique elements in
"dataStorage"
.
For details see examples on the Spectra()
help page.
The MsBackendHdf5Peaks
ignores parameter columns
of the peaksData()
function and always returns m/z and intensity values.
Johannes Rainer, Sebastian Gibb, Laurent Gatto, Philippine Louail
## The MsBackend class is a virtual class and cannot be instantiated
## directly. Below we define a new backend class extending this virtual
## class. `setClass()` returns a generator function, which is assigned to
## `MsBackendDummy` and used as the constructor in the calls below.
MsBackendDummy <- setClass("MsBackendDummy", contains = "MsBackend")
MsBackendDummy()
## This class now inherits all methods from `MsBackend`, all of which
## however throw an error. These methods would have to be implemented
## for the new backend class. The call is wrapped in `try()` so that an
## error raised by `mz()` does not stop the example.
try(mz(MsBackendDummy()))
## See `MsBackendDataFrame` as a reference implementation for a backend
## class (in the *R/MsBackendDataFrame.R* file).
## MsBackendDataFrame
##
## The `MsBackendDataFrame` uses a `S4Vectors::DataFrame` to store all MS
## data. Below we create such a backend by passing a `DataFrame` with all
## data to it. The data describes three spectra: MS levels and scan indices
## are given as integer vectors, while the m/z and intensity values are
## added as `list` columns with one numeric vector per spectrum (3, 4 and 2
## peaks, respectively).
data <- DataFrame(msLevel = c(1L, 2L, 1L), scanIndex = 1:3)
data$mz <- list(c(1.1, 1.2, 1.3), c(1.4, 54.2, 56.4, 122.1), c(15.3, 23.2))
data$intensity <- list(c(3, 2, 3), c(45, 100, 12.2, 1), c(123, 12324.2))
## Backends are supposed to be created with their specific constructor
## function. The object created here does not yet contain the data defined
## above.
be <- MsBackendDataFrame()
be
## The `backendInitialize()` method initializes the backend, filling it with
## data. This method can take any parameters needed for the backend to
## get loaded with the data (e.g. a file name from which to load the data,
## a database connection or, in this case, a data frame containing the data).
be <- backendInitialize(be, data)
be
## Data can be accessed with the accessor methods
msLevel(be)
mz(be)
## Even for spectra variables for which no data was provided (such as the
## precursor m/z here), the accessor methods are supposed to return a value.
precursorMz(be)
## The `peaksData()` method is supposed to return the peaks of the spectra as
## a `list`.
peaksData(be)
## List available peaks variables
peaksVariables(be)
## Use `columns` to extract specific peaks variables. Below we extract m/z
## and intensity values, but in reversed order to the default.
peaksData(be, columns = c("intensity", "mz"))
## List available spectra variables (i.e. spectrum metadata)
spectraVariables(be)
## Extract the precursor m/z, retention time and MS level spectra variables.
## Only "msLevel" was part of the input `data`.
spectraData(be, c("precursorMz", "rtime", "msLevel"))
## MsBackendMemory
##
## The `MsBackendMemory` uses a more efficient internal data organization
## and also allows adding arbitrary additional peaks variables (annotations).
## Below we thus add a column "peak_ann" with arbitrary names/ids for each
## peak and add the name of this column to the `peaksVariables` parameter
## of the `backendInitialize()` method (in addition to `"mz"` and
## `"intensity"`, which should **always** be specified).
## Each element of the list holds one value per peak of the respective
## spectrum in `data` (3, 4 and 2 peaks).
data$peak_ann <- list(c("a", "", "d"), c("", "d", "e", "f"), c("h", "i"))
## The call below continues on the next line; `peaksVariables` is an
## argument to `backendInitialize()`.
be <- backendInitialize(MsBackendMemory(), data,
peaksVariables = c("mz", "intensity", "peak_ann"))
be
spectraVariables(be)
## peak_ann is also listed as a peaks variable
peaksVariables(be)
## The additional peaks variable can be accessed using the `peaksData()`
## function
peaksData(be, "peak_ann")
## The $<- method can be used to replace values of an existing peaks
## variable. It is important that the number of elements matches the
## number of peaks per spectrum (here again 3, 4 and 2).
be$peak_ann <- list(1:3, 1:4, 1:2)
## A peaks variable can be removed again by setting it to NULL
be$peak_ann <- NULL
peaksVariables(be)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.