Description Usage Arguments Details Value Creation of objects, conversion, changing the backend and export Accessing spectra data Data subsetting, filtering and merging Data manipulation and analysis methods Author(s) Examples
The Spectra
class encapsulates spectral mass spectrometry data and
related metadata.
It supports multiple data backends, e.g. in-memory (MsBackendDataFrame()
),
on-disk as mzML (MsBackendMzR()
) or HDF5 (MsBackendHdf5Peaks()
).
applyProcessing(object, f = dataStorage(object), BPPARAM = bpparam(), ...)
concatenateSpectra(x, ...)
combineSpectra(
x,
f = x$dataStorage,
p = x$dataStorage,
FUN = combinePeaks,
...,
BPPARAM = bpparam()
)
joinSpectraData(x, y, by.x = "spectrumId", by.y, suffix.y = ".y")
processingLog(x)
estimatePrecursorIntensity(
x,
ppm = 10,
tolerance = 0,
method = c("previous", "interpolation"),
msLevel. = 2L,
f = dataOrigin(x),
BPPARAM = bpparam()
)
## S4 method for signature 'missing'
Spectra(
object,
processingQueue = list(),
metadata = list(),
...,
backend = MsBackendDataFrame(),
BPPARAM = bpparam()
)
## S4 method for signature 'MsBackend'
Spectra(
object,
processingQueue = list(),
metadata = list(),
...,
BPPARAM = bpparam()
)
## S4 method for signature 'character'
Spectra(
object,
processingQueue = list(),
metadata = list(),
source = MsBackendMzR(),
backend = source,
...,
BPPARAM = bpparam()
)
## S4 method for signature 'ANY'
Spectra(
object,
processingQueue = list(),
metadata = list(),
source = MsBackendDataFrame(),
backend = source,
...,
BPPARAM = bpparam()
)
## S4 method for signature 'Spectra,MsBackend'
setBackend(object, backend, f = dataStorage(object), ..., BPPARAM = bpparam())
## S4 method for signature 'Spectra'
c(x, ...)
## S4 method for signature 'Spectra,ANY'
split(x, f, drop = FALSE, ...)
## S4 method for signature 'Spectra'
export(object, backend, ...)
## S4 method for signature 'Spectra'
acquisitionNum(object)
## S4 method for signature 'Spectra'
peaksData(object, ...)
## S4 method for signature 'Spectra'
centroided(object)
## S4 replacement method for signature 'Spectra'
centroided(object) <- value
## S4 method for signature 'Spectra'
collisionEnergy(object)
## S4 replacement method for signature 'Spectra'
collisionEnergy(object) <- value
## S4 method for signature 'Spectra'
dataOrigin(object)
## S4 replacement method for signature 'Spectra'
dataOrigin(object) <- value
## S4 method for signature 'Spectra'
dataStorage(object)
## S4 method for signature 'Spectra'
dropNaSpectraVariables(object)
## S4 method for signature 'Spectra'
intensity(object, ...)
## S4 method for signature 'Spectra'
ionCount(object)
## S4 method for signature 'Spectra'
isCentroided(object, ...)
## S4 method for signature 'Spectra'
isEmpty(x)
## S4 method for signature 'Spectra'
isolationWindowLowerMz(object)
## S4 replacement method for signature 'Spectra'
isolationWindowLowerMz(object) <- value
## S4 method for signature 'Spectra'
isolationWindowTargetMz(object)
## S4 replacement method for signature 'Spectra'
isolationWindowTargetMz(object) <- value
## S4 method for signature 'Spectra'
isolationWindowUpperMz(object)
## S4 replacement method for signature 'Spectra'
isolationWindowUpperMz(object) <- value
## S4 method for signature 'Spectra'
containsMz(
object,
mz = numeric(),
tolerance = 0,
ppm = 20,
which = c("any", "all"),
BPPARAM = bpparam()
)
## S4 method for signature 'Spectra'
containsNeutralLoss(
object,
neutralLoss = 0,
tolerance = 0,
ppm = 20,
BPPARAM = bpparam()
)
## S4 method for signature 'Spectra'
spectrapply(
object,
FUN,
f = as.factor(seq_along(object)),
...,
BPPARAM = SerialParam()
)
## S4 method for signature 'Spectra'
length(x)
## S4 method for signature 'Spectra'
msLevel(object)
## S4 method for signature 'Spectra'
mz(object, ...)
## S4 method for signature 'Spectra'
lengths(x, use.names = FALSE)
## S4 method for signature 'Spectra'
polarity(object)
## S4 replacement method for signature 'Spectra'
polarity(object) <- value
## S4 method for signature 'Spectra'
precScanNum(object)
## S4 method for signature 'Spectra'
precursorCharge(object)
## S4 method for signature 'Spectra'
precursorIntensity(object)
## S4 method for signature 'Spectra'
precursorMz(object)
## S4 method for signature 'Spectra'
rtime(object)
## S4 replacement method for signature 'Spectra'
rtime(object) <- value
## S4 method for signature 'Spectra'
scanIndex(object)
## S4 method for signature 'Spectra'
selectSpectraVariables(object, spectraVariables = spectraVariables(object))
## S4 method for signature 'Spectra'
smoothed(object)
## S4 replacement method for signature 'Spectra'
smoothed(object) <- value
## S4 method for signature 'Spectra'
spectraData(object, columns = spectraVariables(object))
## S4 replacement method for signature 'Spectra'
spectraData(object) <- value
## S4 method for signature 'Spectra'
spectraNames(object)
## S4 replacement method for signature 'Spectra'
spectraNames(object) <- value
## S4 method for signature 'Spectra'
spectraVariables(object)
## S4 method for signature 'Spectra'
tic(object, initial = TRUE)
## S4 method for signature 'Spectra'
x$name
## S4 replacement method for signature 'Spectra'
x$name <- value
## S4 method for signature 'Spectra'
x[[i, j, ...]]
## S4 replacement method for signature 'Spectra'
x[[i, j, ...]] <- value
## S4 method for signature 'Spectra'
x[i, j, ..., drop = FALSE]
## S4 method for signature 'Spectra'
filterAcquisitionNum(
object,
n = integer(),
dataStorage = character(),
dataOrigin = character()
)
## S4 method for signature 'Spectra'
filterEmptySpectra(object)
## S4 method for signature 'Spectra'
filterDataOrigin(object, dataOrigin = character())
## S4 method for signature 'Spectra'
filterDataStorage(object, dataStorage = character())
## S4 method for signature 'Spectra'
filterIntensity(
object,
intensity = c(0, Inf),
msLevel. = unique(msLevel(object)),
...
)
## S4 method for signature 'Spectra'
filterIsolationWindow(object, mz = numeric())
## S4 method for signature 'Spectra'
filterMsLevel(object, msLevel. = integer())
## S4 method for signature 'Spectra'
filterMzRange(object, mz = numeric(), msLevel. = unique(msLevel(object)))
## S4 method for signature 'Spectra'
filterMzValues(
object,
mz = numeric(),
tolerance = 0,
ppm = 20,
msLevel. = unique(msLevel(object))
)
## S4 method for signature 'Spectra'
filterPolarity(object, polarity = integer())
## S4 method for signature 'Spectra'
filterPrecursorMz(object, mz = numeric())
## S4 method for signature 'Spectra'
filterPrecursorCharge(object, z = integer())
## S4 method for signature 'Spectra'
filterPrecursorScan(object, acquisitionNum = integer(), f = dataOrigin(object))
## S4 method for signature 'Spectra'
filterRt(object, rt = numeric(), msLevel. = unique(msLevel(object)))
## S4 method for signature 'Spectra'
reset(object, ...)
## S4 method for signature 'Spectra'
bin(x, binSize = 1L, breaks = NULL, msLevel. = unique(msLevel(x)))
## S4 method for signature 'Spectra,Spectra'
compareSpectra(
x,
y,
MAPFUN = joinPeaks,
tolerance = 0,
ppm = 20,
FUN = ndotproduct,
...,
SIMPLIFY = TRUE
)
## S4 method for signature 'Spectra,missing'
compareSpectra(
x,
y = NULL,
MAPFUN = joinPeaks,
tolerance = 0,
ppm = 20,
FUN = ndotproduct,
...,
SIMPLIFY = TRUE
)
## S4 method for signature 'Spectra'
pickPeaks(
object,
halfWindowSize = 2L,
method = c("MAD", "SuperSmoother"),
snr = 0,
k = 0L,
descending = FALSE,
threshold = 0,
msLevel. = unique(msLevel(object)),
...
)
## S4 method for signature 'Spectra'
replaceIntensitiesBelow(
object,
threshold = min,
value = 0,
msLevel. = unique(msLevel(object))
)
## S4 method for signature 'Spectra'
smooth(
x,
halfWindowSize = 2L,
method = c("MovingAverage", "WeightedMovingAverage", "SavitzkyGolay"),
msLevel. = unique(msLevel(x)),
...
)
## S4 method for signature 'Spectra'
addProcessing(object, FUN, ..., spectraVariables = character())
|
object |
For |
f |
For |
BPPARAM |
Parallel setup configuration. See |
... |
Additional arguments. |
x |
A |
p |
For |
FUN |
For |
y |
A |
by.x |
A |
by.y |
A |
suffix.y |
A |
ppm |
For |
tolerance |
For |
method |
|
msLevel. |
|
processingQueue |
For |
metadata |
For |
backend |
For |
source |
For |
drop |
For |
value |
replacement value for |
mz |
For |
which |
for |
neutralLoss |
for |
use.names |
For |
spectraVariables |
For |
columns |
For |
initial |
For |
name |
For |
i |
For |
j |
For |
n |
for |
dataStorage |
For |
dataOrigin |
For |
intensity |
For |
polarity |
for |
z |
For |
acquisitionNum |
for |
rt |
for |
binSize |
For |
breaks |
For |
MAPFUN |
For |
SIMPLIFY |
For |
halfWindowSize |
|
snr |
For |
k |
For |
descending |
For |
threshold |
|
The Spectra
class uses by default a lazy data manipulation strategy,
i.e. data manipulations such as performed with replaceIntensitiesBelow
are
not applied immediately to the data, but applied on-the-fly to the spectrum
data once it is retrieved. For some backends that allow to write data back
to the data storage (such as the MsBackendDataFrame()
and
MsBackendHdf5Peaks()
) it is possible to apply the queue with the
applyProcessing
function. See the Data manipulation and analysis
methods section below for more details.
For details on plotting spectra, see plotSpectra()
.
Clarifications regarding scan/acquisition numbers and indices:
A spectrumId
(or spectrumID
) is a vendor specific field in
the mzML file that contains some information about the
run/spectrum, e.g.: controllerType=0 controllerNumber=1 scan=5281 file=2
acquisitionNum
is a more or less sanitized spectrum id generated
from the spectrumId
field by mzR
(see
here).
scanIndex
is the mzR
generated sequence number of the
spectrum in the raw file (which doesn't have to be the same as
the acquisitionNum
)
See also this issue.
See individual method description for the return value.
Spectra
classes can be created with the Spectra
constructor function
which supports the following formats:
parameter object
is a DataFrame
containing the spectrum data. The
provided backend
(by default a MsBackendDataFrame) will be
initialized with that data.
parameter object
is a MsBackend (assumed to be already
initialized).
parameter object
is missing, in which case it is supposed that the data
is provided by the MsBackend class passed along with the backend
argument.
parameter object
is of type character
and is expected to be the file
name(s) from which spectra should be imported. Parameter source
allows
to define a MsBackend that is able to import the data from the
provided source files. The default value for source
is MsBackendMzR()
which allows to import spectra data from mzML, mzXML or CDF files.
With ...
additional arguments can be passed to the backend's
backendInitialize()
method. Parameter backend
allows to specify which
MsBackend should be used for data storage.
The backend of a Spectra
object can be changed with the setBackend
method that takes an instance of the new backend as second parameter
backend
. A call to setBackend(sps, backend = MsBackendDataFrame())
would
for example change the backend of sps
to the in-memory
MsBackendDataFrame
. Note that it is not possible to change the backend
to a read-only backend (such as the MsBackendMzR()
backend). setBackend
changes the "dataOrigin"
variable of the resulting Spectra
object to the
"dataStorage"
variable of the backend before the switch.
The definition of the function is:
setBackend(object, backend, f = dataStorage(object), ..., BPPARAM = bpparam())
and its parameters are:
parameter object
: the Spectra
object.
parameter backend
: an instance of the new backend, e.g.
MsBackendDataFrame()
.
parameter f
: factor allowing to parallelize the change of the backends.
By default the process of copying the spectra data from the original to the
new backend is performed separately (and in parallel) for each file. Users
are advised to use the default setting.
parameter ...
: optional additional arguments passed to the
backendInitialize()
method of the new backend
.
parameter BPPARAM
: setup for the parallel processing. See bpparam()
for
details.
Data from a Spectra
object can be exported to a file with the export
function. The actual export of the data has to be performed by the export
method of the MsBackend class defined with the mandatory parameter
backend
. Note however that not all backend classes support export of data.
From the MsBackend
classes in the Spectra
package currently only the
MsBackendMzR
backend supports data export (to mzML/mzXML file(s));
see the help page of the MsBackend for information on its arguments
or the examples below or the vignette for examples.
The definition of the function is
export(object, backend, ...)
and its
parameters are:
object
: the Spectra
object to be exported.
backend
: instance of a class extending MsBackend which supports export
of the data (i.e. which has a defined export
method).
...
: additional parameters specific for the MsBackend
passed with
parameter backend
.
$
, $<-
: gets (or sets) a spectra variable for all spectra in object
.
See examples for details.
[[
, [[<-
: access or set/add a single spectrum variable (column) in the
backend.
acquisitionNum
: returns the acquisition number of each
spectrum. Returns an integer
of length equal to the number of
spectra (with NA_integer_
if not available).
peaksData
: gets the peaks matrices for all spectra in object
. The
function returns a SimpleList()
of matrices, each matrix
with columns
"mz"
and "intensity"
with the m/z and intensity values for all peaks of
a spectrum. Note that it is also possible to extract the peaks matrices
with as(x, "list")
and as(x, "SimpleList")
as a list
and
SimpleList
, respectively.
centroided
, centroided<-
: gets or sets the centroiding
information of the spectra. centroided
returns a logical
vector of length equal to the number of spectra with TRUE
if a
spectrum is centroided, FALSE
if it is in profile mode and NA
if it is undefined. See also isCentroided
for estimating from
the spectrum data whether the spectrum is centroided. value
for centroided<-
is either a single logical
or a logical
of
length equal to the number of spectra in object
.
collisionEnergy
, collisionEnergy<-
: gets or sets the
collision energy for all spectra in object
. collisionEnergy
returns a numeric
with length equal to the number of spectra
(NA_real_
if not present/defined), collisionEnergy<-
takes a
numeric
of length equal to the number of spectra in object
.
dataOrigin
, dataOrigin<-
: gets or sets the data origin for each
spectrum. dataOrigin
returns a character
vector (same length as
object
) with the origin of the spectra. dataOrigin<-
expects a
character
vector (same length as object
) with the replacement
values for the data origin of each spectrum.
dataStorage
: returns a character
vector (same length as object
)
with the data storage location of each spectrum.
intensity
: gets the intensity values from the spectra. Returns
a NumericList()
of numeric
vectors (intensity values for each
spectrum). The length of the list is equal to the number of
spectra
in object
.
ionCount
: returns a numeric
with the sum of intensities for
each spectrum. If the spectrum is empty (see isEmpty
),
NA_real_
is returned.
isCentroided
: a heuristic approach assessing if the spectra in
object
are in profile or centroided mode. The function takes
the qtl
th quantile top peaks, then calculates the difference
between adjacent m/z value and returns TRUE
if the first
quartile is greater than k
. (See Spectra:::.isCentroided
for
the code.)
isEmpty
: checks whether a spectrum in object
is empty
(i.e. does not contain any peaks). Returns a logical
vector of
length equal to the number of spectra.
isolationWindowLowerMz
, isolationWindowLowerMz<-
: gets or sets the
lower m/z boundary of the isolation window.
isolationWindowTargetMz
, isolationWindowTargetMz<-
: gets or sets the
target m/z of the isolation window.
isolationWindowUpperMz
, isolationWindowUpperMz<-
: gets or sets the
upper m/z boundary of the isolation window.
containsMz
: checks for each of the spectra whether they contain mass
peaks with an m/z equal to mz
(given acceptable difference as defined by
parameters tolerance
and ppm
- see common()
for details). Parameter
which
allows to define whether any (which = "any"
, the default) or
all (which = "all"
) of the mz
have to match. The function returns
NA
if mz
is of length 0 or is NA
.
containsNeutralLoss
: checks for each spectrum in object
if it has a
peak with an m/z value equal to its precursor m/z - neutralLoss
(given
acceptable difference as defined by parameters tolerance
and ppm
).
Returns NA
for MS1 spectra (or spectra without a precursor m/z).
length
: gets the number of spectra in the object.
lengths
: gets the number of peaks (m/z-intensity values) per
spectrum. Returns an integer
vector (length equal to the
number of spectra). For empty spectra, 0
is returned.
msLevel
: gets the spectra's MS level. Returns an integer vector (names
being spectrum names, length equal to the number of spectra) with the MS
level for each spectrum.
mz
: gets the mass-to-charge ratios (m/z) from the
spectra. Returns a NumericList()
of length equal to the number of
spectra, each element a numeric
vector with the m/z values of
one spectrum.
polarity
, polarity<-
: gets or sets the polarity for each
spectrum. polarity
returns an integer
vector (length equal
to the number of spectra), with 0
and 1
representing negative
and positive polarities, respectively. polarity<-
expects an
integer
vector of length 1 or equal to the number of spectra.
precursorCharge
, precursorIntensity
, precursorMz
,
precScanNum
, precAcquisitionNum
: gets the charge (integer
),
intensity (numeric
), m/z (numeric
), scan index (integer
)
and acquisition number (integer
) of the precursor for MS level >=
2 spectra from the object. Returns a vector of length equal to
the number of spectra in object
. NA
are reported for MS1
spectra or if no precursor information is available.
rtime
, rtime<-
: gets or sets the retention times (in seconds)
for each spectrum. rtime
returns a numeric
vector (length
equal to the number of spectra) with the retention time for each
spectrum. rtime<-
expects a numeric vector with length equal
to the number of spectra.
scanIndex
: returns an integer
vector with the scan index
for each spectrum. This represents the relative index of the
spectrum within each file. Note that this can be different to the
acquisitionNum
of the spectrum which represents the index of the
spectrum during acquisition/measurement (as reported in the mzML file).
smoothed
, smoothed<-
: gets or sets whether a spectrum is
smoothed. smoothed
returns a logical
vector of length equal
to the number of spectra. smoothed<-
takes a logical
vector
of length 1 or equal to the number of spectra in object
.
spectraData
: gets general spectrum metadata (annotation, also called
header). spectraData
returns a DataFrame
. Note that this
method does by default not return m/z or intensity values.
spectraData<-
: replaces the full spectra data of the Spectra
object with the one provided with value
. The use of this function is
discouraged, as replacing spectra data with values that are in a
different order can break the linkage with the associated m/z and intensity
values. If possible, spectra variables (i.e. columns of the Spectra
)
should be replaced individually. The spectraData<-
function expects a
DataFrame
to be passed as value.
spectraNames
, spectraNames<-
: gets or sets the spectra names.
spectraVariables
: returns a character
vector with the
available spectra variables (columns, fields or attributes)
available in object
.
tic
: gets the total ion current/count (sum of signal of a
spectrum) for all spectra in object
. By default, the value
reported in the original raw data file is returned. For an empty
spectrum, 0
is returned.
Subsetting and filtering of Spectra
objects can be performed with the below
listed methods.
[
: subsets the spectra keeping only selected elements (i
). The method
always returns a Spectra
object.
dropNaSpectraVariables
: removes spectra variables (i.e. columns in the
object's spectraData
that contain only missing values (NA
)). Note that
while columns with only NA
s are removed, a spectraData
call after
dropNaSpectraVariables
might still show columns containing NA
values
for core spectra variables.
filterAcquisitionNum
: filters the object keeping only spectra matching
the provided acquisition numbers (argument n
). If dataOrigin
or
dataStorage
is also provided, object
is subsetted to the spectra with
an acquisition number equal to n
in spectra with matching dataOrigin
or dataStorage values retaining all other spectra.
Returns the filtered Spectra
.
filterDataOrigin
: filters the object retaining spectra matching the
provided dataOrigin
. Parameter dataOrigin
has to be of type
character
and needs to match exactly the data origin value of the
spectra to subset.
Returns the filtered Spectra
object (with spectra ordered according to
the provided dataOrigin
parameter).
filterDataStorage
: filters the object retaining spectra stored in the
specified dataStorage
. Parameter dataStorage
has to be of type
character
and needs to match exactly the data storage value of the
spectra to subset.
Returns the filtered Spectra
object (with spectra ordered according to
the provided dataStorage
parameter).
filterEmptySpectra
: removes empty spectra (i.e. spectra without peaks).
Returns the filtered Spectra
object (with spectra in their
original order).
filterIsolationWindow
: retains spectra that contain mz
in their
isolation window m/z range (i.e. with an isolationWindowLowerMz
<= mz
and isolationWindowUpperMz
>= mz
). Returns the filtered Spectra
object (with spectra in their original order).
filterMsLevel
: filters object by MS level keeping only spectra matching
the MS level specified with argument msLevel
. Returns the filtered
Spectra
(with spectra in their original order).
filterMzRange
: filters the object keeping only peaks in each spectrum
that are within the provided m/z range.
filterMzValues
: filters the object keeping only peaks in each spectrum
that match the provided m/z value(s) considering also the absolute
tolerance
and m/z-relative ppm
(tolerance
and ppm
can be either
of length 1 or equal to the length of mz
to define a different tolerance
for each m/z).
filterPolarity
: filters the object keeping only spectra matching the
provided polarity. Returns the filtered Spectra
(with spectra in their
original order).
filterPrecursorMz
: retains spectra with a precursor m/z within the
provided m/z range. See examples for details on selecting spectra with
a precursor m/z for a target m/z accepting a small difference in ppm.
filterPrecursorCharge
: retains spectra with the defined precursor
charge(s).
filterPrecursorScan
: retains parent (e.g. MS1) and children scans (e.g.
MS2) of acquisition number acquisitionNum
. Returns the filtered
Spectra
(with spectra in their original order). Parameter f
allows to
define which spectra belong to the same sample or original data file (
defaults to f = dataOrigin(object)
).
filterRt
: retains spectra of MS level msLevel
with retention
times (in seconds) within (>=
) rt[1]
and (<=
)
rt[2]
. Returns the filtered Spectra
(with spectra in their
original order).
reset
: restores the data to its original state (as much as possible):
removes any processing steps from the lazy processing queue and calls
reset
on the backend which, depending on the backend, can also undo e.g.
data filtering operations. Note that a reset
call after applyProcessing
will not have any effect. See examples below for more information.
selectSpectraVariables
: reduces the information within the object to
the selected spectra variables: all data for variables not specified will
be dropped. For mandatory columns (such as msLevel, rtime ...) only
the values will be dropped, while additional (user defined) spectra
variables will be completely removed. Returns the filtered Spectra
.
split
: splits the Spectra
object based on parameter f
into a list
of Spectra
objects.
joinSpectraData
: Individual spectra variables can be directly
added with the $<-
or [[<-
syntax. The joinSpectraData()
function allows to merge a DataFrame
to the existing spectra
data. This function diverges from the merge()
method in two
main ways:
The by.x
and by.y
column names must be of length 1.
If variable names are shared in x
and y
, the spectra
variables of x
are not modified. It's only the y
variables that are appended the suffix defined in
suffix.y
. This is to avoid modifying any core spectra
variables that would lead to an invalid object.
Several Spectra
objects can be concatenated into a single object with the
c
or the concatenateSpectra
function. Concatenation will fail if the
processing queue of any of the Spectra
objects is not empty or if
different backends are used in the Spectra
objects. The spectra variables
of the resulting Spectra
object is the union of the spectra variables of
the individual Spectra
objects.
Many data manipulation operations, such as those listed in this section, are
not applied immediately to the spectra, but added to a
lazy processing/manipulation queue. Operations stored in this queue are
applied on-the-fly to spectra data each time it is accessed. This lazy
execution guarantees the same functionality for Spectra
objects with
any backend, i.e. backends supporting to save changes to spectrum data
(MsBackendDataFrame()
or MsBackendHdf5Peaks()
) as well as read-only
backends (such as the MsBackendMzR()
). Note that for the former it is
possible to apply the processing queue and write the modified peak data back
to the data storage with the applyProcessing
function.
addProcessing
: adds an arbitrary function that should be applied to the
peaks matrix of every spectrum in object
. The function (can be passed
with parameter FUN
) is expected to take a peaks matrix as input and to
return a peaks matrix. A peaks matrix is a numeric matrix with two columns,
the first containing the m/z values of the peaks and the second the
corresponding intensities. The function has to have ...
in its
definition. Additional arguments can be passed with ...
. With parameter
spectraVariables
it is possible to define additional spectra variables
from object
that should be passed to the function FUN
. These will be
passed by their name (e.g. specifying spectraVariables = "precursorMz"
will pass the spectra's precursor m/z as a parameter named precursorMz
to the function). The only exception is the spectra's MS level, these will
be passed to the function as a parameter called spectrumMsLevel
(i.e.
with spectraVariables = "msLevel"
the MS levels of each spectrum will be
submitted to the function as a parameter called spectrumMsLevel
).
Examples are provided in the package vignette.
applyProcessing
: for Spectra
objects that use a writeable backend
only: apply all steps from the lazy processing queue to the peak data and
write it back to the data storage. Parameter f
allows to specify how
object
should be split for parallel processing. This should either be
equal to the dataStorage
, or f = rep(1, length(object))
to disable
parallel processing altogether. Other partitionings might result in
errors (especially if a MsBackendHdf5Peaks
backend is used).
bin
: aggregates individual spectra into discrete (m/z) bins. All
intensity values for peaks falling into the same bin are summed.
combineSpectra
: combine sets of spectra into a single spectrum per set.
For each spectrum group (set), spectra variables from the first spectrum
are used and the peak matrices are combined using the function specified
with FUN
, which defaults to combinePeaks()
. The sets of spectra can be
specified with parameter f
.
In addition it is possible to define, with parameter p
if and how to
split the input data for parallel processing.
This defaults to p = x$dataStorage
and hence a per-file parallel
processing is applied for Spectra
with file-based backends (such as the
MsBackendMzR()
).
Prior to combination of the spectra all processings queued in the lazy
evaluation queue are applied. Be aware that calling combineSpectra
on a
Spectra
object with certain backends that allow modifications might
overwrite the original data. This does not happen with a
MsBackendDataFrame
backend, but with a MsBackendHdf5Peaks
backend the
m/z and intensity values in the original hdf5 file(s) will be overwritten.
The function returns a Spectra
of length equal to the unique levels
of f
.
compareSpectra
: compare each spectrum in x
with each spectrum in y
using the function provided with FUN
(defaults to ndotproduct()
). If
y
is missing, each spectrum in x
is compared with each other spectrum
in x
.
The matching/mapping of peaks between the compared spectra is done with the
MAPFUN
function. The default joinPeaks()
matches peaks of both spectra
and allows to keep all peaks from the first spectrum (type = "left"
),
from the second (type = "right"
), from both (type = "outer"
) and to
keep only matching peaks (type = "inner"
); see joinPeaks()
for more
information and examples. The MAPFUN
function should have parameters
x
, y
, xPrecursorMz
and yPrecursorMz
as these values are passed to
the function.
FUN
is supposed to be a function to compare intensities of (matched)
peaks of the two spectra that are compared. The function needs to take two
matrices with columns "mz"
and "intensity"
as input and is supposed
to return a single numeric as result. In addition to the two peak matrices
the spectra's precursor m/z values are passed to the function as parameters
xPrecursorMz
(precursor m/z of the x
peak matrix) and yPrecursorMz
(precursor m/z of the y
peak matrix). Additional parameters to functions
FUN
and MAPFUN
can be passed with ...
.
The function returns a matrix
with the results of FUN
for each
comparison, number of rows equal to length(x)
and number of columns
equal to length(y)
(i.e. element in row 2 and column 3 is the result from
the comparison of x[2]
with y[3]
). If SIMPLIFY = TRUE
the matrix
is simplified to a numeric
if length of x
or y
is one.
estimatePrecursorIntensity
: define the precursor intensities for MS2
spectra using the intensity of the matching MS1 peak from the
closest MS1 spectrum (i.e. the last MS1 spectrum measured before the
respective MS2 spectrum). With method = "interpolation"
it is also
possible to calculate the precursor intensity based on an interpolation of
intensity values (and retention times) of the matching MS1 peaks from the
previous and next MS1 spectrum. See below for an example.
filterIntensity
: filters each spectrum keeping only peaks with
intensities that are within the provided range or match the criteria of
the provided function. For the former, parameter intensity
has to be a
numeric
defining the intensity range, for the latter a function
that
takes the intensity values of the spectrum and returns a logical
whether
the peak should be retained or not (see examples below for details) -
additional parameters to the function can be passed with ...
. To
remove only peaks with intensities below a certain threshold, say 100, use
intensity = c(100, Inf)
. Note: also a single value can be passed with
the intensity
parameter in which case an upper limit of Inf
is used.
Note that this function removes also peaks with missing intensities
(i.e. an intensity of NA
). Parameter msLevel.
allows to restrict the
filtering to spectra of the specified MS level(s).
processingLog
: returns a character
vector with the processing log
messages.
spectrapply
: apply a given function to each spectrum in a Spectra
object. The Spectra
is split into individual spectra and on each of
them (i.e. Spectra
of length 1) the function FUN
is applied. Additional
parameters to FUN
can be passed with the ...
argument. Parameter
BPPARAM
allows to enable parallel processing, which however makes only
sense if FUN
is computationally intensive. spectrapply
returns a list
(same length as object
) with the result from FUN
. See examples for
more details.
Note that the result and its order depend on the factor f
used for
splitting object
with split
, i.e. no re-ordering or unsplit
is
performed on the result.
smooth
: smooth individual spectra using a moving window-based approach
(window size = 2 * halfWindowSize
). Currently, the
Moving-Average- (method = "MovingAverage"
),
Weighted-Moving-Average- (method = "WeightedMovingAverage"
,
weights depending on the distance from the center and calculated as
1/2^abs(-halfWindowSize:halfWindowSize)
) and
Savitzky-Golay-Smoothing (method = "SavitzkyGolay"
) are supported.
For details on how to choose the correct halfWindowSize
please see
MsCoreUtils::smooth()
.
pickPeaks
: picks peaks on individual spectra using a moving window-based
approach (window size = 2 * halfWindowSize
). For noisy spectra there
are currently two different noise estimators available,
the Median Absolute Deviation (method = "MAD"
) and
Friedman's Super Smoother (method = "SuperSmoother"
),
as implemented in MsCoreUtils::noise()
.
The method supports also to optionally refine the m/z value of
the identified centroids by considering data points that belong (most
likely) to the same mass peak. Therefore the m/z value is calculated as an
intensity weighted average of the m/z values within the peak region.
The peak region is defined as the m/z values (and their respective
intensities) of the 2 * k
closest signals to the centroid or the closest
valleys (descending = TRUE
) in the 2 * k
region. For the latter the k
generally has to be chosen larger. See MsCoreUtils::refineCentroids()
for
details.
If the ratio of the signal to the highest intensity of the peak is below
threshold
it will be ignored for the weighted average.
replaceIntensitiesBelow
: replaces intensities below a specified
threshold with the provided value
. Parameter threshold
can be either
a single numeric value or a function which is applied to all non-NA
intensities of each spectrum to determine a threshold value for each
spectrum. The default is threshold = min
which replaces all values
which are <= the minimum intensity in a spectrum with value
(the
default for value
is 0
). Note that the function specified with
threshold
is expected to have a parameter na.rm
since na.rm = TRUE
will be passed to the function. If the spectrum is in profile mode,
ranges of successive non-0 peaks <= threshold
are set to 0.
Parameter msLevel.
allows to apply this to only spectra of certain MS
level(s).
Sebastian Gibb, Johannes Rainer, Laurent Gatto
| ## Create a Spectra providing a `DataFrame` containing the spectrum data.
spd <- DataFrame(msLevel = c(1L, 2L), rtime = c(1.1, 1.2))
spd$mz <- list(c(100, 103.2, 104.3, 106.5), c(45.6, 120.4, 190.2))
spd$intensity <- list(c(200, 400, 34.2, 17), c(12.3, 15.2, 6.8))
data <- Spectra(spd)
data
## Get the number of spectra
length(data)
## Get the number of peaks per spectrum
lengths(data)
## Create a Spectra from mzML files and use the `MsBackendMzR` on-disk
## backend.
sciex_file <- dir(system.file("sciex", package = "msdata"),
full.names = TRUE)
sciex <- Spectra(sciex_file, backend = MsBackendMzR())
sciex
## The MS data is on disk and will be read into memory on-demand. We can
## however change the backend to a MsBackendDataFrame backend which will
## keep all of the data in memory.
sciex_im <- setBackend(sciex, MsBackendDataFrame())
sciex_im
## The on-disk object `sciex` is light-weight, because it does not keep the
## MS peak data in memory. The `sciex_im` object in contrast keeps all the
## data in memory and its size is thus much larger.
object.size(sciex)
object.size(sciex_im)
## The spectra variable `dataStorage` returns for each spectrum the location
## where the data is stored. For in-memory objects:
head(dataStorage(sciex_im))
## While objects that use an on-disk backend will list the files where the
## data is stored.
head(dataStorage(sciex))
## The spectra variable `dataOrigin` returns for each spectrum the *origin*
## of the data. If the data is read from e.g. mzML files, this will be the
## original mzML file name:
head(dataOrigin(sciex))
head(dataOrigin(sciex_im))
## ---- ACCESSING AND ADDING DATA ----
## Get the MS level for each spectrum.
msLevel(data)
## Alternatively, we could also use $ to access a specific spectra variable.
## This could also be used to add additional spectra variables to the
## object (see further below).
data$msLevel
## Get the intensity and m/z values.
intensity(data)
mz(data)
## Determine whether one of the spectra has a specific m/z value
containsMz(data, mz = 120.4)
## Accessing spectra variables works for all backends:
intensity(sciex)
intensity(sciex_im)
## Get the m/z for the first spectrum.
mz(data)[[1]]
## Get the peak data (m/z and intensity values).
pks <- peaksData(data)
pks
pks[[1]]
pks[[2]]
## Note that we could get the same result by coercing the `Spectra` to
## a `list` or `SimpleList`:
as(data, "list")
as(data, "SimpleList")
## List all available spectra variables (i.e. spectrum data and metadata).
spectraVariables(data)
## For all *core* spectrum variables accessor functions are available. These
## return NA if the variable was not set.
centroided(data)
dataStorage(data)
rtime(data)
precursorMz(data)
## Add an additional metadata column.
data$spectrum_id <- c("sp_1", "sp_2")
## List spectra variables, "spectrum_id" is now also listed
spectraVariables(data)
## Get the values for the new spectra variable
data$spectrum_id
## Extract specific spectra variables.
spectraData(data, columns = c("spectrum_id", "msLevel"))
## Drop spectra variable data and/or columns.
res <- selectSpectraVariables(data, c("mz", "intensity"))
## This removed the additional column "spectrum_id" and deleted all values
## for all spectra variables, except "mz" and "intensity".
spectraData(res)
## Compared to the data before selectSpectraVariables.
spectraData(data)
## ---- SUBSETTING, FILTERING AND COMBINING
## Subset to all MS2 spectra.
data[msLevel(data) == 2]
## Same with the filterMsLevel function
filterMsLevel(data, 2)
## Below we combine the `data` and `sciex_im` objects into a single one.
data_comb <- c(data, sciex_im)
## The combined Spectra contains a union of all spectra variables:
head(data_comb$spectrum_id)
head(data_comb$rtime)
head(data_comb$dataStorage)
head(data_comb$dataOrigin)
## Filter a Spectra for a target precursor m/z with a tolerance of 10ppm
spd$precursorMz <- c(323.4, 543.2302)
data_filt <- Spectra(spd)
filterPrecursorMz(data_filt, mz = 543.23 + ppm(c(-543.23, 543.23), 10))
## Filter a Spectra keeping only peaks matching certain m/z values
sps_sub <- filterMzValues(data, mz = c(103, 104), tolerance = 0.3)
mz(sps_sub)
## Filter a Spectra keeping only peaks within a m/z range
sps_sub <- filterMzRange(data, mz = c(100, 300))
mz(sps_sub)
## Remove empty spectra variables
sciex_noNA <- dropNaSpectraVariables(sciex)
## Available spectra variables before and after dropNaSpectraVariables
spectraVariables(sciex)
spectraVariables(sciex_noNA)
## Adding new spectra variables
spv <- DataFrame(spectrumId = sciex$spectrumId[3:12], ## used for merging
var1 = rnorm(10),
var2 = sample(letters, 10))
spv
sciex2 <- joinSpectraData(sciex, spv, by.y = "spectrumId")
spectraVariables(sciex2)
spectraData(sciex2)[1:13, c("spectrumId", "var1", "var2")]
## ---- DATA MANIPULATIONS AND OTHER OPERATIONS ----
## Set the data to be centroided
centroided(data) <- TRUE
## Replace peak intensities below 40 with 3.
res <- replaceIntensitiesBelow(data, threshold = 40, value = 3)
res
## Get the intensities of the first and second spectrum.
intensity(res)[[1]]
intensity(res)[[2]]
## Remove all peaks with an intensity below 40.
res <- filterIntensity(res, intensity = c(40, Inf))
## Get the intensities of the first and second spectrum.
intensity(res)[[1]]
intensity(res)[[2]]
## The lengths of the spectra are now different
lengths(mz(res))
lengths(mz(data))
## In addition it is possible to pass a function to `filterIntensity`: in
## the example below we want to keep only peaks that have an intensity which
## is larger than one third of the maximal peak intensity in that spectrum.
keep_peaks <- function(x, prop = 3) {
    ## Cut-off: the largest non-missing intensity divided by `prop`.
    cutoff <- max(x, na.rm = TRUE) / prop
    ## One logical per peak: TRUE if its intensity is above the cut-off.
    x > cutoff
}
res2 <- filterIntensity(data, intensity = keep_peaks)
intensity(res2)[[1L]]
intensity(data)[[1L]]
## We can also change the proportion by simply passing the `prop` parameter
## to the function. To keep only peaks that have an intensity which is
## larger than half of the maximum intensity:
res2 <- filterIntensity(data, intensity = keep_peaks, prop = 2)
intensity(res2)[[1L]]
intensity(data)[[1L]]
## Since data manipulation operations are by default not directly applied to
## the data but only added to the internal lazy evaluation queue, it is also
## possible to remove these data manipulations with the `reset` function:
res_rest <- reset(res)
res_rest
lengths(mz(res_rest))
lengths(mz(res))
lengths(mz(data))
## `reset` after an `applyProcessing` can not restore the data, because the
## data in the backend was changed. Similarly, `reset` after any filter
## operations can not restore data for a `Spectra` with a
## `MsBackendDataFrame`.
res_2 <- applyProcessing(res)
res_rest <- reset(res_2)
lengths(mz(res))
lengths(mz(res_rest))
## Compare spectra: comparing spectra 2 and 3 against spectra 10:20 using
## the normalized dotproduct method.
res <- compareSpectra(sciex_im[2:3], sciex_im[10:20])
## first row contains comparisons of spectrum 2 with spectra 10 to 20 and
## the second row comparisons of spectrum 3 with spectra 10 to 20
res
## To use a simple Pearson correlation instead we can define a function
## that takes the two peak matrices and calculates the correlation for
## their second columns (containing the intensity values).
correlateSpectra <- function(x, y, use = "pairwise.complete.obs", ...) {
    ## Second column of each peak matrix (the intensity values per the
    ## surrounding example text); `...` is accepted but not used.
    ints_x <- x[, 2]
    ints_y <- y[, 2]
    ## `use` is forwarded to `cor` to control the handling of missing values.
    cor(ints_x, ints_y, use = use)
}
res <- compareSpectra(sciex_im[2:3], sciex_im[10:20],
FUN = correlateSpectra)
res
## Use compareSpectra to determine the number of common (matching) peaks
## with a ppm of 10:
## type = "inner" uses an *inner join* to match peaks, i.e. keeps only
## peaks that can be mapped between both spectra. The provided FUN returns
## simply the number of matching peaks.
compareSpectra(sciex_im[2:3], sciex_im[10:20], ppm = 10, type = "inner",
FUN = function(x, y, ...) nrow(x))
## Apply an arbitrary function to each spectrum in a Spectra.
## In the example below we calculate the mean intensity for each spectrum
## in a subset of the sciex_im data. Note that we can access all variables
## of each individual spectrum either with the `$` operator or the
## corresponding method.
res <- spectrapply(sciex_im[1:20], FUN = function(x) mean(x$intensity[[1]]))
head(res)
## It is however important to note that dedicated methods to access the
## data (such as `intensity`) are much more efficient than using `spectrapply`:
res <- lapply(intensity(sciex_im[1:20]), mean)
head(res)
## Calculating the precursor intensity for MS2 spectra:
##
## Some MS instrument manufacturers don't report the precursor intensities
## for MS2 spectra. The `estimatePrecursorIntensity` function can be used
## in these cases to calculate the precursor intensity based on MS1 data. Below
## we load an mzML file from a vendor providing precursor intensities and
## compare the estimated and reported precursor intensities.
tmt <- Spectra(msdata::proteomics(full.names = TRUE)[5],
backend = MsBackendMzR())
pmi <- estimatePrecursorIntensity(tmt)
plot(pmi, precursorIntensity(tmt))
## We can also replace the original precursor intensity values with the
## newly calculated ones
tmt$precursorIntensity <- pmi
## ---- DATA EXPORT ----
## Some `MsBackend` classes provide an `export` method to export the data to
## the file format supported by the backend. The `MsBackendMzR` for example
## allows to export MS data to mzML or mzXML file(s), the `MsBackendMgf`
## (defined in the MsBackendMgf R package) would allow to export the data
## in mgf file format. Below we export the MS data in `data`. We
## call the `export` method on this object, specify the backend that should
## be used to export the data (and which also defines the output format) and
## provide a file name.
fl <- tempfile()
export(data, MsBackendMzR(), file = fl)
## This exported our data in mzML format. Below we read the first 6 lines
## from that file.
readLines(fl, n = 6)
## If only a single file name is provided, all spectra are exported to that
## file. To export data with the `MsBackendMzR` backend to different files, a
## file name for each individual spectrum has to be provided.
## Below we export each spectrum to its own file.
fls <- c(tempfile(), tempfile())
export(data, MsBackendMzR(), file = fls)
## Reading the data from the first file
res <- Spectra(backendInitialize(MsBackendMzR(), fls[1]))
mz(res)
mz(data)
|
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.