Spectra: The Spectra class to manage and access MS data

Description Usage Arguments Details Value Creation of objects, conversion, changing the backend and export Accessing spectra data Data subsetting, filtering and merging Data manipulation and analysis methods Author(s) Examples

Description

The Spectra class encapsulates spectral mass spectrometry data and related metadata.

It supports multiple data backends, e.g. in-memory (MsBackendDataFrame()), on-disk as mzML (MsBackendMzR()) or HDF5 (MsBackendHdf5Peaks()).

Usage

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
applyProcessing(object, f = dataStorage(object), BPPARAM = bpparam(), ...)

concatenateSpectra(x, ...)

combineSpectra(
  x,
  f = x$dataStorage,
  p = x$dataStorage,
  FUN = combinePeaks,
  ...,
  BPPARAM = bpparam()
)

joinSpectraData(x, y, by.x = "spectrumId", by.y, suffix.y = ".y")

processingLog(x)

estimatePrecursorIntensity(
  x,
  ppm = 10,
  tolerance = 0,
  method = c("previous", "interpolation"),
  msLevel. = 2L,
  f = dataOrigin(x),
  BPPARAM = bpparam()
)

## S4 method for signature 'missing'
Spectra(
  object,
  processingQueue = list(),
  metadata = list(),
  ...,
  backend = MsBackendDataFrame(),
  BPPARAM = bpparam()
)

## S4 method for signature 'MsBackend'
Spectra(
  object,
  processingQueue = list(),
  metadata = list(),
  ...,
  BPPARAM = bpparam()
)

## S4 method for signature 'character'
Spectra(
  object,
  processingQueue = list(),
  metadata = list(),
  source = MsBackendMzR(),
  backend = source,
  ...,
  BPPARAM = bpparam()
)

## S4 method for signature 'ANY'
Spectra(
  object,
  processingQueue = list(),
  metadata = list(),
  source = MsBackendDataFrame(),
  backend = source,
  ...,
  BPPARAM = bpparam()
)

## S4 method for signature 'Spectra,MsBackend'
setBackend(object, backend, f = dataStorage(object), ..., BPPARAM = bpparam())

## S4 method for signature 'Spectra'
c(x, ...)

## S4 method for signature 'Spectra,ANY'
split(x, f, drop = FALSE, ...)

## S4 method for signature 'Spectra'
export(object, backend, ...)

## S4 method for signature 'Spectra'
acquisitionNum(object)

## S4 method for signature 'Spectra'
peaksData(object, ...)

## S4 method for signature 'Spectra'
centroided(object)

## S4 replacement method for signature 'Spectra'
centroided(object) <- value

## S4 method for signature 'Spectra'
collisionEnergy(object)

## S4 replacement method for signature 'Spectra'
collisionEnergy(object) <- value

## S4 method for signature 'Spectra'
dataOrigin(object)

## S4 replacement method for signature 'Spectra'
dataOrigin(object) <- value

## S4 method for signature 'Spectra'
dataStorage(object)

## S4 method for signature 'Spectra'
dropNaSpectraVariables(object)

## S4 method for signature 'Spectra'
intensity(object, ...)

## S4 method for signature 'Spectra'
ionCount(object)

## S4 method for signature 'Spectra'
isCentroided(object, ...)

## S4 method for signature 'Spectra'
isEmpty(x)

## S4 method for signature 'Spectra'
isolationWindowLowerMz(object)

## S4 replacement method for signature 'Spectra'
isolationWindowLowerMz(object) <- value

## S4 method for signature 'Spectra'
isolationWindowTargetMz(object)

## S4 replacement method for signature 'Spectra'
isolationWindowTargetMz(object) <- value

## S4 method for signature 'Spectra'
isolationWindowUpperMz(object)

## S4 replacement method for signature 'Spectra'
isolationWindowUpperMz(object) <- value

## S4 method for signature 'Spectra'
containsMz(
  object,
  mz = numeric(),
  tolerance = 0,
  ppm = 20,
  which = c("any", "all"),
  BPPARAM = bpparam()
)

## S4 method for signature 'Spectra'
containsNeutralLoss(
  object,
  neutralLoss = 0,
  tolerance = 0,
  ppm = 20,
  BPPARAM = bpparam()
)

## S4 method for signature 'Spectra'
spectrapply(
  object,
  FUN,
  f = as.factor(seq_along(object)),
  ...,
  BPPARAM = SerialParam()
)

## S4 method for signature 'Spectra'
length(x)

## S4 method for signature 'Spectra'
msLevel(object)

## S4 method for signature 'Spectra'
mz(object, ...)

## S4 method for signature 'Spectra'
lengths(x, use.names = FALSE)

## S4 method for signature 'Spectra'
polarity(object)

## S4 replacement method for signature 'Spectra'
polarity(object) <- value

## S4 method for signature 'Spectra'
precScanNum(object)

## S4 method for signature 'Spectra'
precursorCharge(object)

## S4 method for signature 'Spectra'
precursorIntensity(object)

## S4 method for signature 'Spectra'
precursorMz(object)

## S4 method for signature 'Spectra'
rtime(object)

## S4 replacement method for signature 'Spectra'
rtime(object) <- value

## S4 method for signature 'Spectra'
scanIndex(object)

## S4 method for signature 'Spectra'
selectSpectraVariables(object, spectraVariables = spectraVariables(object))

## S4 method for signature 'Spectra'
smoothed(object)

## S4 replacement method for signature 'Spectra'
smoothed(object) <- value

## S4 method for signature 'Spectra'
spectraData(object, columns = spectraVariables(object))

## S4 replacement method for signature 'Spectra'
spectraData(object) <- value

## S4 method for signature 'Spectra'
spectraNames(object)

## S4 replacement method for signature 'Spectra'
spectraNames(object) <- value

## S4 method for signature 'Spectra'
spectraVariables(object)

## S4 method for signature 'Spectra'
tic(object, initial = TRUE)

## S4 method for signature 'Spectra'
x$name

## S4 replacement method for signature 'Spectra'
x$name <- value

## S4 method for signature 'Spectra'
x[[i, j, ...]]

## S4 replacement method for signature 'Spectra'
x[[i, j, ...]] <- value

## S4 method for signature 'Spectra'
x[i, j, ..., drop = FALSE]

## S4 method for signature 'Spectra'
filterAcquisitionNum(
  object,
  n = integer(),
  dataStorage = character(),
  dataOrigin = character()
)

## S4 method for signature 'Spectra'
filterEmptySpectra(object)

## S4 method for signature 'Spectra'
filterDataOrigin(object, dataOrigin = character())

## S4 method for signature 'Spectra'
filterDataStorage(object, dataStorage = character())

## S4 method for signature 'Spectra'
filterIntensity(
  object,
  intensity = c(0, Inf),
  msLevel. = unique(msLevel(object)),
  ...
)

## S4 method for signature 'Spectra'
filterIsolationWindow(object, mz = numeric())

## S4 method for signature 'Spectra'
filterMsLevel(object, msLevel. = integer())

## S4 method for signature 'Spectra'
filterMzRange(object, mz = numeric(), msLevel. = unique(msLevel(object)))

## S4 method for signature 'Spectra'
filterMzValues(
  object,
  mz = numeric(),
  tolerance = 0,
  ppm = 20,
  msLevel. = unique(msLevel(object))
)

## S4 method for signature 'Spectra'
filterPolarity(object, polarity = integer())

## S4 method for signature 'Spectra'
filterPrecursorMz(object, mz = numeric())

## S4 method for signature 'Spectra'
filterPrecursorCharge(object, z = integer())

## S4 method for signature 'Spectra'
filterPrecursorScan(object, acquisitionNum = integer(), f = dataOrigin(object))

## S4 method for signature 'Spectra'
filterRt(object, rt = numeric(), msLevel. = unique(msLevel(object)))

## S4 method for signature 'Spectra'
reset(object, ...)

## S4 method for signature 'Spectra'
bin(x, binSize = 1L, breaks = NULL, msLevel. = unique(msLevel(x)))

## S4 method for signature 'Spectra,Spectra'
compareSpectra(
  x,
  y,
  MAPFUN = joinPeaks,
  tolerance = 0,
  ppm = 20,
  FUN = ndotproduct,
  ...,
  SIMPLIFY = TRUE
)

## S4 method for signature 'Spectra,missing'
compareSpectra(
  x,
  y = NULL,
  MAPFUN = joinPeaks,
  tolerance = 0,
  ppm = 20,
  FUN = ndotproduct,
  ...,
  SIMPLIFY = TRUE
)

## S4 method for signature 'Spectra'
pickPeaks(
  object,
  halfWindowSize = 2L,
  method = c("MAD", "SuperSmoother"),
  snr = 0,
  k = 0L,
  descending = FALSE,
  threshold = 0,
  msLevel. = unique(msLevel(object)),
  ...
)

## S4 method for signature 'Spectra'
replaceIntensitiesBelow(
  object,
  threshold = min,
  value = 0,
  msLevel. = unique(msLevel(object))
)

## S4 method for signature 'Spectra'
smooth(
  x,
  halfWindowSize = 2L,
  method = c("MovingAverage", "WeightedMovingAverage", "SavitzkyGolay"),
  msLevel. = unique(msLevel(x)),
  ...
)

## S4 method for signature 'Spectra'
addProcessing(object, FUN, ..., spectraVariables = character())

Arguments

object

For Spectra: either a DataFrame or missing. See section on creation of Spectra objects for details. For all other methods a Spectra object.

f

For split: factor defining how to split x. See base::split() for details. For setBackend: factor defining how to split the data for parallelized copying of the spectra data to the new backend. For some backends changing this parameter can lead to errors. For combineSpectra: factor defining the grouping of the spectra that should be combined. For spectrapply: factor defining how object should be split. For estimatePrecursorIntensity and filterPrecursorScan: defining which spectra belong to the same original data file (sample). Defaults to f = dataOrigin(x).

BPPARAM

Parallel setup configuration. See bpparam() for more information. This is passed directly to the backendInitialize() method of the MsBackend.

...

Additional arguments.

x

A Spectra object.

p

For combineSpectra: factor defining how to split the input Spectra for parallel processing. Defaults to x$dataStorage, i.e., depending on the used backend, per-file parallel processing will be performed.

FUN

For addProcessing: function to be applied to the peak matrix of each spectrum in object. For compareSpectra: function to compare intensities of peaks between two spectra with each other. For combineSpectra: function to combine the (peak matrices) of the spectra. See section Data manipulations and examples below for more details.

y

A Spectra object. A DataFrame for joinSpectraData().

by.x

A character(1) specifying the spectra variable used for merging. Default is "spectrumId".

by.y

A character(1) specifying the column used for merging. Set to by.x if missing.

suffix.y

A character(1) specifying the suffix to be used for making the names of columns in the merged spectra variables unique. This suffix will be used to amend names(y), while spectraVariables(x) will remain unchanged.

ppm

For compareSpectra, containsMz, filterMzValues: numeric(1) defining a relative, m/z-dependent, maximal accepted difference between m/z values for peaks to be matched.

tolerance

For compareSpectra, containsMz: numeric(1) allowing to define a constant maximal accepted difference between m/z values for peaks to be matched. For containsMz and filterMzValues it can also be of length equal mz to specify a different tolerance for each m/z value.

method
  • For pickPeaks: character(1), the noise estimator that should be used; currently the Median Absolute Deviation (method = "MAD") and Friedman's Super Smoother (method = "SuperSmoother") are supported.

  • For smooth: character(1), the smoothing function that should be used; currently Moving-Average- (method = "MovingAverage"), Weighted-Moving-Average- (method = "WeightedMovingAverage") and Savitzky-Golay-Smoothing (method = "SavitzkyGolay") are supported.

  • For estimatePrecursorIntensity: character(1) defining whether the precursor intensity should be estimated on the previous MS1 spectrum (method = "previous", the default) or based on an interpolation on the previous and next MS1 spectrum (method = "interpolation").

msLevel.

integer defining the MS level(s) of the spectra to which the function should be applied (defaults to all MS levels of object). For filterMsLevel: the MS level to which object should be subsetted.

processingQueue

For Spectra: optional list of ProcessingStep objects.

metadata

For Spectra: optional list with metadata information.

backend

For Spectra: MsBackend to be used as backend. See section on creation of Spectra objects for details. For setBackend: instance of MsBackend. See section on creation of Spectra objects for details. For export: MsBackend to be used to export the data.

source

For Spectra: instance of MsBackend that can be used to import spectrum data from the provided files. See section Creation of objects, conversion and changing the backend for more details.

drop

For [, split: not considered.

value

replacement value for <- methods. See individual method description or expected data type.

mz

For filterIsolationWindow: numeric(1) with the m/z value to filter the object. For filterPrecursorMz and filterMzRange: numeric(2) defining the lower and upper m/z boundary. For filterMzValues: numeric with the m/z values to match peaks against.

which

for containsMz: either "any" or "all" defining whether any (the default) or all provided mz have to be present in the spectrum.

neutralLoss

for containsNeutralLoss: numeric(1) defining the value which should be subtracted from the spectrum's precursor m/z.

use.names

For lengths: ignored.

spectraVariables

For selectSpectraVariables: character with the names of the spectra variables to which the backend should be subsetted. For addProcessing: character with additional spectra variables that should be passed along to the function defined with FUN. See function description for details.

columns

For spectraData accessor: optional character with column names (spectra variables) that should be included in the returned DataFrame. By default, all columns are returned.

initial

For tic: logical(1) whether the initially reported total ion current should be reported, or whether the total ion current should be (re)calculated on the actual data (initial = FALSE, same as ionCount).

name

For $ and $<-: the name of the spectra variable to return or set.

i

For [: integer, logical or character to subset the object.

j

For [: not supported.

n

for filterAcquisitionNum: integer with the acquisition numbers to filter for.

dataStorage

For filterDataStorage: character to define which spectra to keep. For filterAcquisitionNum: optionally specify if filtering should occur only for spectra of selected dataStorage.

dataOrigin

For filterDataOrigin: character to define which spectra to keep. For filterAcquisitionNum: optionally specify if filtering should occur only for spectra of selected dataOrigin.

intensity

For filterIntensity: numeric of length 1 or 2 defining either the lower or the lower and upper intensity limit for the filtering, or a function that takes the intensities as input and returns a logical (of the same length as the number of peaks in the spectrum) indicating whether each peak should be retained or not. Defaults to intensity = c(0, Inf), thus only peaks with NA intensity are removed.

polarity

for filterPolarity: integer specifying the polarity to subset object.

z

For filterPrecursorCharge: integer() with the precursor charges to be used as filter.

acquisitionNum

for filterPrecursorScan: integer with the acquisition number of the spectra to which the object should be subsetted.

rt

for filterRt: numeric(2) defining the retention time range to be used to subset/filter object.

binSize

For bin: numeric(1) defining the size for the m/z bins. Defaults to binSize = 1.

breaks

For bin: numeric defining the m/z breakpoints between bins.

MAPFUN

For compareSpectra: function to map/match peaks between the two compared spectra. See joinPeaks() for more information and possible functions.

SIMPLIFY

For compareSpectra whether the result matrix should be simplified to a numeric if possible (i.e. if either x or y is of length 1).

halfWindowSize
  • For pickPeaks: integer(1), used in the identification of the mass peaks: a local maximum has to be the maximum in the window from (i - halfWindowSize):(i + halfWindowSize).

  • For smooth: integer(1), used in the smoothing algorithm, the window reaches from (i - halfWindowSize):(i + halfWindowSize).

snr

For pickPeaks: double(1) defining the Signal-to-Noise-Ratio. The intensity of a local maximum has to be higher than snr * noise to be considered as peak.

k

For pickPeaks: integer(1), number of values left and right of the peak that should be considered in the weighted mean calculation.

descending

For pickPeaks: logical, if TRUE just values between the nearest valleys around the peak centroids are used.

threshold
  • For pickPeaks: a double(1) defining the proportion of the maximal peak intensity. Just values above are used for the weighted mean calculation.

  • For replaceIntensitiesBelow: a numeric(1) defining the threshold or a function to calculate the threshold for each spectrum on its intensity values. Defaults to threshold = min.

Details

The Spectra class uses by default a lazy data manipulation strategy, i.e. data manipulations such as performed with replaceIntensitiesBelow are not applied immediately to the data, but applied on-the-fly to the spectrum data once it is retrieved. For some backends that allow to write data back to the data storage (such as the MsBackendDataFrame() and MsBackendHdf5Peaks()) it is possible to apply the processing queue with the applyProcessing function. See the Data manipulation and analysis methods section below for more details.

For details on plotting spectra, see plotSpectra().

Clarifications regarding scan/acquisition numbers and indices:

See also this issue.

Value

See individual method description for the return value.

Creation of objects, conversion, changing the backend and export

Spectra classes can be created with the Spectra constructor function which supports the following formats:

With ... additional arguments can be passed to the backend's backendInitialize() method. Parameter backend allows to specify which MsBackend should be used for data storage.

The backend of a Spectra object can be changed with the setBackend method that takes an instance of the new backend as second parameter backend. A call to setBackend(sps, backend = MsBackendDataFrame()) would for example change the backend of sps to the in-memory MsBackendDataFrame. Note that it is not possible to change the backend to a read-only backend (such as the MsBackendMzR() backend). setBackend changes the "dataOrigin" variable of the resulting Spectra object to the "dataStorage" variable of the backend before the switch.

The definition of the function is: setBackend(object, backend, f = dataStorage(object), ..., BPPARAM = bpparam()) and its parameters are:

Data from a Spectra object can be exported to a file with the export function. The actual export of the data has to be performed by the export method of the MsBackend class defined with the mandatory parameter backend. Note however that not all backend classes support export of data. From the MsBackend classes in the Spectra package currently only the MsBackendMzR backend supports data export (to mzML/mzXML file(s)); see the help page of the MsBackend for information on its arguments or the examples below or the vignette for examples.

The definition of the function is export(object, backend, ...) and its parameters are:

Accessing spectra data

Data subsetting, filtering and merging

Subsetting and filtering of Spectra objects can be performed with the below listed methods.

Several Spectra objects can be concatenated into a single object with the c or the concatenateSpectra function. Concatenation will fail if the processing queue of any of the Spectra objects is not empty or if different backends are used in the Spectra objects. The spectra variables of the resulting Spectra object are the union of the spectra variables of the individual Spectra objects.

Data manipulation and analysis methods

Many data manipulation operations, such as those listed in this section, are not applied immediately to the spectra, but added to a lazy processing/manipulation queue. Operations stored in this queue are applied on-the-fly to spectra data each time it is accessed. This lazy execution guarantees the same functionality for Spectra objects with any backend, i.e. backends supporting to save changes to spectrum data (MsBackendDataFrame() or MsBackendHdf5Peaks()) as well as read-only backends (such as the MsBackendMzR()). Note that for the former it is possible to apply the processing queue and write the modified peak data back to the data storage with the applyProcessing function.

Author(s)

Sebastian Gibb, Johannes Rainer, Laurent Gatto

Examples

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
## Create a Spectra from a `DataFrame` containing the spectrum data. The
## `mz` and `intensity` columns are lists with one numeric vector per
## spectrum (i.e. per row of the `DataFrame`).

spd <- DataFrame(msLevel = c(1L, 2L), rtime = c(1.1, 1.2))
spd$mz <- list(c(100, 103.2, 104.3, 106.5), c(45.6, 120.4, 190.2))
spd$intensity <- list(c(200, 400, 34.2, 17), c(12.3, 15.2, 6.8))

data <- Spectra(spd)
data

## Get the number of spectra
length(data)

## Get the number of peaks per spectrum
lengths(data)

## Create a Spectra from mzML files and use the `MsBackendMzR` on-disk
## backend. The example files are provided by the msdata package.
sciex_file <- dir(system.file("sciex", package = "msdata"),
    full.names = TRUE)
sciex <- Spectra(sciex_file, backend = MsBackendMzR())
sciex

## The MS data is on disk and will be read into memory on-demand. We can
## however change the backend to a `MsBackendDataFrame` backend which will
## keep all of the data in memory.
sciex_im <- setBackend(sciex, MsBackendDataFrame())
sciex_im

## The on-disk object `sciex` is light-weight, because it does not keep the
## MS peak data in memory. The `sciex_im` object in contrast keeps all the
## data in memory and its size is thus much larger.
object.size(sciex)
object.size(sciex_im)

## The spectra variable `dataStorage` returns for each spectrum the location
## where the data is stored. For in-memory objects:
head(dataStorage(sciex_im))

## While objects that use an on-disk backend will list the files where the
## data is stored.
head(dataStorage(sciex))

## The spectra variable `dataOrigin` returns for each spectrum the *origin*
## of the data. If the data is read from e.g. mzML files, this will be the
## original mzML file name. Compare the values before and after changing
## the backend:
head(dataOrigin(sciex))
head(dataOrigin(sciex_im))

## ---- ACCESSING AND ADDING DATA ----

## Get the MS level for each spectrum.
msLevel(data)

## Alternatively, we could also use $ to access a specific spectra variable.
## This could also be used to add additional spectra variables to the
## object (see further below).
data$msLevel

## Get the intensity and m/z values.
intensity(data)
mz(data)

## Determine whether one of the spectra has a specific m/z value
containsMz(data, mz = 120.4)

## Accessing spectra variables works for all backends:
intensity(sciex)
intensity(sciex_im)

## Get the m/z for the first spectrum.
mz(data)[[1]]

## Get the peak data (m/z and intensity values).
pks <- peaksData(data)
pks
pks[[1]]
pks[[2]]

## Note that we could get the same result by coercing the `Spectra` to
## a `list` or `SimpleList`:
as(data, "list")
as(data, "SimpleList")

## List all available spectra variables (i.e. spectrum data and metadata).
spectraVariables(data)

## For all *core* spectrum variables accessor functions are available. These
## return NA if the variable was not set.
centroided(data)
dataStorage(data)
rtime(data)
precursorMz(data)

## Add an additional metadata column.
data$spectrum_id <- c("sp_1", "sp_2")

## List spectra variables, "spectrum_id" is now also listed
spectraVariables(data)

## Get the values for the new spectra variable
data$spectrum_id

## Extract specific spectra variables.
spectraData(data, columns = c("spectrum_id", "msLevel"))

## Drop spectra variable data and/or columns.
res <- selectSpectraVariables(data, c("mz", "intensity"))

## This removed the additional column "spectrum_id" and deleted all values
## for all spectra variables, except "mz" and "intensity".
spectraData(res)

## Compared to the data before selectSpectraVariables.
spectraData(data)


## ---- SUBSETTING, FILTERING AND COMBINING ----

## Subset to all MS2 spectra.
data[msLevel(data) == 2]

## Same with the filterMsLevel function
filterMsLevel(data, 2)

## Below we combine the `data` and `sciex_im` objects into a single one.
data_comb <- c(data, sciex_im)

## The combined Spectra contains a union of all spectra variables:
head(data_comb$spectrum_id)
head(data_comb$rtime)
head(data_comb$dataStorage)
head(data_comb$dataOrigin)

## Filter a Spectra for a target precursor m/z with a tolerance of 10 ppm.
## `filterPrecursorMz` expects the lower and upper m/z boundary, which we
## calculate below from the target m/z.
spd$precursorMz <- c(323.4, 543.2302)
data_filt <- Spectra(spd)
filterPrecursorMz(data_filt, mz = 543.23 + ppm(c(-543.23, 543.23), 10))

## Filter a Spectra keeping only peaks matching certain m/z values
sps_sub <- filterMzValues(data, mz = c(103, 104), tolerance = 0.3)
mz(sps_sub)

## Filter a Spectra keeping only peaks within a m/z range
sps_sub <- filterMzRange(data, mz = c(100, 300))
mz(sps_sub)

## Remove empty spectra variables
sciex_noNA <- dropNaSpectraVariables(sciex)

## Available spectra variables before and after dropNaSpectraVariables
spectraVariables(sciex)
spectraVariables(sciex_noNA)


## Adding new spectra variables: the `DataFrame` below provides two new
## variables for spectra 3 to 12 of `sciex`.
spv <- DataFrame(spectrumId = sciex$spectrumId[3:12], ## used for merging
                 var1 = rnorm(10),
                 var2 = sample(letters, 10))
spv

## Merge the new variables into the spectra data, matching on "spectrumId".
sciex2 <- joinSpectraData(sciex, spv, by.y = "spectrumId")

spectraVariables(sciex2)
spectraData(sciex2)[1:13, c("spectrumId", "var1", "var2")]


## ---- DATA MANIPULATIONS AND OTHER OPERATIONS ----

## Set the data to be centroided
centroided(data) <- TRUE

## Replace peak intensities below 40 with 3.
res <- replaceIntensitiesBelow(data, threshold = 40, value = 3)
res

## Get the intensities of the first and second spectrum.
intensity(res)[[1]]
intensity(res)[[2]]

## Remove all peaks with an intensity below 40.
res <- filterIntensity(res, intensity = c(40, Inf))

## Get the intensities of the first and second spectrum.
intensity(res)[[1]]
intensity(res)[[2]]

## The number of peaks per spectrum is now different
lengths(mz(res))
lengths(mz(data))

## `filterIntensity` also accepts a function instead of numeric limits. The
## function defined below flags, for one spectrum, all peaks whose intensity
## exceeds a fraction (1 / `prop`, by default one third) of the largest
## intensity in that spectrum. It returns a logical of the same length as `x`.
keep_peaks <- function(x, prop = 3) {
    cutoff <- max(x, na.rm = TRUE) / prop
    x > cutoff
}
res2 <- filterIntensity(data, intensity = keep_peaks)
intensity(res2)[[1L]]
intensity(data)[[1L]]

## We can also change the proportion by simply passing the `prop` parameter
## to the function. To keep only peaks that have an intensity which is
## larger than half of the maximum intensity:
res2 <- filterIntensity(data, intensity = keep_peaks, prop = 2)
intensity(res2)[[1L]]
intensity(data)[[1L]]

## Since data manipulation operations are by default not directly applied to
## the data but only added to the internal lazy evaluation queue, it is also
## possible to remove these data manipulations with the `reset` function:
res_rest <- reset(res)
res_rest
lengths(mz(res_rest))
lengths(mz(res))
lengths(mz(data))

## `reset` after an `applyProcessing` can not restore the data, because the
## data in the backend was changed. Similarly, `reset` after any filter
## operations can not restore data for a `Spectra` with a
## `MsBackendDataFrame`.
res_2 <- applyProcessing(res)
res_rest <- reset(res_2)
lengths(mz(res))
lengths(mz(res_rest))


## Compare spectra: comparing spectra 2 and 3 against spectra 10:20 using
## the normalized dot product method (the default, `FUN = ndotproduct`).
res <- compareSpectra(sciex_im[2:3], sciex_im[10:20])
## The first row contains comparisons of spectrum 2 with spectra 10 to 20 and
## the second row comparisons of spectrum 3 with spectra 10 to 20
res

## A Pearson correlation can be used as similarity measure instead. The
## function below receives the two peak matrices and correlates their
## second columns (the intensity values). Additional arguments passed
## through `...` are accepted but not used.
correlateSpectra <- function(x, y, use = "pairwise.complete.obs", ...) {
    intensity_x <- x[, 2]
    intensity_y <- y[, 2]
    cor(intensity_x, intensity_y, use = use)
}
res <- compareSpectra(sciex_im[2:3], sciex_im[10:20],
    FUN = correlateSpectra)
res

## Use compareSpectra to determine the number of common (matching) peaks
## with a ppm of 10:
## type = "inner" uses an *inner join* to match peaks, i.e. keeps only
## peaks that can be mapped between both spectra. The provided FUN returns
## simply the number of matching peaks.
compareSpectra(sciex_im[2:3], sciex_im[10:20], ppm = 10, type = "inner",
    FUN = function(x, y, ...) nrow(x))

## Apply an arbitrary function to each spectrum in a Spectra.
## In the example below we calculate the mean intensity for each spectrum
## in a subset of the sciex_im data. Note that we can access all variables
## of each individual spectrum either with the `$` operator or the
## corresponding method.
res <- spectrapply(sciex_im[1:20], FUN = function(x) mean(x$intensity[[1]]))
head(res)

## It is however important to note that dedicated methods to access the
## data (such as `intensity`) are much more efficient than using
## `spectrapply`. The same result is obtained by applying the function to
## the list of intensities returned by `intensity`:
res <- lapply(intensity(sciex_im[1:20]), mean)
head(res)

## Calculating the precursor intensity for MS2 spectra:
##
## Some MS instrument manufacturers don't report the precursor intensities
## for MS2 spectra. The `estimatePrecursorIntensity` function can be used
## in these cases to calculate the precursor intensity on MS1 data. Below
## we load an mzML file from a vendor providing precursor intensities and
## compare the estimated and reported precursor intensities.
tmt <- Spectra(msdata::proteomics(full.names = TRUE)[5],
    backend = MsBackendMzR())
pmi <- estimatePrecursorIntensity(tmt)
plot(pmi, precursorIntensity(tmt))

## We can also replace the original precursor intensity values with the
## newly calculated ones
tmt$precursorIntensity <- pmi

## ---- DATA EXPORT ----

## Some `MsBackend` classes provide an `export` method to export the data to
## the file format supported by the backend. The `MsBackendMzR` for example
## allows to export MS data to mzML or mzXML file(s), the `MsBackendMgf`
## (defined in the MsBackendMgf R package) would allow to export the data
## in mgf file format. Below we export the MS data in `data`. We
## call the `export` method on this object, specify the backend that should
## be used to export the data (and which also defines the output format) and
## provide a file name.
fl <- tempfile()
export(data, MsBackendMzR(), file = fl)

## This exported our data in mzML format. Below we read the first 6 lines
## from that file.
readLines(fl, n = 6)

## If only a single file name is provided, all spectra are exported to that
## file. To export data with the `MsBackendMzR` backend to different files, a
## file name for each individual spectrum has to be provided.
## Below we export each spectrum to its own file.
fls <- c(tempfile(), tempfile())
export(data, MsBackendMzR(), file = fls)

## Reading the data from the first file
res <- Spectra(backendInitialize(MsBackendMzR(), fls[1]))

## Compare the m/z values read from the first exported file with those of
## the original object.
mz(res)
mz(data)

lgatto/Spectra documentation built on July 4, 2021, 5:53 p.m.