################################################################################
# TODO: Register identification object with specific methods
# A unique ID defined for the id class is merged with either peaks or
# links - this id is carried around and included in exports and can be
# used to query the id object at a later time.
# Create empty MsList when criteria is not met.
#
# Copy method for MsData
#
#' @include MsConnections.R
#' @include tableFormats.R
#' @include generics.R
#' @include peakMethods.R
#' @include aaa.R
#'
NULL
#' Main class for interacting with MS data
#'
#' This class is the main entry point for interacting with MS data in the MSsary
#' package. It is designed to seamlessly integrate data from raw data files and
#' additional results from various algorithms. Furthermore it provides a mean to
#' commit persistent changes to the raw data (e.g. filtering or recalibration),
#' that will be available across sessions in a non-destructive manner.
#'
#' @details
#' Behind the scene an SQLite-based file is created that handles all deviations
#' and additions to the raw data. The file is named after the raw data file but
#' has a .sary file extension to illustrate its relationship to this package.
#' Because it is just an SQLite database file the data generated with MSsary can
#' easily be accessed from other programs.
#'
#' @section Constructors:
#' Objects of class MsData can be created either from scratch with
#' \code{\link{createMsData}} or from an already created sary-file
#' \code{\link{loadMsData}}.
#'
#' @slot connections An MsConnections reference class object
#'
#' @family MSsary-classes
#'
#' @seealso \code{\link{createMsData}} \code{\link{loadMsData}}
#'
setClass(
'MsData',
slots=list(
connections='MsConnections'
)
)
### METHODS
#' @describeIn MsData Short summary of content
#'
#' @param object An MsData object
#'
setMethod(
'show', 'MsData',
function(object) {
cat('An MsData object\n')
cat('\n')
cat(' Raw file:', basename(con(object)$rawFile), '\n')
cat(' Sary file:', basename(con(object)$saryFile), '\n')
cat('\n')
cat(' Number of scans:', length(object), '\n')
nPeaks <- con(object)$nPeaks()
if(nPeaks != 0) {
cat(' Number of peaks:', nPeaks, '\n')
}
cat('\n')
cat('Last edit history:\n')
cat('\n')
print(tail(editHistory(object)[,1:3]))
}
)
#' @describeIn MsData Number of scans in the raw data
#'
#' @param x An MsData object
#'
setMethod(
'length', 'MsData',
function(x) {
con(x)$length()
}
)
setMethod(
'==', c('MsData', 'MsData'),
function(e1, e2) {
con(e1) == con(e2)
}
)
setMethod(
'!=', c('MsData', 'MsData'),
function(e1, e2) {
!(e1 == e2)
}
)
#' @describeIn con Get connection from MsData object
#'
setMethod(
'con', 'MsData',
function(object) {
return(object@connections)
}
)
#' @describeIn MsData Get the edit history of the object
#'
setMethod(
'editHistory', 'MsData',
function(object) {
dbGetQuery(con(object)$sary(), 'SELECT * FROM history')
}
)
#' Extract scans from an MsData object
#'
#' This method returns one or several scans based on a range of selection
#' criteria. See the arguments section for a full list.
#'
setMethod(
'scans', 'MsData',
function(object, ..., raw=FALSE) {
acqNum <- unlist(getAcqNum(con(object), ..., raw=raw))
if(length(acqNum) == 0) {
return(new('MsScanList'))
}
scInfo <- con(object)$getHeader(acqNum, raw=raw)
scData <- con(object)$getScans(acqNum, raw=raw)
mapping <- getListMapping(scData, 1, raw=as.integer(raw))
mapping <- cbind(mapping, matrix(isCentroided(scData), dimnames = list(NULL, 'mode')))
scData <- do.call(rbind, scData)
colnames(scData) <- c('mz', 'intensity')
new('MsScanList', connections=list(object), info=scInfo, data=scData, mapping=mapping)
}
)
#' Extract chromatograms from an MsData object
#'
#' This function is used to get chromatographic data from an MsData object. If
#' nothing besides the object is passed the full chromatogram is extracted, but
#' this can be altered by passing in scan, mz and retention time constraints.
#' The returned MsScanList object contains both TIC and BPC so there is no need
#' to specify this during creation
#'
setMethod(
'chroms', 'MsData',
function(object, ..., mz, raw=FALSE) {
args <- list(...)
acqNum <- getContAcqNum(con(object), ..., raw=raw)
if(length(unlist(acqNum)) == 0) {
return(new('MsChromList'))
}
msLevels <- toFilter(args$msLevel)$data
msLevels <- rep(msLevels, length.out=length(acqNum))
if(missing(mz)) {
scanNums <- sort(unique(do.call('c', acqNum)))
data <- con(object)$getHeader(scanNums, raw=raw)[, c('acquisitionNum', 'retentionTime', 'totIonCurrent', 'basePeakIntensity')]
data <- lapply(acqNum, function(x) {data[data$acquisitionNum %in% x,]})
} else {
if(!rangeFilter(mz)) {
stop('mz must specifiy an interval: Use either \'BETWEEN\', \'ABOVE\' or \'BELOW\'')
}
mzFunc <- toFunction(mz)
if(length(acqNum) > length(mzFunc)) {
mzFunc <- mzFunc[rep(1:length(mzFunc), length.out=length(acqNum))]
} else if(length(acqNum) < length(mzFunc)) {
acqNum <- rep(acqNum, length.out=length(mzFunc))
}
data <- con(object)$extractIC(acqNum, mzFunc, raw=raw)
}
msLevels <- toFilter(args$msLevel)$data
msLevels <- rep(msLevels, length.out=length(data))
info <- lapply(1:length(data), function(i) {
data.frame(
msLevel=msLevels[i],
nScan=nrow(data[[i]]),
maxTIC=max(data[[i]]$totIonCurrent),
maxBPC=max(data[[i]]$basePeakIntensity),
minRT=min(data[[i]]$retentionTime),
maxRT=max(data[[i]]$retentionTime)
)
})
info <- do.call(rbind, info)
if(!missing(mz)) {
mzRange <- toRange(mz)
mzRange <- mzRange[rep(1:nrow(mzRange), length.out=nrow(info)), , drop=FALSE]
colnames(mzRange) <- c('minMZ', 'maxMZ')
info <- cbind(info, mzRange)
}
mapping <- getListMapping(data, 1, raw=as.integer(raw))
data <- as.matrix(do.call(rbind, data))
cNames <- c('acquisitionNum', 'retentionTime', 'TIC', 'BPC')
colnames(data) <- cNames
new('MsChromList', connections=list(object), info=info, data=data, mapping=mapping)
}
)
#' Extract ions from an MsData object
#'
setMethod(
'ions', 'MsData',
function(object, ..., mz, raw=FALSE) {
args <- list(...)
acqNum <- getContAcqNum(con(object), ..., raw=raw)
if(length(unlist(acqNum)) == 0) {
return(new('MsIonList'))
}
msLevels <- toFilter(args$msLevel)$data
msLevels <- rep(msLevels, length.out=length(acqNum))
if(missing(mz)) {
mzFunc <- NULL
} else {
if(!rangeFilter(mz)) {
stop('mz must specifiy an interval: Use either \'BETWEEN\', \'ABOVE\' or \'BELOW\'')
}
mzFunc <- toFunction(mz)
if(length(acqNum) > length(mzFunc)) {
mzFunc <- mzFunc[rep(1:length(mzFunc), length.out=length(acqNum))]
} else if(length(acqNum) < length(mzFunc)) {
acqNum <- rep(acqNum, length.out=length(mzFunc))
}
}
data <- con(object)$extractIons(acqNum, mzFunc, raw=raw)
info <- mapply(function(x, msLevel) {
data.frame(
msLevel=msLevel,
minRT=min(x[, 'retentionTime']),
maxRT=max(x[, 'retentionTime']),
minMZ=min(x[, 'mz']),
maxMZ=max(x[, 'mz']),
minINT=min(x[, 'intensity']),
maxINT=max(x[, 'intensity'])
)
}, x=data, msLevel=msLevels, SIMPLIFY=FALSE)
info <- do.call(rbind, info)
mapping <- getListMapping(data, 1, raw=as.integer(raw))
data <- do.call(rbind, data)
new('MsIonList', connections=list(object), info=info, data=data, mapping=mapping)
}
)
#' Extrack peaks from an MsData object
#'
setMethod(
'peaks', 'MsData',
function(object, ...) {
ids <- unlist(getPeakIds(con(object), ...))
if(length(ids) == 0) {
return(new('MsPeakList'))
}
info <- con(object)$getPeaks(ids)
data <- lapply(info$peak, matrix, ncol=2, byrow=TRUE)
info$peak <- NULL
mapping <- getListMapping(data, 1)
data <- do.call(rbind, data)
colnames(data) <- c('retentionTime', 'intensity')
new('MsPeakList', connections=list(object), info=info, data=data, mapping=mapping)
}
)
#' Scan preprocessing in MsData objects
#'
setMethod(
'prepareScans', 'MsData',
function(object, method, ...) {
arguments <- list(...)
dataSubset <- !(names(arguments) %in% prepareMethods$methodArgs(method))
acqNum <- unlist(do.call(getAcqNum, c(con=con(object), arguments[dataSubset])))
arguments <- arguments[!dataSubset]
arguments$name <- method
arguments$scans <- con(object)$getScans(acqNum)
arguments$info <- con(object)$getHeader(acqNum)
scans <- do.call(prepareMethods$useMethod, arguments)
con(object)$setScans(arguments$info$acquisitionNum, scans)
funcCall <- expand.call(call=sys.call(-1), expand.dots = TRUE)
con(object)$addData(
'history',
data.frame(
time = as.character(Sys.time()),
operation = 'Spectra modified',
MSsary_version = as.character(packageVersion('MSsary')),
call = callToString(funcCall),
augPackage = prepareMethods$getPackage(method),
augPackVersion = prepareMethods$getVersion(method),
stringsAsFactors=FALSE
)
)
invisible(TRUE)
}
)
#' Peak detection in MsData object
#'
setMethod(
'detectPeaks', 'MsData',
function(object, method, ...) {
arguments <- list(...)
dataSubset <- names(arguments) %in% names(formals(getContAcqNum))
acqNum <- unlist(do.call(getContAcqNum, c(con=con(object), arguments[dataSubset])))
arguments <- arguments[!dataSubset]
arguments$name <- method
arguments$scans <- con(object)$getScans(acqNum)
arguments$info <- con(object)$getHeader(acqNum)
peaks <- do.call(peakMethods$useMethod, arguments)
funcCall <- expand.call(call=sys.call(-1), expand.dots = TRUE)
prevID <- dbGetQuery(con(object)$sary(), 'SELECT IFNULL(MAX(rowid), 0) AS max FROM peakInfo')$max
con(object)$addData('peakInfo', peaks[, c('msLevel', 'length', 'FWHM', 'mzMean', 'scanMax', 'maxHeight', 'area', 'peak')])
ids <- dbGetQuery(con(object)$sary(), paste0('SELECT rowid AS id FROM peakInfo WHERE rowid > ', prevID))$id
con(object)$addData('peakLoc', data.frame(peakID=ids, peaks[, c('scanStart', 'scanEnd', 'mzMin', 'mzMax')]))
con(object)$addData(
'history',
data.frame(
time = as.character(Sys.time()),
operation = 'Peaks detected',
MSsary_version = as.character(packageVersion('MSsary')),
call = callToString(funcCall),
augPackage = peakMethods$getPackage(method),
augPackVersion = peakMethods$getVersion(method),
stringsAsFactors=FALSE
)
)
invisible(TRUE)
}
)
### CONSTRUCTORS
#' Create an MsData object from a raw MS data file
#'
#' This function takes an mzR-compliant data file and create the initial,
#' minimal sary database file and set up the link to both the database and raw
#' file.
#'
#' @param rawFile The path to an mzR-compliant MS data file
#'
#' @return An MsData object
#'
#' @seealso \code{\link{loadMsData}} \code{\linkS4class{MsData}}
#'
#' @examples
#' file <- 'test'
#' msdata <- createMsData(file)
#'
#' @importFrom tools file_path_sans_ext file_path_as_absolute
#' @importFrom mzR openMSfile close header
#'
#' @export
#'
createMsData <- function(rawFile, saryName, force=FALSE) {
if(missing(saryName)) {
dbFile <- paste0(file_path_sans_ext(rawFile,compression = T), '.sary')
} else {
dbFile <- file.path(dirname(rawFile), paste0(saryName, '.sary'))
}
if(force) {
if(unlink(dbFile) == 1) {
stop('Cannot remove ', dbFile)
}
}
if(file.exists(dbFile)) {
stop('Sary file already exists at: ', dbFile)
}
file.create(dbFile)
connection <- MsConnections(rawFile, dbFile)
connection$addTable('header', headerTableFormat)
raw <- openMSfile(rawFile)
headerInfo <- header(raw)
close(raw)
connection$addData('header', headerInfo)
connection$addTable('history', historyTableFormat)
funcCall <- match.call()
connection$addData(
'history',
data.frame(
time = as.character(Sys.time()),
operation = 'Sary created',
MSsary_version = as.character(packageVersion('MSsary')),
call = callToString(funcCall),
stringsAsFactors=FALSE
)
)
connection$addTable('mzR', 'location TEXT NOT NULL')
connection$addData('mzR', data.frame(location=file_path_as_absolute(rawFile)))
connection$addTable('scans', scanTableFormat)
connection$addTable('peakInfo', peakTableFormat)
connection$addTable('peakLoc', peakRtree, rtree=TRUE)
dbGetQuery(
connection$sary(),
'CREATE VIEW currentRawHeader AS SELECT * FROM header WHERE acquisitionNum NOT IN (SELECT scanNum FROM scans)'
)
dbGetQuery(
connection$sary(),
'CREATE VIEW currentModHeader AS SELECT h.seqNum, h.acquisitionNum, h.msLevel, h.polarity, s.peaksCount, s.totIonCurrent, h.retentionTime, s.basePeakMZ, s.basePeakIntensity, h.collisionEnergy, h.ionisationEnergy, s.lowMZ, s.highMZ, h.precursorScanNum, h.precursorMZ, h.precursorCharge, h.precursorIntensity, h.mergedScan, h.mergedResultScanNum, h.mergedResultStartScanNum, h.mergedResultEndScanNum FROM header AS h JOIN scans AS s ON h.acquisitionNum=s.scanNum AND s.remove == 0'
)
dbGetQuery(
connection$sary(),
'CREATE VIEW currentHeader AS SELECT * FROM currentRawHeader UNION SELECT * FROM currentModHeader ORDER BY seqNum'
)
new('MsData', connections=connection)
}
#' Load in an already created sary file as an MsData object
#'
#' This function creates a new MsData object from an already created sary file,
#' together with the accompanying raw data file. The function automatically
#' checks the sary file and see if it complies with the format. By default it
#' also does a light check on whether the data in the raw file corresponds to
#' the data in the sary file.
#'
#' @param rawFile The path to the raw datafile
#'
#' @param saryFile The path to the sary database file
#'
#' @param testIntegrity Should the raw and sary file be compared to each other
#'
#' @param testSize The number of randomly selected scans to use for comparing
#' raw and sary if \code{testIntegrity=TRUE}
#'
#' @return An MsData object
#'
#' @seealso \code{\link{createMsData}} \code{\linkS4class{MsData}}
#'
#' @export
#'
loadMsData <- function(saryFile, rawFile, testIntegrity=TRUE, testSize=10) {
if(missing(rawFile)) {
rawFile <- getMzrPath(saryFile)
if(!file.exists(rawFile)) {
rawFile <- file.path(dirname(saryFile), basename(rawFile))
if(!file.exists(rawFile)) {
stop('Cannot infer raw data location')
}
}
}
connection <- MsConnections(rawFile, saryFile)
if(!connection$verify()) {
stop(saryFile, ' does not look like a saryFile')
}
if(testIntegrity) {
if(!connection$validate(testSize)) {
stop(saryFile, ' and ', rawFile, ' does not match')
}
}
new('MsData', connections=connection)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.