R/DataClasses.R

## All class definitions should go in here.
#' @include AllGenerics.R functions-XChromatogram.R functions-XChromatograms.R

############################################################
## Class unions
setClassUnion("characterOrNULL", c("character", "NULL"))
setClassUnion("logicalOrNumeric", c("logical", "numeric"))
##setClassUnion("ANYorNULL", c("ANY", "NULL"))


############################################################
## xcmsSet
##
setClass("xcmsSet",
         representation = representation(peaks = "matrix",
                                         groups = "matrix",
                                         groupidx = "list",
                                         filled="numeric",
                                         phenoData = "data.frame",
                                         rt = "list",
                                         filepaths = "character",
                                         profinfo = "list",
                                         dataCorrection="numeric",
                                         polarity = "character",
                                         progressInfo = "list",
                                         progressCallback="function",
                                         mslevel = "numeric",
                                         scanrange = "numeric",
                                         .processHistory = "list"),
         prototype = prototype(peaks = matrix(nrow = 0, ncol = 0),
                               groups = matrix(nrow = 0, ncol = 0),
                               groupidx = list(),
                               filled = integer(0),
                               phenoData = data.frame(),
                               rt = list(),
                               filepaths = character(0),
                               profinfo = vector("list"),
                               dataCorrection=integer(0),
                               polarity = character(0),
                               progressInfo = list(),
                               mslevel = numeric(0),
                               scanrange= numeric(0),
                               progressCallback = function(progress) NULL,
                               .processHistory = list()),
         validity = function(object) {
             msg <- character()
             ## Check if all slots are present.
             slNames <- slotNames(object)
             missingSlots <- character()
             for (i in 1:length(slNames)) {
                 if (!.hasSlot(object, slNames[i]))
                     missingSlots <- c(missingSlots, slNames[i])
             }
             if (length(missingSlots) > 0)
                 msg <- c(msg, paste0("This xcmsSet lacks slot(s): ",
                                      paste(missingSlots, collapse = ","),
                                      ". Please update the object using",
                                      " the 'updateObject' method."))
             ## Check the .processHistory slot.
             if (!any(missingSlots == ".processHistory")) {
                 inh <- unlist(lapply(object@.processHistory,
                                      FUN = function(z) {
                                          return(inherits(z, "ProcessHistory"))
                                      }))
                 if (!all(inh))
                     msg <- c(msg,
                              paste0("Slot '.processHistory' should",
                                     " only contain 'ProcessHistory'",
                                     " objects!"))
             }
             if (length(msg))
                 return(msg)
             return(TRUE)
         })

############################################################
## xcmsEIC
setClass("xcmsEIC",
         representation(eic = "list",
                        mzrange = "matrix",
                        rtrange = "matrix",
                        rt = "character",
                        groupnames = "character"),
         prototype(eic = list(),
                   mzrange = matrix(nrow = 0, ncol = 0),
                   rtrange = matrix(nrow = 0, ncol = 0),
                   rt = character(0),
                   groupnames = character(0)))

############################################################
## xcmsFragments
setClass("xcmsFragments",
         representation(peaks = "matrix",
                        MS2spec = "list",
                        specinfo = "matrix"
                        ##, pipeline = "xcmsRawPipeline"
                        ),
         prototype(peaks = matrix(nrow = 0, ncol = 6),
                   MS2spec=NULL,
                   specinfo=NULL
                   ##, pipeline = new("xcmsRawPipeline")
                   ))

############################################################
## xcmsSource
setClass("xcmsSource", representation("VIRTUAL"))
## If given an xcmsSource object, simply return it unchanged
setMethod("xcmsSource", "xcmsSource", function(object) object)

############################################################
## xcmsFileSource
setClass("xcmsFileSource",
         representation("character"),
         contains="xcmsSource",
         validity=function(object) {
             if (file.exists(object)) TRUE
             else paste("File not found:", object)
         })

############################################################
## xcmsRaw
setClass("xcmsRaw", representation(env = "environment",
                                   tic = "numeric",
                                   scantime = "numeric",
                                   scanindex = "integer",
                                   polarity = "factor",
                                   acquisitionNum = "integer",
                                   profmethod = "character",
                                   profparam = "list",
                                   mzrange = "numeric",
                                   gradient = "matrix",
                                   msnScanindex = "integer",
                                   msnAcquisitionNum = "integer",
                                   msnPrecursorScan = "integer",
                                   msnLevel = "integer",
                                   msnRt = "numeric",
                                   msnPrecursorMz = "numeric",
                                   msnPrecursorIntensity = "numeric",
                                   msnPrecursorCharge = "numeric",
                                   msnCollisionEnergy = "numeric",
                                   filepath = "xcmsSource",
                                   scanrange = "numeric",
                                   mslevel = "numeric"),
         prototype(env = new.env(parent=.GlobalEnv),
                   tic = numeric(0),
                   scantime = numeric(0),
                   scanindex = integer(0),
                   polarity = factor(integer(0)),
                   acquisitionNum = integer(0),
                   profmethod = "bin",
                   profparam = list(),
                   mzrange = numeric(0),
                   gradient = matrix(nrow=0, ncol=0),
                   msnScanindex = integer(0),
                   msnAcquisitionNum = integer(0),
                   msnLevel = integer(0),
                   msnRt = numeric(0),
                   msnPrecursorScan = integer(0),
                   msnPrecursorMz = numeric(0),
                   msnPrecursorIntensity = numeric(0),
                   msnPrecursorCharge = numeric(0),
                   msnCollisionEnergy = numeric(0),
                   scanrange = NULL,
                   mslevel = 1
                   ))

############################################################
## netCdfSource
setClass("netCdfSource", contains="xcmsFileSource")

############################################################
## rampSource
setClass("rampSource", contains="xcmsFileSource")

############################################################
## pwizSource
setClass("pwizSource", contains="xcmsFileSource")

############################################################
## xcmsPeaks
setClass("xcmsPeaks", contains = "matrix")

############################################################
## Processing history type statics
.PROCSTEP.UNKNOWN <- "Unknown"
.PROCSTEP.PEAK.DETECTION <- "Peak detection"
.PROCSTEP.PEAK.GROUPING <- "Peak grouping"
.PROCSTEP.RTIME.CORRECTION <- "Retention time correction"
.PROCSTEP.PEAK.FILLING <- "Missing peak filling"
.PROCSTEP.CALIBRATION <- "Calibration"
.PROCSTEPS <- c(
    .PROCSTEP.UNKNOWN,
    .PROCSTEP.PEAK.DETECTION,
    .PROCSTEP.PEAK.GROUPING,
    .PROCSTEP.RTIME.CORRECTION,
    .PROCSTEP.PEAK.FILLING,
    .PROCSTEP.CALIBRATION
)

############################################################
## ProcessHistory
#' @aliases ProcessHistory
#'
#' @title Tracking data processing
#'
#' @description Objects of the type \code{ProcessHistory} allow to keep track
#'     of any data processing step in an metabolomics experiment. They are
#'     created by the data processing methods, such as
#'     \code{\link{findChromPeaks}} and added to the corresponding results
#'     objects. Thus, usually, users don't need to create them.
#'
#' @slot type character(1): string defining the type of the processing step.
#'     This string has to match predefined values. Use
#'     \code{\link{processHistoryTypes}} to list them.
#'
#' @slot date character(1): date time stamp when the processing step was started.
#'
#' @slot info character(1): optional additional information.
#'
#' @slot fileIndex integer of length 1 or > 1 to specify on which
#'     samples of the object the processing was performed.
#'
#' @slot error (ANY): used to store eventual calculation errors.
#'
#' @rdname ProcessHistory-class
setClass("ProcessHistory",
         slots = c(
             type = "character",
             date = "character",
             info = "character",
             fileIndex = "integer",
             error = "ANY"
         ),
         contains = "Versioned",
         prototype = prototype(
             type = .PROCSTEP.UNKNOWN,
             date = character(),
             info = character(),
             fileIndex = integer(),  ## This can be of length 1 or > 1.
             error = NULL
         ),
         validity = function(object) {
             msg <- character()
             ## check type:
             if (!any(object@type == .PROCSTEPS))
                 msg <- c(msg, paste0("Got invalid type '", object@type,
                                      "'! Allowd are: ",
                                      paste0("\"", .PROCSTEPS, "\"",
                                             collapse = ", ")))
             if (length(object@type) > 1)
                 msg <- c(msg, paste0("length of 'type' should not be ",
                                      "larger than 1!"))
             if (length(object@date) > 1)
                 msg <- c(msg, paste0("length of 'date' should not be ",
                                      "larger than 1!"))
             if (length(object@info) > 1)
                 msg <- c(msg, paste0("length of 'info' should not be ",
                                      "larger than 1!"))
             if (length(msg))
                 msg
             else
                 TRUE
         }
         )

## BasicParam class
## CentWaveParam
setClass("Param",
         representation = representation("VIRTUAL"),
         contains = c("Versioned"))
setClassUnion("ParamOrNULL", c("Param", "NULL"))

#' @aliases GenericParam
#'
#' @title Generic parameter class
#'
#' @description The \code{GenericParam} class allows to store generic parameter
#'     information such as the name of the function that was/has to be called
#'     (slot \code{fun}) and its arguments (slot \code{args}). This object is
#'     used to track the process history of the data processings of an
#'     \code{\link{XCMSnExp}} object. This is in contrast to e.g. the
#'     \code{\link{CentWaveParam}} object that is passed to the actual
#'     processing method.
#'
#' @seealso \code{\link{processHistory}} for how to access the process history
#'     of an \code{\link{XCMSnExp}} object.
#'
#' @slot fun \code{character} specifying the function name.
#'
#' @slot args \code{list} (ideally named) with the arguments to the
#'     function.
#'
#' @slot .__classVersion__ the version of the class.
#'
#' @author Johannes Rainer
#'
#' @rdname GenericParam
#'
#' @examples
#' prm <- GenericParam(fun = "mean")
#'
#' prm <- GenericParam(fun = "mean", args = list(na.rm = TRUE))
setClass("GenericParam",
         slots = c(fun = "character",
                   args = "list"),
         contains = "Param",
         prototype = prototype(
             fun = character(),
             args = list()
         ),
         validity = function(object) {
             msg <- character()
             if (length(object@args) > 0)
                 if (!length(object@fun) > 0)
                     msg <- c(msg, paste0("No function name specified in '@fun'",
                                          " but got '@args'"))
             if (length(object@fun) > 1)
                 msg <- c(msg, paste0("'@fun' has to be of length 1"))
             if (length(msg)) msg
             else TRUE
         }
         )

#' @aliases XProcessHistory
#'
#' @title Tracking data processing
#'
#' @description The \code{XProcessHistory} extends the \code{ProcessHistory} by
#'     adding a slot \code{param} that allows to store the actual parameter
#'     class of the processing step.
#'
#' @slot param (Param): an object of type \code{Param} (e.g.
#'     \code{\link{CentWaveParam}}) specifying the settings of the processing
#'     step.
#'
#' @slot msLevel: \code{integer} definining the MS level(s) on which the
#'     analysis was performed.
#'
#' @rdname ProcessHistory-class
setClass("XProcessHistory",
         slots = c(
             param = "ParamOrNULL",
             msLevel = "integer"
         ),
         contains = "ProcessHistory",
         prototype = prototype(
             param = NULL,
             msLevel = NA_integer_
         ),
         validity = function(object) {
             msg <- character()
             if (length(object@param) > 0)
                 if (!is(object@param, "Param"))
                     msg <- c(msg,
                              paste0("Only objects from type 'Param' ",
                                     "allowed in slot '@param'! I got ",
                                     class(object@param)))
             if (!is.na(msLevel(object)))
                 if (msLevel(object) < 0)
                     msg <- c(msg, "msLevel has to be a positive integer")
             if (length(msg)) msg
             else TRUE
         })

#' @aliases findChromPeaks
#'
#' @title Chromatographic peak detection methods.
#'
#' @description The \code{findChromPeaks} methods perform the chromatographic
#'     peak detection on LC/GC-MS data and are part of the modernized
#'     \code{xcms} user interface.
#'
#'     The implemented peak detection methods in chromatographic space are:
#'     \describe{
#'     \item{centWave}{chromatographic peak detection using the \emph{centWave}
#'     method. See \code{\link{centWave}} for more details.}
#'
#'     \item{centWave with predicted isotopes}{peak detection using a two-step
#'     centWave-based approach considering also feature isotopes. See
#'     \code{\link{centWaveWithPredIsoROIs}} for more details.}
#'
#'     \item{matchedFilter}{peak detection in chromatographic space. See
#'     \code{\link{matchedFilter}} for more details.}
#'
#'     \item{massifquant}{peak detection using the Kalman filter-based
#'     method. See \code{\link{massifquant}} for more details.}
#'
#'     \item{MSW}{single-spectrum non-chromatography MS data peak detection.
#'     See \code{\link{MSW}} for more details.}
#'
#'     }
#'
#' @name chromatographic-peak-detection
#'
#' @family peak detection methods
#'
#' @seealso \code{\link{findPeaks}} for the \emph{old} peak detection
#'     methods.
#'
#'     \code{\link{plotChromPeaks}} to plot identified chromatographic peaks
#'     for one file.
#'
#'     \code{\link{highlightChromPeaks}} to highlight identified chromatographic
#'     peaks in an extracted ion chromatogram plot.
#'
#' @author Johannes Rainer
NULL
#> NULL

## Main centWave documentation.
#' @title Chromatographic peak detection using the centWave method
#'
#' @aliases centWave
#'
#' @description The centWave algorithm perform peak density and wavelet based
#'     chromatographic peak detection for high resolution LC/MS data in centroid
#'     mode [Tautenhahn 2008].
#'
#' @param ppm \code{numeric(1)} defining the maximal tolerated m/z deviation in
#'     consecutive scans in parts per million (ppm) for the initial ROI
#'     definition.
#'
#' @param peakwidth \code{numeric(2)} with the expected approximate
#'     peak width in chromatographic space. Given as a range (min, max)
#'     in seconds.
#'
#' @param snthresh \code{numeric(1)} defining the signal to noise ratio cutoff.
#'
#' @param prefilter \code{numeric(2)}: \code{c(k, I)} specifying the prefilter
#'     step for the first analysis step (ROI detection). Mass traces are only
#'     retained if they contain at least \code{k} peaks with intensity
#'     \code{>= I}.
#'
#' @param mzCenterFun Name of the function to calculate the m/z center of the
#'     chromatographic peak. Allowed are: \code{"wMean"}: intensity weighted
#'     mean of the peak's m/z values, \code{"mean"}: mean of the peak's m/z
#'     values, \code{"apex"}: use the m/z value at the peak apex,
#'     \code{"wMeanApex3"}: intensity weighted mean of the m/z value at the
#'     peak apex and the m/z values left and right of it and \code{"meanApex3"}:
#'     mean of the m/z value of the peak apex and the m/z values left and right
#'     of it.
#'
#' @param integrate Integration method. For \code{integrate = 1} peak limits
#'     are found through descent on the mexican hat filtered data, for
#'     \code{integrate = 2} the descent is done on the real data. The latter
#'     method is more accurate but prone to noise, while the former is more
#'     robust, but less exact.
#'
#' @param mzdiff \code{numeric(1)} representing the minimum difference in m/z
#'     dimension required for peaks with overlapping retention times; can be
#'     negative to allow overlap. During peak post-processing, peaks
#'     defined to be overlapping are reduced to the one peak with the largest
#'     signal.
#'
#' @param fitgauss \code{logical(1)} whether or not a Gaussian should be fitted
#'     to each peak. This affects mostly the retention time position of the
#'     peak.
#'
#' @param noise \code{numeric(1)} allowing to set a minimum intensity required
#'     for centroids to be considered in the first analysis step (centroids with
#'     intensity \code{< noise} are omitted from ROI detection).
#'
#' @param verboseColumns \code{logical(1)} whether additional peak meta data
#'     columns should be returned.
#'
#' @param roiList An optional list of regions-of-interest (ROI) representing
#'     detected mass traces. If ROIs are submitted the first analysis step is
#'     omitted and chromatographic peak detection is performed on the submitted
#'     ROIs. Each ROI is expected to have the following elements specified:
#'     \code{scmin} (start scan index), \code{scmax} (end scan index),
#'     \code{mzmin} (minimum m/z), \code{mzmax} (maximum m/z), \code{length}
#'     (number of scans), \code{intensity} (summed intensity). Each ROI should
#'     be represented by a \code{list} of elements or a single row
#'     \code{data.frame}.
#'
#' @param firstBaselineCheck \code{logical(1)}. If \code{TRUE} continuous
#'     data within regions of interest is checked to be above the first baseline.
#'
#' @param roiScales Optional numeric vector with length equal to \code{roiList}
#'     defining the scale for each region of interest in \code{roiList} that
#'     should be used for the centWave-wavelets.
#'
#' @details
#'
#' The centWave algorithm is most suitable for high resolution
#' LC/\{TOF,OrbiTrap,FTICR\}-MS data in centroid mode. In the first phase
#' the method identifies \emph{regions of interest} (ROIs) representing
#' mass traces that are characterized as regions with less than \code{ppm}
#' m/z deviation in consecutive scans in the LC/MS map. In detail, starting
#' with a single m/z, a ROI is extended if a m/z can be found in the next scan
#' (spectrum) for which the difference to the mean m/z of the ROI is smaller
#' than the user defined \code{ppm} of the m/z. The mean m/z of the ROI is then
#' updated considering also the newly included m/z value.
#'
#' These ROIs are then, after some cleanup, analyzed using continuous wavelet
#' transform (CWT) to locate chromatographic peaks on different scales.
#' The first analysis step is skipped, if regions of interest are passed
#' \emph{via} the \code{param} parameter.
#'
#' @note These methods and classes are part of the updated and modernized
#'     \code{xcms} user interface which will eventually replace the
#'     \code{\link{findPeaks}} methods. It supports peak detection on
#'     \code{\link{MSnExp}} and \code{\link{OnDiskMSnExp}}
#'     objects (both defined in the \code{MSnbase} package). All of the settings
#'     to the centWave algorithm can be passed with a \code{CentWaveParam}
#'     object.
#'
#' @family peak detection methods
#'
#' @seealso
#'
#' The \code{\link{do_findChromPeaks_centWave}} core API function and
#' \code{\link{findPeaks.centWave}} for the old user interface.
#'
#' \code{\link{peaksWithCentWave}} for functions to perform centWave peak
#' detection in purely chromatographic data.
#'
#' @references
#' Ralf Tautenhahn, Christoph B\"{o}ttcher, and Steffen Neumann "Highly
#' sensitive feature detection for high resolution LC/MS" \emph{BMC Bioinformatics}
#' 2008, 9:504
#'
#' @name findChromPeaks-centWave
#'
#' @author Ralf Tautenhahn, Johannes Rainer
NULL
#> NULL

#' @description The \code{CentWaveParam} class allows to specify all settings
#'     for a chromatographic peak detection using the centWave method. Instances
#'     should be created with the \code{CentWaveParam} constructor.
#'
#' @slot .__classVersion__,ppm,peakwidth,snthresh,prefilter,mzCenterFun,integrate,mzdiff,fitgauss,noise,verboseColumns,roiList,firstBaselineCheck,roiScales See corresponding parameter above. \code{.__classVersion__} stores
#' the version from the class. Slots values should exclusively be accessed
#' \emph{via} the corresponding getter and setter methods listed above.
#'
#' @rdname findChromPeaks-centWave
#'
#' @examples
#'
#' ## Create a CentWaveParam object. Note that the noise is set to 10000 to
#' ## speed up the execution of the example - in a real use case the default
#' ## value should be used, or it should be set to a reasonable value.
#' cwp <- CentWaveParam(ppm = 20, noise = 10000)
#' ## Change snthresh parameter
#' snthresh(cwp) <- 25
#' cwp
#'
#' ## Perform the peak detection using centWave on some of the files from the
#' ## faahKO package. Files are read using the readMSData from the MSnbase
#' ## package
#' library(faahKO)
#' library(xcms)
#' fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE,
#'            full.names = TRUE)
#' raw_data <- readMSData(fls[1:2], mode = "onDisk")
#'
#' ## Perform the peak detection using the settings defined above.
#' res <- findChromPeaks(raw_data, param = cwp)
#' head(chromPeaks(res))
setClass("CentWaveParam",
         slots = c(
             ppm = "numeric",
             peakwidth = "numeric",
             snthresh = "numeric",
             prefilter = "numeric",
             mzCenterFun = "character",
             integrate = "integer",
             mzdiff = "numeric",
             fitgauss = "logical",
             noise = "numeric",
             verboseColumns = "logical",
             roiList = "list",
             firstBaselineCheck = "logical",
             roiScales = "numeric"
         ),
         contains = c("Param"),
         prototype = prototype(
             ppm = 25,
             peakwidth = c(20, 50),
             snthresh = 10,
             prefilter = c(3, 100),
             mzCenterFun = "wMean",
             integrate = 1L,
             mzdiff = -0.001,
             fitgauss = FALSE,
             noise = 0,
             verboseColumns = FALSE,
             roiList = list(),
             firstBaselineCheck = TRUE,
             roiScales = numeric()
         ),
         validity = function(object) {
             msg <- character()
             if (length(object@ppm) != 1 | any(object@ppm < 0))
                 msg <- c(msg, paste0("'ppm' has to be positive numeric",
                                      " of length 1."))
             if (length(object@peakwidth) != 2 | any(object@peakwidth < 0))
                 msg <- c(msg, paste0("'peakwidth' has to be a numeric",
                                      " of length 2 with only positive",
                                      " values."))
             if (length(object@snthresh) != 1 | any(object@snthresh < 0))
                 msg <- c(msg, paste0("'snthresh' has to be a positive",
                                      " numeric of length 1."))
             if (length(object@prefilter) != 2)
                 msg <- c(msg, paste0("'prefilter' has to be a numeric",
                                      " of length 2."))
             allowed_vals <- c("wMean", "mean", "apex", "wMeanApex3",
                               "meanApex3")
             if (!(object@mzCenterFun) %in% allowed_vals)
                 msg <- c(msg, paste0("'mzCenterFun' has to be one of ",
                                      paste0("'", allowed_vals, "'",
                                             collapse = ", "), "."))
             if (!(object@integrate %in% c(1L, 2L)))
                 msg <- c(msg, paste0("'integrate' has to be either 1",
                                      " or 2."))
             if (length(object@mzdiff) != 1)
                 msg <- c(msg, paste0("'mzdiff' has to be a numeric of",
                                      " length 1."))
             if (length(object@noise) != 1)
                 msg <- c(msg, paste0("'noise' has to be a numeric of",
                                      " length 1."))
             if (length(object@fitgauss) != 1)
                 msg <- c(msg, paste0("'fitgauss' has to be a numeric of",
                                      " length 1."))
             if (length(object@verboseColumns) != 1)
                 msg <- c(msg, paste0("'verboseColumns' has to be a ",
                                      "numeric of length 1."))
             if (length(object@firstBaselineCheck) != 1)
                 msg <- c(msg, paste0("'firstBaselineCheck' has to be a",
                                      " numeric of length 1."))
             if (length(object@roiList) > 0) {
                 doHaveExpectedEls <- function(z) {
                     need <- c("scmax", "scmin", "mzmin", "mzmax", "length",
                               "intensity")
                     if (is.null(nrow(z))) {
                         OK <- all(need %in% names(z))
                     } else {
                         OK <- all(need %in% colnames(z))
                     }
                     return(OK)
                 }
                 OKs <- unlist(lapply(object@roiList, doHaveExpectedEls))
                 if (any(!OKs))
                     msg <- c(msg, paste0("'roiList' does not provide ",
                                          "all required fields!"))
             }
             if (length(object@roiScales) > 0) {
                 if (length(object@roiList) != length(object@roiScales))
                     msg <- c(msg, paste0("'roiScales' has to have the same",
                                          " length than 'roiList'."))
             }
             if (length(msg))
                 msg
             else
                 TRUE
         })

## Main matchedFilter documentation.
#' @title Peak detection in the chromatographic time domain
#'
#' @aliases matchedFilter
#'
#' @description The \emph{matchedFilter} algorithm identifies peaks in the
#'     chromatographic time domain as described in [Smith 2006]. The intensity
#'     values are binned by cutting The LC/MS data into slices (bins) of a mass
#'     unit (\code{binSize} m/z) wide. Within each bin the maximal intensity is
#'     selected. The chromatographic peak detection is then performed in each
#'     bin by extending it based on the \code{steps} parameter to generate
#'     slices comprising bins \code{current_bin - steps +1} to
#'     \code{current_bin + steps - 1}. Each of these slices is then filtered
#'     with matched filtration using a second-derative Gaussian as the model
#'     peak shape. After filtration peaks are detected using a signal-to-ratio
#'     cut-off. For more details and illustrations see [Smith 2006].
#'
#' @param binSize \code{numeric(1)} specifying the width of the
#'     bins/slices in m/z dimension.
#'
#' @param impute Character string specifying the method to be used for missing
#'     value imputation. Allowed values are \code{"none"} (no linear
#'     interpolation), \code{"lin"} (linear interpolation), \code{"linbase"}
#'     (linear interpolation within a certain bin-neighborhood) and
#'     \code{"intlin"}. See \code{\link{imputeLinInterpol}} for more details.
#'
#' @param fwhm \code{numeric(1)} specifying the full width at half maximum
#'     of matched filtration gaussian model peak. Only used to calculate the
#'     actual sigma, see below.
#'
#' @param sigma \code{numeric(1)} specifying the standard deviation (width)
#'     of the matched filtration model peak.
#'
#' @param max \code{numeric(1)} representing the maximum number of peaks
#'     that are expected/will be identified per slice.
#'
#' @param snthresh \code{numeric(1)} defining the signal to noise cutoff
#'     to be used in the chromatographic peak detection step.
#'
#' @param steps \code{numeric(1)} defining the number of bins to be
#'     merged before filtration (i.e. the number of neighboring bins that will
#'     be joined to the slice in which filtration and peak detection will be
#'     performed).
#'
#' @param mzdiff \code{numeric(1)} defining the minimum difference
#'     in m/z for peaks with overlapping retention times
#'
#' @param index \code{logical(1)} specifying whether indicies should be
#'     returned instead of values for m/z and retention times.
#'
#' @details The intensities are binned by the provided m/z values within each
#'     spectrum (scan). Binning is performed such that the bins are centered
#'     around the m/z values (i.e. the first bin includes all m/z values between
#'     \code{min(mz) - bin_size/2} and \code{min(mz) + bin_size/2}).
#'
#'     For more details on binning and missing value imputation see
#'     \code{\link{binYonX}} and \code{\link{imputeLinInterpol}} methods.
#'
#' @note These methods and classes are part of the updated and modernized
#'     \code{xcms} user interface which will eventually replace the
#'     \code{\link{findPeaks}} methods. It supports chromatographic peak
#'     detection on \code{\link{MSnExp}} and
#'     \code{\link{OnDiskMSnExp}} objects (both defined in the
#'     \code{MSnbase} package). All of the settings to the matchedFilter
#'     algorithm can be passed with a \code{MatchedFilterParam} object.
#'
#' @inheritParams imputeLinInterpol
#'
#' @inheritParams findChromPeaks-centWave
#'
#' @family peak detection methods
#'
#' @seealso
#'
#' The \code{\link{do_findChromPeaks_matchedFilter}} core API function
#' and \code{\link{findPeaks.matchedFilter}} for the old user interface.
#'
#' \code{\link{peaksWithMatchedFilter}} for functions to perform matchedFilter
#' peak detection in purely chromatographic data.
#'
#' @references
#' Colin A. Smith, Elizabeth J. Want, Grace O'Maille, Ruben Abagyan and
#' Gary Siuzdak. "XCMS: Processing Mass Spectrometry Data for Metabolite
#' Profiling Using Nonlinear Peak Alignment, Matching, and Identification"
#' \emph{Anal. Chem.} 2006, 78:779-787.
#'
#' @author Colin A Smith, Johannes Rainer
#'
#' @name findChromPeaks-matchedFilter
NULL
#> NULL

#' @description The \code{MatchedFilterParam} class allows to specify all
#'     settings for a chromatographic peak detection using the matchedFilter
#'     method. Instances should be created with the \code{MatchedFilterParam}
#'     constructor.
#'
#' @slot .__classVersion__,binSize,impute,baseValue,distance,fwhm,sigma,max,snthresh,steps,mzdiff,index See corresponding parameter above. \code{.__classVersion__} stores
#' the version from the class. Slots values should exclusively be accessed
#' \emph{via} the corresponding getter and setter methods listed above.
#'
#' @rdname findChromPeaks-matchedFilter
#'
#' @examples
#'
#' ## Create a MatchedFilterParam object. Note that we use a unnecessarily large
#' ## binSize parameter to reduce the run-time of the example.
#' mfp <- MatchedFilterParam(binSize = 5)
#' ## Change snthresh parameter
#' snthresh(mfp) <- 15
#' mfp
#'
#' ## Perform the peak detection using matchecFilter on the files from the
#' ## faahKO package. Files are read using the readMSData from the MSnbase
#' ## package
#' library(faahKO)
#' library(MSnbase)
#' fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE,
#'            full.names = TRUE)
#' raw_data <- readMSData(fls[1:2], mode = "onDisk")
#' ## Perform the chromatographic peak detection using the settings defined
#' ## above. Note that we are also disabling parallel processing in this
#' ## example by registering a "SerialParam"
#' register(SerialParam())
#' res <- findChromPeaks(raw_data, param = mfp)
#' head(chromPeaks(res))
setClass("MatchedFilterParam",
         slots = c(
             binSize = "numeric",
             impute = "character",
             baseValue = "numeric",
             distance = "numeric",
             fwhm = "numeric",
             sigma = "numeric",
             max = "numeric",
             snthresh = "numeric",
             steps = "numeric",
             mzdiff = "numeric",
             index = "logical"
         ),
         contains = c("Param"),
         prototype = prototype(
             binSize = 0.1,
             impute = "none",
             baseValue = numeric(),
             distance = numeric(),
             fwhm = 30,
             sigma = 12.73994,
             max = 5,
             snthresh = 10,
             steps = 2,
             mzdiff = 0.6,
             index = FALSE
         ),
         validity = function(object) {
             msg <- character()
             if (length(object@binSize) != 1 | any(object@binSize < 0))
                 msg <- c(msg, paste0("'binSize' has to be positive",
                                      " numeric of length 1."))
             if (!any(c("none", "lin", "linbase") == object@impute))
                 msg <- c(msg,
                          paste0("Only values 'none', 'lin' and ",
                                 "'linbase' are allowed for'impute'"))
             if (length(object@baseValue) > 1)
                 msg <- c(msg, paste0("'baseValue' has to be a",
                                      " numeric of length 1."))
             if (length(object@distance) > 1)
                 msg <- c(msg, paste0("'distance' has to be a numeric",
                                      " of length 1."))
             if (length(object@fwhm) != 1)
                 msg <- c(msg, paste0("'fwhm' has to be a numeric",
                                      " of length 1."))
             if (length(object@sigma) != 1)
                 msg <- c(msg, paste0("'sigma' has to be a numeric",
                                      " of length 1."))
             if (length(object@max) != 1)
                 msg <- c(msg, paste0("'max' has to be a numeric",
                                      " of length 1."))
             if (length(object@snthresh) != 1)
                 msg <- c(msg, paste0("'snthresh' has to be a numeric",
                                      " of length 1."))
             if (length(object@steps) != 1)
                 msg <- c(msg, paste0("'steps' has to be a numeric",
                                      " of length 1."))
             if (length(object@mzdiff) != 1)
                 msg <- c(msg, paste0("'mzdiff' has to be a numeric",
                                      " of length 1."))
             if (length(object@index) != 1)
                 msg <- c(msg, paste0("'index' has to be a logical",
                                      " of length 1."))
             if (length(msg))
                 msg
             else
                 TRUE
         })


## Main massifquant documentation.
#' @title Chromatographic peak detection using the massifquant method
#'
#' @aliases massifquant
#'
#' @description Massifquant is a Kalman filter (KF)-based chromatographic peak
#'     detection for XC-MS data in centroid mode. The identified peaks
#'     can be further refined with the \emph{centWave} method (see
#'     \code{\link{findChromPeaks-centWave}} for details on centWave)
#'     by specifying \code{withWave = TRUE}.
#'
#' @param peakwidth \code{numeric(2)}. Only the first element is used by
#'     massifquant, which specifices the minimum peak length in time scans.
#'     For \code{withWave = TRUE} the second argument represents the maximum
#'     peak length subject to being greater than the mininum peak length
#'     (see also documentation of \code{\link{do_findChromPeaks_centWave}}).
#'
#' @param prefilter \code{numeric(2)}. The first argument is only used
#'     if (\code{withWave = TRUE}); see \code{\link{findChromPeaks-centWave}}
#'     for details. The second argument specifies the minimum threshold for the
#'     maximum intensity of a chromatographic peak that must be met.
#'
#' @param criticalValue \code{numeric(1)}. Suggested values:
#'     (\code{0.1-3.0}). This setting helps determine the the Kalman Filter
#'     prediciton margin of error. A real centroid belonging to a bonafide
#'     peak must fall within the KF prediction margin of error. Much like
#'     in the construction of a confidence interval, \code{criticalVal} loosely
#'     translates to be a multiplier of the standard error of the prediction
#'     reported by the Kalman Filter. If the peak in the XC-MS sample have
#'     a small mass deviance in ppm error, a smaller critical value might be
#'     better and vice versa.
#'
#' @param consecMissedLimit \code{integer(1)} Suggested values: (\code{1,2,3}).
#'     While a peak is in the proces of being detected by a Kalman Filter, the
#'     Kalman Filter may not find a predicted centroid in every scan. After 1
#'     or more consecutive failed predictions, this setting informs Massifquant
#'     when to stop a Kalman Filter from following a candidate peak.
#'
#' @param unions \code{integer(1)} set to \code{1} if apply t-test union on
#'     segmentation; set to \code{0} if no t-test to be applied on
#'     chromatographically continous peaks sharing same m/z range.
#'     Explanation: With very few data points, sometimes a Kalman Filter stops
#'     tracking a peak prematurely. Another Kalman Filter is instantiated
#'     and begins following the rest of the signal. Because tracking is done
#'     backwards to forwards, this algorithmic defect leaves a real peak
#'     divided into two segments or more. With this option turned on, the
#'     program identifies segmented peaks and combines them (merges them)
#'     into one with a two sample t-test. The potential danger of this option
#'     is that some truly distinct peaks may be merged.
#'
#' @param checkBack \code{integer(1)} set to \code{1} if turned on; set to
#'     \code{0} if turned off. The convergence of a Kalman Filter to a peak's
#'     precise m/z mapping is very fast, but sometimes it incorporates erroneous
#'     centroids as part of a peak (especially early on). The \code{scanBack}
#'     option is an attempt to remove the occasional outlier that lies beyond
#'     the converged bounds of the Kalman Filter. The option does not directly
#'     affect identification of a peak because it is a postprocessing measure;
#'     it has not shown to be a extremely useful thus far and the default is set
#'     to being turned off.
#'
#' @param withWave \code{logical(1)} if \code{TRUE}, the peaks identified first
#'     with Massifquant are subsequently filtered with the second step of the
#'     centWave algorithm, which includes wavelet estimation.
#'
#' @details This algorithm's performance has been tested rigorously
#'     on high resolution LC/{OrbiTrap, TOF}-MS data in centroid mode.
#'     Simultaneous kalman filters identify chromatographic peaks and calculate
#'     their area under the curve. The default parameters are set to operate on
#'     a complex LC-MS Orbitrap sample. Users will find it useful to do some
#'     simple exploratory data analysis to find out where to set a minimum
#'     intensity, and identify how many scans an average peak spans. The
#'     \code{consecMissedLimit} parameter has yielded good performance on
#'     Orbitrap data when set to (\code{2}) and on TOF data it was found best
#'     to be at (\code{1}). This may change as the algorithm has yet to be
#'     tested on many samples. The \code{criticalValue} parameter is perhaps
#'     most dificult to dial in appropriately and visual inspection of peak
#'     identification is the best suggested tool for quick optimization.
#'     The \code{ppm} and \code{checkBack} parameters have shown less influence
#'     than the other parameters and exist to give users flexibility and
#'     better accuracy.
#'
#' @note These methods and classes are part of the updated and modernized
#'     \code{xcms} user interface which will eventually replace the
#'     \code{\link{findPeaks}} methods. It supports chromatographic peak
#'     detection on \code{\link{MSnExp}} and
#'     \code{\link{OnDiskMSnExp}} objects (both defined in the
#'     \code{MSnbase} package). All of the settings to the massifquant and
#'     centWave algorithm can be passed with a \code{MassifquantParam} object.
#'
#' @inheritParams findChromPeaks-centWave
#'
#' @family peak detection methods
#'
#' @seealso The \code{\link{do_findChromPeaks_massifquant}} core API function
#'     and \code{\link{findPeaks.massifquant}} for the old user interface.
#'
#' @references
#' Conley CJ, Smith R, Torgrip RJ, Taylor RM, Tautenhahn R and Prince JT
#' "Massifquant: open-source Kalman filter-based XC-MS isotope trace feature
#' detection" \emph{Bioinformatics} 2014, 30(18):2636-43.
#'
#' @author Christopher Conley, Johannes Rainer
#'
#' @name findChromPeaks-massifquant
NULL
#> NULL

#' @description The \code{MassifquantParam} class allows to specify all
#'     settings for a chromatographic peak detection using the massifquant
#'     method eventually in combination with the centWave algorithm. Instances
#'     should be created with the \code{MassifquantParam} constructor.
#'
#' @slot .__classVersion__,ppm,peakwidth,snthresh,prefilter,mzCenterFun,integrate,mzdiff,fitgauss,noise,verboseColumns,criticalValue,consecMissedLimit,unions,checkBack,withWave See corresponding parameter above. \code{.__classVersion__} stores
#' the version from the class. Slots values should exclusively be accessed
#' \emph{via} the corresponding getter and setter methods listed above.
#'
#' @rdname findChromPeaks-massifquant
#'
#' @examples
#'
#' ## Create a MassifquantParam object.
#' mqp <- MassifquantParam()
#' ## Change snthresh parameter
#' snthresh(mqp) <- 30
#' mqp
#'
#' ## Perform the peak detection using massifquant on the files from the
#' ## faahKO package. Files are read using the readMSData from the MSnbase
#' ## package
#' library(faahKO)
#' library(MSnbase)
#' fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE,
#'            full.names = TRUE)
#' raw_data <- readMSData(fls[1:2], mode = "onDisk")
#' ## Perform the peak detection using the settings defined above.
#' res <- findChromPeaks(raw_data, param = mqp)
#' head(chromPeaks(res))
setClass("MassifquantParam",
         slots = c(
             ppm = "numeric",
             peakwidth = "numeric",
             snthresh = "numeric",
             prefilter = "numeric",
             mzCenterFun = "character",
             integrate = "integer",
             mzdiff = "numeric",
             fitgauss = "logical",
             noise = "numeric",
             verboseColumns = "logical",
             criticalValue = "numeric",
             consecMissedLimit = "integer",
             unions = "integer",
             checkBack = "integer",
             withWave = "logical"
         ),
         contains = c("Param"),
         prototype = prototype(
             ppm = 25,
             peakwidth = c(20, 50),
             snthresh = 10,
             prefilter = c(3, 100),
             mzCenterFun = "wMean",
             integrate = 1L,
             mzdiff = -0.001,
             fitgauss = FALSE,
             noise = 0,
             verboseColumns = FALSE,
             criticalValue = 1.125,
             consecMissedLimit = 2L,
             unions = 1L,
             checkBack = 0L,
             withWave = FALSE
         ),
         validity = function(object) {
             msg <- character()
             if (length(object@ppm) != 1 | any(object@ppm < 0))
                 msg <- c(msg, paste0("'ppm' has to be positive numeric",
                                      " of length 1."))
             if (length(object@peakwidth) != 2 | any(object@peakwidth < 0))
                 msg <- c(msg, paste0("'peakwidth' has to be a numeric",
                                      " of length 2 with only positive",
                                      " values."))
             if (length(object@snthresh) != 1 | any(object@snthresh < 0))
                 msg <- c(msg, paste0("'snthresh' has to be a positive",
                                      " numeric of length 1."))
             if (length(object@prefilter) != 2)
                 msg <- c(msg, paste0("'prefilter' has to be a numeric",
                                      " of length 2."))
             allowed_vals <- c("wMean", "mean", "apex", "wMeanApex3",
                               "meanApex3")
             if (!(object@mzCenterFun) %in% allowed_vals)
                 msg <- c(msg, paste0("'mzCenterFun' has to be one of ",
                                      paste0("'", allowed_vals, "'",
                                             collapse = ", "), "."))
             if (!(object@integrate %in% c(1L, 2L)))
                 msg <- c(msg, paste0("'integrate' has to be either 1",
                                      " or 2."))
             if (length(object@mzdiff) != 1)
                 msg <- c(msg, paste0("'mzdiff' has to be a numeric of",
                                      " length 1."))
             if (length(object@noise) != 1)
                 msg <- c(msg, paste0("'noise' has to be a numeric of",
                                      " length 1."))
             if (length(object@fitgauss) != 1)
                 msg <- c(msg, paste0("'fitgauss' has to be a numeric of",
                                      " length 1."))
             if (length(object@verboseColumns) != 1)
                 msg <- c(msg, paste0("'verboseColumns' has to be a ",
                                      "numeric of length 1."))
             if (length(object@criticalValue) != 1)
                 msg <- c(msg, paste0("'criticalValue' has to be a ",
                                      "numeric of length 1."))
             if (length(object@consecMissedLimit) != 1)
                 msg <- c(msg, paste0("'consecMissedLimit' has to be a ",
                                      "numeric of length 1."))
             if (length(object@unions) != 1)
                 msg <- c(msg, paste0("'unions' has to be a ",
                                      "numeric of length 1."))
             if (object@unions != 0 & object@unions != 1)
                 msg <- c(msg, paste0("'unions' has to be either 0 or 1!"))
             if (length(object@checkBack) != 1)
                 msg <- c(msg, paste0("'checkBack' has to be a ",
                                      "numeric of length 1."))
             if (object@checkBack != 0 & object@checkBack != 1)
                 msg <- c(msg, paste0("'checkBack' has to be either 0",
                                      " or 1!"))
             if (length(object@withWave) != 1)
                 msg <- c(msg, paste0("'withWave' has to be a ",
                                      "numeric of length 1."))
             if (length(msg))
                 msg
             else TRUE
         })

## Main MSW documentation.
#' @title Single-spectrum non-chromatography MS data peak detection
#'
#' @aliases MSW
#'
#' @description Perform peak detection in mass spectrometry
#'     direct injection spectrum using a wavelet based algorithm.
#'
#' @details This is a wrapper for the peak picker in Bioconductor's
#'     \code{MassSpecWavelet} package calling
#'     \code{\link{peakDetectionCWT}} and
#'     \code{\link{tuneInPeakInfo}} functions. See the
#'     \emph{xcmsDirect} vignette for more information.
#'
#' @note These methods and classes are part of the updated and modernized
#'     \code{xcms} user interface which will eventually replace the
#'     \code{\link{findPeaks}} methods. It supports peak detection on
#'     \code{\link{MSnExp}} and \code{\link{OnDiskMSnExp}}
#'     objects (both defined in the \code{MSnbase} package). All of the settings
#'     to the algorithm can be passed with a \code{MSWParam} object.
#'
#' @inheritParams findChromPeaks-centWave
#'
#' @family peak detection methods
#'
#' @seealso The \code{\link{do_findPeaks_MSW}} core API function
#'     and \code{\link{findPeaks.MSW}} for the old user interface.
#'
#' @author Joachim Kutzera, Steffen Neumann, Johannes Rainer
#'
#' @name findPeaks-MSW
NULL
#> NULL

#' @description The \code{MSWParam} class allows to specify all
#'     settings for a peak detection using the MSW method. Instances should be
#'     created with the \code{MSWParam} constructor.
#'
#' @slot .__classVersion__,snthresh,verboseColumns,scales,nearbyPeak,peakScaleRange,ampTh,minNoiseLevel,ridgeLength,peakThr,tuneIn,addParams See corresponding parameter above. \code{.__classVersion__} stores the version from the class. Slots values
#' should exclusively be accessed \emph{via} the corresponding getter and
#' setter methods listed above.
#'
#' @rdname findPeaks-MSW
#'
#' @examples
#'
#' ## Create a MSWParam object
#' mp <- MSWParam()
#' ## Change snthresh parameter
#' snthresh(mp) <- 15
#' mp
#'
#' ## Loading a small subset of direct injection, single spectrum files
#' library(msdata)
#' fticrf <- list.files(system.file("fticr", package = "msdata"),
#'                     recursive = TRUE, full.names = TRUE)
#' fticr <- readMSData(fticrf[1:2], msLevel. = 1, mode = "onDisk")
#'
#' ## Perform the MSW peak detection on these:
#' p <- MSWParam(scales = c(1, 7), peakThr = 80000, ampTh = 0.005,
#'              SNR.method = "data.mean", winSize.noise = 500)
#' fticr <- findChromPeaks(fticr, param = p)
#'
#' head(chromPeaks(fticr))
setClass("MSWParam",
         slots = c(
             snthresh = "numeric",
             verboseColumns = "logical",
             ## params from the peakDetectionCWT
             scales = "numeric",
             nearbyPeak = "logical",
             peakScaleRange = "numeric",
             ampTh = "numeric",
             minNoiseLevel = "numeric",
             ridgeLength = "numeric",
             peakThr = "numeric",
             tuneIn = "logical",
             addParams = "list"
         ),
         contains = c("Param"),
         prototype = prototype(
             snthresh = 3,
             verboseColumns = FALSE,
             scales = c(1, seq(2, 30, 2), seq(32, 64, 4)),
             nearbyPeak = TRUE,
             peakScaleRange = 5,
             ampTh = 0.01,
             minNoiseLevel = (0.01 / 3),
             ridgeLength = 24,
             peakThr = numeric(),
             tuneIn = FALSE,
             addParams = list()
         ),
         validity = function(object) {
             msg <- character()
             if (length(object@snthresh) != 1 | any(object@snthresh < 0))
                 msg <- c(msg, paste0("'snthresh' has to be a positive",
                                      " numeric of length 1."))
             if (length(object@verboseColumns) != 1)
                 msg <- c(msg, paste0("'verboseColumns' has to be a ",
                                      "numeric of length 1."))
             if (length(object@nearbyPeak) != 1)
                 msg <- c(msg, paste0("'nearbyPeak' has to be a ",
                                      "logical of length 1."))
             if (length(object@peakScaleRange) != 1 |
                 any(object@peakScaleRange < 0))
                 msg <- c(msg, paste0("'peakScaleRange' has to be a ",
                                      "positive numeric of length 1."))
             if (length(object@ampTh) != 1 | any(object@ampTh < 0))
                 msg <- c(msg, paste0("'ampTh' has to be a ",
                                      "positive numeric of length 1."))
             if (length(object@minNoiseLevel) != 1 |
                 any(object@minNoiseLevel < 0))
                 msg <- c(msg, paste0("'minNoiseLevel' has to be a ",
                                      "positive numeric of length 1."))
             if (length(object@ridgeLength) != 1 |
                 any(object@ridgeLength < 0))
                 msg <- c(msg, paste0("'ridgeLength' has to be a ",
                                      "positive numeric of length 1."))
             if (length(object@peakThr) > 1)
                 msg <- c(msg, paste0("'peakThr' has to be a ",
                                      "positive numeric of length 1."))
             if (length(object@tuneIn) != 1)
                 msg <- c(msg, paste0("'tuneIn' has to be a ",
                                      "logical of length 1."))
             if (length(msg))
                 msg
             else TRUE
         })

#' @title Two-step centWave peak detection considering also isotopes
#'
#' @aliases centWaveWithPredIsoROIs
#'
#' @description This method performs a two-step centWave-based chromatographic
#'     peak detection: in a first centWave run peaks are identified for which
#'     then the location of their potential isotopes in the mz-retention time is
#'     predicted. A second centWave run is then performed on these
#'     \emph{regions of interest} (ROIs). The final list of chromatographic
#'     peaks comprises all non-overlapping peaks from both centWave runs.
#'
#' @inheritParams findChromPeaks-centWave
#'
#' @param maxCharge \code{integer(1)} defining the maximal isotope charge.
#'     Isotopes will be defined for charges \code{1:maxCharge}.
#'
#' @param maxIso \code{integer(1)} defining the number of isotope peaks that
#'     should be predicted for each peak identified in the first centWave run.
#'
#' @param mzIntervalExtension \code{logical(1)} whether the mz range for the
#'     predicted isotope ROIs should be extended to increase detection of low
#'     intensity peaks.
#'
#' @param snthreshIsoROIs \code{numeric(1)} defining the signal to noise ratio
#'     cutoff to be used in the second centWave run to identify peaks for
#'     predicted isotope ROIs.
#'
#' @param polarity \code{character(1)} specifying the polarity of the data.
#'     Currently not used, but has to be \code{"positive"}, \code{"negative"} or
#'     \code{"unknown"} if provided.
#'
#' @details See \code{\link{centWave}} for details on the centWave method.
#'
#' @note These methods and classes are part of the updated and modernized
#'     \code{xcms} user interface which will eventually replace the
#'     \code{\link{findPeaks}} methods. It supports chromatographic peak
#'     detection on \code{\link{MSnExp}} and
#'     \code{\link{OnDiskMSnExp}} objects (both defined in the
#'     \code{MSnbase} package). All of the settings to the algorithm can be
#'     passed with a \code{CentWavePredIsoParam} object.
#'
#' @family peak detection methods
#'
#' @seealso The \code{\link{do_findChromPeaks_centWaveWithPredIsoROIs}} core
#'     API function and \code{\link{findPeaks.centWave}} for the old user
#'     interface. \code{\link{CentWaveParam}} for the class the
#'     \code{CentWavePredIsoParam} extends.
#'
#' @name findChromPeaks-centWaveWithPredIsoROIs
#'
#' @author Hendrik Treutler, Johannes Rainer
NULL
#> NULL

#' @description The \code{CentWavePredIsoParam} class allows to specify all
#'     settings for the two-step centWave-based peak detection considering also
#'     predicted isotopes of peaks identified in the first centWave run.
#'     Instances should be created with the \code{CentWavePredIsoParam}
#'     constructor. See also the documentation of the
#'     \code{\link{CentWaveParam}} for all methods and arguments this class
#'     inherits.
#'
#' @slot .__classVersion__,ppm,peakwidth,snthresh,prefilter,mzCenterFun,integrate,mzdiff,fitgauss,noise,verboseColumns,roiList,firstBaselineCheck,roiScales,snthreshIsoROIs,maxCharge,maxIso,mzIntervalExtension,polarity See corresponding parameter above. \code{.__classVersion__} stores
#' the version from the class. Slots values should exclusively be accessed
#' \emph{via} the corresponding getter and setter methods listed above.
#'
#' @rdname findChromPeaks-centWaveWithPredIsoROIs
#'
#' @examples
#'
#' ## Create a param object
#' p <- CentWavePredIsoParam(maxCharge = 4)
#' ## Change snthresh parameter
#' snthresh(p) <- 25
#' p
#'
setClass("CentWavePredIsoParam",
         slots = c(
             snthreshIsoROIs = "numeric",
             maxCharge = "integer",
             maxIso = "integer",
             mzIntervalExtension = "logical",
             polarity = "character"
         ),
         contains = c("CentWaveParam"),
         prototype = prototype(
             snthreshIsoROIs = 6.25,
             maxCharge = 3L,
             maxIso = 5L,
             mzIntervalExtension = TRUE,
             polarity = "unknown"
         ),
         validity = function(object) {
             msg <- character()
             if (length(object@snthreshIsoROIs) != 1 |
                 any(object@snthreshIsoROIs < 0))
                 msg <- c(msg, paste0("'snthreshIsoROIs' has to be a ",
                                      "positive numeric of length 1."))
             if (length(object@maxCharge) != 1 | any(object@maxCharge < 0))
                 msg <- c(msg, paste0("'maxCharge' has to be a ",
                                      "positive integer of length 1."))
             if (length(object@maxIso) != 1 | any(object@maxIso < 0))
                 msg <- c(msg, paste0("'maxIso' has to be a ",
                                      "positive integer of length 1."))
             if (length(object@mzIntervalExtension) != 1)
                 msg <- c(msg, paste0("'mzIntervalExtension' has to be a",
                                      " logical of length 1."))
             if (length(object@polarity) != 1)
                 msg <- c(msg, paste0("'polarity' has to be a",
                                      " character of length 1."))
             if (!(object@polarity %in% c("positive", "negative", "unknown")))
                 msg <- c(msg, paste0("'polarity' has to be either ",
                                      "'positive', 'negative' or ",
                                      "'unknown'!"))
             if (length(msg))
                 msg
             else TRUE
         })


## General groupChromPeaks method.
#' @title Correspondence: Chromatographic peak grouping methods.
#'
#' @description The \code{groupChromPeaks} method(s) perform the correspondence,
#'     i.e. the grouping of chromatographic peaks within and between samples.
#'     These methods are part of the modernized \code{xcms} user interface.
#'     The resulting peak groups are referred to as (mz-rt) features and can be
#'     accessed \emph{via} the \code{\link{featureDefinitions}} method on the
#'     result object.
#'
#'     The implemented peak grouping methods are:
#'     \describe{
#'
#'     \item{density}{peak grouping based on time dimension peak densities.
#'     See \code{\link{groupChromPeaks-density}} for more details.}
#'
#'     \item{mzClust}{high resolution peak grouping for single spectra (direct
#'     infusion) MS data. See \code{\link{groupChromPeaks-mzClust}} for more
#'     details.}
#'
#'     \item{nearest}{chromatographic peak grouping based on their proximity in
#'     the mz-rt space. See \code{\link{groupChromPeaks-nearest}} for more
#'     details.}
#'
#' }
#' @name groupChromPeaks
#'
#' @family peak grouping methods
#'
#' @seealso
#'
#' \code{\link{featureDefinitions}} and
#' \code{\link{featureValues,XCMSnExp-method}} for methods to access peak
#' grouping results.
#'
#' \code{\link{featureChromatograms}} to extract ion chromatograms for each
#' feature.
#'
#' \code{\link{group}} for the \emph{old} peak grouping methods.
#'
#' @author Johannes Rainer
NULL
#> NULL

#' @title Peak grouping based on time dimension peak densities
#'
#' @description
#'
#' This method performs performs correspondence (chromatographic
#' peak grouping) based on the density (distribution) of identified peaks
#' along the retention time axis within slices of overlapping mz ranges.
#' All peaks (from the same or from different samples) being close on the
#' retention time axis are grouped into a feature (*peak group*).
#'
#' @note These methods and classes are part of the updated and modernized
#'     `xcms` user interface. All of the settings to the algorithm
#'     can be passed with a `PeakDensityParam` object.
#'
#' @param sampleGroups A vector of the same length than samples defining the
#'     sample group assignments (i.e. which samples belong to which sample
#'     group). This parameter is mandatory for the `PeakDensityParam`
#'     and has to be provided also if there is no sample grouping in the
#'     experiment (in which case all samples should be assigned to the
#'     same group).
#'
#' @param bw `numeric(1)` defining the bandwidth (standard deviation ot the
#'     smoothing kernel) to be used. This argument is passed to the
#'     [density() method.
#'
#' @param minFraction `numeric(1)` defining the minimum fraction of samples
#'     in at least one sample group in which the peaks have to be present to be
#'     considered as a peak group (feature).
#'
#' @param minSamples `numeric(1)` with the minimum number of samples in at
#'     least one sample group in which the peaks have to be detected to be
#'     considered a peak group (feature).
#'
#' @param binSize `numeric(1)` defining the size of the overlapping slices
#'     in mz dimension.
#'
#' @param maxFeatures `numeric(1)` with the maximum number of peak groups
#'     to be identified in a single mz slice.
#'
#' @family peak grouping methods
#'
#' @seealso
#'
#' The [do_groupChromPeaks_density()] core API function and [group.density()]
#' for the old user interface.
#'
#' [plotChromPeakDensity()] to plot peak densities and evaluate different
#' algorithm settings.
#'
#' [featureDefinitions()] and [featureValues()] for methods to access the
#' features (i.e. the peak grouping results).
#'
#' @name groupChromPeaks-density
#'
#' @md
#'
#' @author Colin Smith, Johannes Rainer
#'
#' @references
#' Colin A. Smith, Elizabeth J. Want, Grace O'Maille, Ruben Abagyan and
#' Gary Siuzdak. "XCMS: Processing Mass Spectrometry Data for Metabolite
#' Profiling Using Nonlinear Peak Alignment, Matching, and Identification"
#' Anal. Chem. 2006, 78:779-787.
NULL
#> NULL

#' @description
#'
#' The `PeakDensityParam` class allows to specify all settings for the peak
#' grouping based on peak densities along the time dimension. Instances should
#' be created with the [PeakDensityParam()] constructor.
#'
#' @slot .__classVersion__,sampleGroups,bw,minFraction,minSamples,binSize,maxFeatures See corresponding parameter above. `.__classVersion__` stores
#' the version from the class. Slots values should exclusively be accessed
#' *via* the corresponding getter and setter methods listed above.
#'
#' @rdname groupChromPeaks-density
#'
#' @md
#'
#' @examples
#'
#' ## Create a PeakDensityParam object
#' p <- PeakDensityParam(binSize = 0.05, sampleGroups = c(1, 1, 2, 2))
#' ## Change hte minSamples slot
#' minSamples(p) <- 3
#' p
#'
#' ##############################
#' ## Chromatographic peak detection and grouping.
#' ##
#' ## Below we perform first a peak detection (using the matchedFilter
#' ## method) on some of the test files from the faahKO package followed by
#' ## a peak grouping using the density method.
#' library(faahKO)
#' library(MSnbase)
#' fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE,
#'            full.names = TRUE)
#'
#' ## Reading 2 of the KO samples
#' raw_data <- readMSData(fls[1:2], mode = "onDisk")
#'
#' ## Perform the chromatographic peak detection using the matchedFilter method.
#' mfp <- MatchedFilterParam(snthresh = 20, binSize = 1)
#' res <- findChromPeaks(raw_data, param = mfp)
#'
#' head(chromPeaks(res))
#' ## The number of peaks identified per sample:
#' table(chromPeaks(res)[, "sample"])
#'
#' ## Performing the chromatographic peak grouping. Assigning all samples to
#' ## the same sample group.
#' fdp <- PeakDensityParam(sampleGroups = rep(1, length(fileNames(res))))
#' res <- groupChromPeaks(res, fdp)
#'
#' ## The definition of the features (peak groups):
#' featureDefinitions(res)
#'
#' ## Using the featureValues method to extract a matrix with the
#' ## intensities of the features per sample.
#' head(featureValues(res, value = "into"))
#'
#' ## The process history:
#' processHistory(res)
setClass("PeakDensityParam",
         slots = c(sampleGroups = "ANY",
                   bw = "numeric",
                   minFraction = "numeric",
                   minSamples = "numeric",
                   binSize = "numeric",
                   maxFeatures = "numeric"),
         contains = "Param",
         prototype = prototype(
             sampleGroups = numeric(),
             bw = 30,
             minFraction = 0.5,
             minSamples = 1,
             binSize = 0.25,
             maxFeatures = 50),
         validity = function(object) {
             msg <- character()
             if (length(object@bw) > 1 | any(object@bw < 0))
                 msg <- c(msg, paste0("'bw' has to be a ",
                                      "positive numeric of length 1!"))
             if (length(object@minFraction) > 1 | any(object@minFraction < 0) |
                 any(object@minFraction > 1))
                 msg <- c(msg, paste0("'minFraction' has to be a ",
                                      "single positive number between ",
                                      "0 and 1!"))
             if (length(object@minSamples) > 1 | any(object@minSamples < 0))
                 msg <- c(msg, paste0("'minSamples' has to be a ",
                                      "positive numeric of length 1!"))
             if (length(object@binSize) > 1 | any(object@binSize < 0))
                 msg <- c(msg, paste0("'binSize' has to be a ",
                                      "positive numeric of length 1!"))
             if (length(object@maxFeatures) > 1 | any(object@maxFeatures < 0))
                 msg <- c(msg, paste0("'maxFeatures' has to be a ",
                                             "positive numeric of length 1!"))
             if (length(msg))
                 return(msg)
             else
                 return(TRUE)
         })

## Main group.mzClust documentation.
#' @title High resolution peak grouping for single spectra samples
#'
#' @description
#'
#' This method performs high resolution correspondence for single spectra
#' samples.
#'
#' @note These methods and classes are part of the updated and modernized
#'     `xcms` user interface which will eventually replace the
#'     [group()] methods. All of the settings to the algorithm
#'     can be passed with a [MzClustParam] object.
#'
#' @inheritParams groupChromPeaks-density
#'
#' @param ppm `numeric(1)` representing the relative mz error for the
#'     clustering/grouping (in parts per million).
#'
#' @param absMz `numeric(1)` representing the absolute mz error for the
#'     clustering.
#'
#' @family peak grouping methods
#'
#' @seealso
#'
#' The [do_groupPeaks_mzClust()] core API function and [group.mzClust()] for
#' the old user interface.
#'
#' [featureDefinitions()] and [featureValues()] for methods to access peak
#' grouping results (i.e. the features).
#'
#' @name groupChromPeaks-mzClust
#'
#' @md
#'
#' @references Saira A. Kazmi, Samiran Ghosh, Dong-Guk Shin, Dennis W. Hill
#' and David F. Grant\cr Alignment of high resolution mass spectra:
#' development of a heuristic approach for metabolomics.\cr Metabolomics,
#' Vol. 2, No. 2, 75-83 (2006)
NULL
#> NULL

#' @description
#'
#' The `MzClustParam` class allows to specify all settings for the peak
#' grouping based on the *mzClust* algorithm.
#' Instances should be created with the `MzClustParam` constructor.
#'
#' @slot .__classVersion__,sampleGroups,ppm,absMz,minFraction,minSamples See corresponding parameter above. `.__classVersion__` stores
#' the version from the class. Slots values should exclusively be accessed
#' *via* the corresponding getter and setter methods listed above.
#'
#' @md
#'
#' @rdname groupChromPeaks-mzClust
#'
#' @examples
#'
#' ## Loading a small subset of direct injection, single spectrum files
#' library(msdata)
#' fticrf <- list.files(system.file("fticr", package = "msdata"),
#'                     recursive = TRUE, full.names = TRUE)
#' fticr <- readMSData(fticrf[1:2], msLevel. = 1, mode = "onDisk")
#'
#' ## Perform the MSW peak detection on these:
#' p <- MSWParam(scales = c(1, 7), peakThr = 80000, ampTh = 0.005,
#'              SNR.method = "data.mean", winSize.noise = 500)
#' fticr <- findChromPeaks(fticr, param = p)
#'
#' head(chromPeaks(fticr))
#'
#' ## Now create the MzClustParam parameter object: we're assuming here that
#' ## both samples are from the same sample group.
#' p <- MzClustParam(sampleGroups = c(1, 1))
#'
#' fticr <- groupChromPeaks(fticr, param = p)
#'
#' ## Get the definition of the features.
#' featureDefinitions(fticr)
setClass("MzClustParam",
         slots = c(sampleGroups = "ANY",
                   ppm = "numeric",
                   absMz = "numeric",
                   minFraction = "numeric",
                   minSamples = "numeric"),
         contains = "Param",
         prototype = prototype(
             sampleGroups = numeric(),
             ppm = 20,
             absMz = 0,
             minFraction = 0.5,
             minSamples = 1),
         validity = function(object) {
             msg <- character()
             if (length(object@ppm) > 1 | any(object@ppm < 0))
                 msg <- c(msg, paste0("'ppm' has to be a ",
                                      "positive numeric of length 1!"))
             if (length(object@absMz) > 1 | any(object@absMz < 0))
                 msg <- c(msg, paste0("'absMz' has to be a ",
                                      "positive numeric of length 1!"))
             if (length(object@minFraction) > 1 | any(object@minFraction < 0) |
                 any(object@minFraction > 1))
                 msg <- c(msg, paste0("'minFraction' has to be a ",
                                      "single positive number between ",
                                      "0 and 1!"))
             if (length(object@minSamples) > 1 | any(object@minSamples < 0))
                 msg <- c(msg, paste0("'minSamples' has to be a ",
                                      "positive numeric of length 1!"))
             if (length(msg))
                 msg
             else
                 TRUE
         })

## Main group.nearest documentation.
#' @title Peak grouping based on proximity in the mz-rt space
#'
#' @description
#'
#' This method is inspired by the grouping algorithm of mzMine
#' (Katajamaa 2006) and performs correspondence based on proximity of peaks
#' in the space spanned by retention time and mz values.
#' The method creates first a *master peak list* consisting of all
#' chromatographic peaks from the sample in which most peaks were
#' identified, and starting from that, calculates distances to peaks from
#' the sample with the next most number of peaks. If peaks are closer than
#' the defined threshold they are grouped together.
#'
#' @note
#'
#' These methods and classes are part of the updated and modernized
#' `xcms` user interface. All of the settings to the algorithm
#' can be passed with a `NearestPeaksParam` object.
#'
#' @inheritParams groupChromPeaks-density
#'
#' @param mzVsRtBalance `numeric(1)` representing the factor by which mz
#'     values are multiplied before calculating the (euclician) distance between
#'     two peaks.
#'
#' @param absMz `numeric(1)` maximum tolerated distance for mz values.
#'
#' @param absRt `numeric(1)` maximum tolerated distance for rt values.
#'
#' @param kNN `numeric(1)` representing the number of nearest neighbors
#'     to check.
#'
#' @family peak grouping methods
#'
#' @seealso
#'
#' The [do_groupChromPeaks_nearest()] core API function.
#'
#' [featureDefinitions()] and [featureValues()] for methods to access
#' peak grouping results (i.e. the features).
#'
#' @name groupChromPeaks-nearest
#'
#' @md
#'
#' @references Katajamaa M, Miettinen J, Oresic M: MZmine: Toolbox for
#' processing and visualization of mass spectrometry based molecular profile
#' data. Bioinformatics 2006, 22:634-636.
NULL
#> NULL

#' @description The `NearestPeaksParam` class allows to specify all
#'     settings for the peak grouping based on the *nearest* algorithm.
#'     Instances should be created with the `NearestPeaksParam` constructor.
#'
#' @slot .__classVersion__,sampleGroups,mzVsRtBalance,absMz,absRt,kNN See corresponding parameter above. `.__classVersion__` stores
#' the version from the class. Slots values should exclusively be accessed
#' *via* the corresponding getter and setter methods listed above.
#'
#' @md
#'
#' @rdname groupChromPeaks-nearest
#'
#' @examples
#'
#' ## Create a NearestPeaksParam object
#' p <- NearestPeaksParam(kNN = 3)
#' p
#'
#' ##############################
#' ## Chromatographic peak detection and grouping.
#' ##
#' ## Below we perform first a chromatographic peak detection (using the
#' ## matchedFilter method) on some of the test files from the faahKO package
#' ## followed by a peaks grouping using the "nearest" method.
#' library(faahKO)
#' library(MSnbase)
#' fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE,
#'            full.names = TRUE)
#'
#' ## Reading 2 of the KO samples
#' raw_data <- readMSData(fls[1:2], mode = "onDisk")
#'
#' ## Perform the peak detection using the matchedFilter method.
#' mfp <- MatchedFilterParam(snthresh = 20, binSize = 1)
#' res <- findChromPeaks(raw_data, param = mfp)
#'
#' head(chromPeaks(res))
#' ## The number of peaks identified per sample:
#' table(chromPeaks(res)[, "sample"])
#'
#' ## Performing the peak grouping
#' p <- NearestPeaksParam()
#' res <- groupChromPeaks(res, param = p)
#'
#' ## The results from the peak grouping:
#' featureDefinitions(res)
#'
#' ## Using the featureValues method to extract a matrix with the intensities of
#' ## the features per sample.
#' head(featureValues(res, value = "into"))
#'
#' ## The process history:
#' processHistory(res)
setClass("NearestPeaksParam",
         slots = c(sampleGroups = "ANY",
                   mzVsRtBalance = "numeric",
                   absMz = "numeric",
                   absRt = "numeric",
                   kNN = "numeric"),
         contains = "Param",
         prototype = prototype(
             sampleGroups = numeric(),
             mzVsRtBalance = 10,
             absMz = 0.2,
             absRt = 15,
             kNN = 10),
         validity = function(object) {
             msg <- character()
             if (length(object@mzVsRtBalance) > 1 |
                 any(object@mzVsRtBalance < 0))
                 msg <- c(msg, paste0("'mzVsRtBalance' has to be a ",
                                      "positive numeric of length 1!"))
             if (length(object@absMz) > 1 | any(object@absMz < 0))
                 msg <- c(msg, paste0("'absMz' has to be a ",
                                      "positive numeric of length 1!"))
             if (length(object@absRt) > 1 | any(object@absRt < 0))
                 msg <- c(msg, paste0("'absRt' has to be a ",
                                      "positive numeric of length 1!"))
             if (length(object@kNN) > 1 | any(object@kNN < 0))
                 msg <- c(msg, paste0("'kNN' has to be a ",
                                      "positive numeric of length 1!"))
             if (length(msg))
                 msg
             else TRUE
         })



#' @title Alignment: Retention time correction methods.
#'
#' @description The \code{adjustRtime} method(s) perform retention time
#'     correction (alignment) between chromatograms of different samples. These
#'     methods are part of the modernized \code{xcms} user interface.
#'
#'     The implemented retention time adjustment methods are:
#'     \describe{
#'     \item{peakGroups}{retention time correction based on aligment of
#'     features (peak groups) present in most/all samples.
#'     See \code{\link{adjustRtime-peakGroups}} for more details.}
#'
#'     \item{obiwarp}{alignment based on the complete mz-rt data. This method
#'     does not require any identified peaks or defined features. See
#'     \code{\link{adjustRtime-obiwarp}} for more details.}
#'     }
#' @name adjustRtime
#'
#' @family retention time correction methods
#'
#' @seealso \code{\link{retcor}} for the \emph{old} retention time correction
#'     methods.
#'     \code{\link{plotAdjustedRtime}} for visualization of alignment results.
#'
#' @author Johannes Rainer
NULL
#> NULL

## Main retcor.peakgroups documentation.
#' @title Retention time correction based on alignment of house keeping peak
#' groups
#'
#' @description
#'
#' This method performs retention time adjustment based on the
#' alignment of chromatographic peak groups present in all/most samples
#' (hence corresponding to house keeping compounds). First the retention
#' time deviation of these peak groups is described by fitting either a
#' polynomial (\code{smooth = "loess"}) or a linear (
#' \code{smooth = "linear"}) model to the data points. These models are
#' subsequently used to adjust the retention time of each spectrum in
#' each sample.
#'
#' It is also possible to exclude certain samples within an experiment from
#' the estimation of the alignment models. The parameter \code{subset}
#' allows to define the indices of samples within \code{object} that should
#' be aligned. Samples not part of this \code{subset} are left out in the
#' estimation of the alignment models, but their retention times are
#' subsequently adjusted based on the alignment results of the closest sample
#' in \code{subset} (close in terms of position within the \code{object}).
#' Alignment could thus be performed on only \emph{real} samples leaving out
#' e.g. blanks, which are then in turn adjusted based on the closest real
#' sample. Here it is up to the user to ensure that the samples within
#' \code{object} are ordered correctly (e.g. by injection index).
#'
#' How the non-subset samples are adjusted bases also on the parameter
#' \code{subsetAdjust}: with \code{subsetAdjust = "previous"}, each non-subset
#' sample is adjusted based on the closest previous subset sample which results
#' in most cases with adjusted retention times of the non-subset sample being
#' identical to the subset sample on which the adjustment bases. The second,
#' default, option is to use \code{subsetAdjust = "average"} in which case
#' each non subset sample is adjusted based on the average retention time
#' adjustment from the previous and following subset sample. For the average
#' a weighted mean is used with weights being the inverse of the distance of
#' the non-subset sample to the subset samples used for alignment.
#'
#' See also section \emph{Alignment of experiments including blanks} in the
#' \emph{xcms} vignette for an example.
#'
#' @note
#'
#' These methods and classes are part of the updated and modernized
#' \code{xcms} user interface which will eventually replace the
#' \code{\link{group}} methods. All of the settings to the alignment
#' algorithm can be passed with a \code{PeakGroupsParam} object.
#'
#' The matrix with the (raw) retention times of the peak groups used
#' in the alignment is added to the \code{peakGroupsMatrix} slot of the
#' \code{PeakGroupsParam} object that is stored into the corresponding
#' \emph{process history step} (see \code{\link{processHistory}} for how
#' to access the process history).
#'
#' @param minFraction \code{numeric(1)} between 0 and 1 defining the minimum
#'     required fraction of samples in which peaks for the peak group were
#'     identified. Peak groups passing this criteria will aligned across
#'     samples and retention times of individual spectra will be adjusted
#'     based on this alignment. For \code{minFraction = 1} the peak group
#'     has to contain peaks in all samples of the experiment. Note that if
#'     \code{subset} is provided, the specified fraction is relative to the
#'     defined subset of samples and not to the total number of samples within
#'     the experiment (i.e. a peak has to be present in the specified
#'     proportion of subset samples).
#'
#' @param extraPeaks \code{numeric(1)} defining the maximal number of
#'     additional peaks for all samples to be assigned to a peak group (i.e.
#'     feature) for retention time correction. For a data set with 6 samples,
#'     \code{extraPeaks = 1} uses all peak groups with a total peak count
#'     \code{<= 6 + 1}. The total peak count is the total number of peaks being
#'     assigned to a peak group and considers also multiple peaks within a
#'     sample being assigned to the group.
#'
#' @param smooth character defining the function to be used, to interpolate
#'     corrected retention times for all peak groups. Either \code{"loess"} or
#'     \code{"linear"}.
#'
#' @param span \code{numeric(1)} defining the degree of smoothing (if
#'     \code{smooth = "loess"}). This parameter is passed to the internal call
#'     to \code{\link{loess}}.
#'
#' @param family character defining the method to be used for loess smoothing.
#'     Allowed values are \code{"gaussian"} and \code{"symmetric"}.See
#'     \code{\link{loess}} for more information.
#'
#' @param peakGroupsMatrix optional \code{matrix} of (raw) retention times for
#'     the peak groups on which the alignment should be performed. Each column
#'     represents a sample, each row a feature/peak group. Such a matrix is
#'     for example returned by the \code{\link{adjustRtimePeakGroups}} method.
#'
#' @param subset \code{integer} with the indices of samples within the
#'     experiment on which the alignment models should be estimated. Samples
#'     not part of the subset are adjusted based on the closest subset sample.
#'     See description above for more details.
#'
#' @param subsetAdjust \code{character} specifying the method with which
#'     non-subset samples should be adjusted. Supported options are
#'     \code{"previous"} and \code{"average"} (default). See description above
#'     for more information.
#'
#' @family retention time correction methods
#'
#' @seealso The \code{\link{do_adjustRtime_peakGroups}} core
#'     API function and \code{\link{retcor.peakgroups}} for the old user
#'     interface.
#'     \code{\link{plotAdjustedRtime}} for visualization of alignment results.
#'
#' @name adjustRtime-peakGroups
#'
#' @author Colin Smith, Johannes Rainer
#'
#' @references
#' Colin A. Smith, Elizabeth J. Want, Grace O'Maille, Ruben Abagyan and
#' Gary Siuzdak. "XCMS: Processing Mass Spectrometry Data for Metabolite
#' Profiling Using Nonlinear Peak Alignment, Matching, and Identification"
#' \emph{Anal. Chem.} 2006, 78:779-787.
NULL
#> NULL

#' @description The \code{PeakGroupsParam} class allows to specify all
#'     settings for the retention time adjustment based on \emph{house keeping}
#'     peak groups present in most samples.
#'     Instances should be created with the \code{PeakGroupsParam} constructor.
#'
#' @slot .__classVersion__,minFraction,extraPeaks,smooth,span,family,peakGroupsMatrix,subset,subsetAdjust See corresponding parameter above. \code{.__classVersion__} stores
#' the version from the class. Slots values should exclusively be accessed
#' \emph{via} the corresponding getter and setter methods listed above.
#'
#' @rdname adjustRtime-peakGroups
#'
#' @examples
#' ##############################
#' ## Chromatographic peak detection and grouping.
#' ##
#' ## Below we perform first a peak detection (using the matchedFilter
#' ## method) on some of the test files from the faahKO package followed by
#' ## a peak grouping.
#' library(faahKO)
#' library(xcms)
#' fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE,
#'            full.names = TRUE)
#'
#' ## Reading 2 of the KO samples
#' raw_data <- readMSData(fls[1:2], mode = "onDisk")
#'
#' ## Perform the peak detection using the matchedFilter method.
#' mfp <- MatchedFilterParam(snthresh = 20, binSize = 1)
#' res <- findChromPeaks(raw_data, param = mfp)
#'
#' head(chromPeaks(res))
#' ## The number of peaks identified per sample:
#' table(chromPeaks(res)[, "sample"])
#'
#' ## Performing the peak grouping using the "peak density" method.
#' p <- PeakDensityParam(sampleGroups = c(1, 1))
#' res <- groupChromPeaks(res, param = p)
#'
#' ## Perform the retention time adjustment using peak groups found in both
#' ## files.
#' fgp <- PeakGroupsParam(minFraction = 1)
#'
#' ## Before running the alignment we can evaluate which features (peak groups)
#' ## would be used based on the specified parameters.
#' pkGrps <- adjustRtimePeakGroups(res, param = fgp)
#'
#' ## We can also plot these to evaluate if the peak groups span a large portion
#' ## of the retention time range.
#' plot(x = pkGrps[, 1], y = rep(1, nrow(pkGrps)), xlim = range(rtime(res)),
#'     ylim = c(1, 2), xlab = "rt", ylab = "", yaxt = "n")
#' points(x = pkGrps[, 2], y = rep(2, nrow(pkGrps)))
#' segments(x0 = pkGrps[, 1], x1 = pkGrps[, 2],
#'     y0 = rep(1, nrow(pkGrps)), y1 = rep(2, nrow(pkGrps)))
#' grid()
#' axis(side = 2, at = c(1, 2), labels = colnames(pkGrps))
#'
#' ## Next we perform the alignment.
#' res <- adjustRtime(res, param = fgp)
#'
#' ## Any grouping information was dropped
#' hasFeatures(res)
#'
#' ## Plot the raw against the adjusted retention times.
#' plot(rtime(raw_data), rtime(res), pch = 16, cex = 0.25, col = fromFile(res))
#'
#' ## Adjusterd retention times can be accessed using
#' ## rtime(object, adjusted = TRUE) and adjustedRtime
#' all.equal(rtime(res), adjustedRtime(res))
#'
#' ## To get the raw, unadjusted retention times:
#' all.equal(rtime(res, adjusted = FALSE), rtime(raw_data))
#'
#' ## To extract the retention times grouped by sample/file:
#' rts <- rtime(res, bySample = TRUE)
setClass("PeakGroupsParam",
         slots = c(minFraction = "numeric",
                   extraPeaks = "numeric",
                   smooth = "character",
                   span = "numeric",
                   family = "character",
                   peakGroupsMatrix = "matrix",
                   subset = "integer",
                   subsetAdjust = "character"),
         contains = "Param",
         prototype = prototype(
             minFraction = 0.9,
             extraPeaks = 1,
             smooth = "loess",
             span = 0.2,
             family = "gaussian",
             peakGroupsMatrix = matrix(ncol = 0, nrow = 0),
             subset = integer(),
             subsetAdjust = "average"
         ),
         validity = function(object) {
             msg <- character()
             if (length(object@minFraction) > 1 |
                 any(object@minFraction < 0) |
                 any(object@minFraction > 1))
                 msg <- c(msg, paste0("'minFraction' has to be a single",
                                      " number between 0 and 1!"))
             if (length(object@extraPeaks) > 1 |
                 any(object@extraPeaks < 0))
                 msg <- c(msg, paste0("'extraPeaks' has to be a ",
                                      "positive numeric of length 1!"))
             if (length(object@span) > 1 | any(object@span < 0))
                 msg <- c(msg, paste0("'span' has to be a ",
                                      "positive numeric of length 1!"))
             if (length(object@smooth) > 1 |
                 !all(object@smooth %in% c("loess", "linear")))
                 msg <- c(msg, paste0("'smooth' has to be either \"",
                                      "loess\" or \"linear\"!"))
             if (length(object@family) > 1 |
                 !all(object@family %in% c("gaussian", "symmetric")))
                 msg <- c(msg, paste0("'family' has to be either \"",
                                      "gaussian\" or \"symmetric\"!"))
             if (length(msg))
                 msg
             else TRUE
         })

#' @title Align retention times across samples using Obiwarp
#'
#' @description
#'
#' This method performs retention time adjustment using the
#' Obiwarp method [Prince 2006]. It is based on the code at
#' \url{http://obi-warp.sourceforge.net} but supports alignment of multiple
#' samples by aligning each against a \emph{center} sample. The alignment is
#' performed directly on the \code{\link{profile-matrix}} and can hence be
#' performed independently of the peak detection or peak grouping.
#'
#' It is also possible to exclude certain samples within an experiment from
#' the estimation of the alignment models. The parameter \code{subset}
#' allows to define the indices of samples within \code{object} that should
#' be aligned. Samples not part of this \code{subset} are left out in the
#' estimation of the alignment models, but their retention times are
#' subsequently adjusted based on the alignment results of the closest sample
#' in \code{subset} (close in terms of position within the \code{object}).
#' Alignment could thus be performed on only \emph{real} samples leaving out
#' e.g. blanks, which are then in turn adjusted based on the closest real
#' sample. Here it is up to the user to ensure that the samples within
#' \code{object} are ordered correctly (e.g. by injection index).
#'
#' How the non-subset samples are adjusted bases also on the parameter
#' \code{subsetAdjust}: with \code{subsetAdjust = "previous"}, each non-subset
#' sample is adjusted based on the closest previous subset sample which results
#' in most cases with adjusted retention times of the non-subset sample being
#' identical to the subset sample on which the adjustment bases. The second,
#' default, option is to use \code{subsetAdjust = "average"} in which case
#' each non subset sample is adjusted based on the average retention time
#' adjustment from the previous and following subset sample. For the average
#' a weighted mean is used with weights being the inverse of the distance of
#' the non-subset sample to the subset samples used for alignment.
#'
#' See also section \emph{Alignment of experiments including blanks} in the
#' \emph{xcms} vignette for an example.
#'
#' @note
#'
#' These methods and classes are part of the updated and modernized
#' \code{xcms} user interface which will eventually replace the
#' \code{\link{retcor}} methods. All of the settings to the alignment
#' algorithm can be passed with a \code{ObiwarpParam} object.
#'
#' @param binSize \code{numeric(1)} defining the bin size (in mz dimension)
#'     to be used for the \emph{profile matrix} generation. See \code{step}
#'     parameter in \code{\link{profile-matrix}} documentation for more details.
#'
#' @param centerSample \code{integer(1)} defining the index of the center sample
#'     in the experiment. It defaults to
#'     \code{floor(median(1:length(fileNames(object))))}. Note that if
#'     \code{subset} is used, the index passed with \code{centerSample} is
#'     within these subset samples.
#'
#' @param response \code{numeric(1)} defining the \emph{responsiveness} of
#'     warping with \code{response = 0} giving linear warping on start and end
#'     points and \code{response = 100} warping using all bijective anchors.
#'
#' @param distFun character defining the distance function to be used. Allowed
#'     values are \code{"cor"} (Pearson's correlation), \code{"cor_opt"}
#'     (calculate only 10\% diagonal band of distance matrix; better runtime),
#'     \code{"cov"} (covariance), \code{"prd"} (product) and \code{"euc"}
#'     (Euclidian distance). The default value is \code{distFun = "cor_opt"}.
#'
#' @param gapInit \code{numeric(1)} defining the penalty for gap opening. The
#'     default value for \code{gapInit} depends on the value of \code{distFun}:
#'     for \code{distFun = "cor"} and \code{distFun = "cor_opt"} it is
#'     \code{0.3}, for \code{distFun = "cov"} and \code{distFun = "prd"}
#'     \code{0.0} and for \code{distFun = "euc"} \code{0.9}.
#'
#' @param gapExtend \code{numeric(1)} defining the penalty for gap enlargement.
#'     The default value for \code{gapExtend} depends on the value of
#'     \code{distFun}, for \code{distFun = "cor"} and
#'     \code{distFun = "cor_opt"} it is \code{2.4}, for \code{distFun = "cov"}
#'     \code{11.7}, for \code{distFun = "euc"} \code{1.8} and for
#'     \code{distFun = "prd"} {7.8}.
#'
#' @param factorDiag \code{numeric(1)} defining the local weight applied to
#'     diagonal moves in the alignment.
#'
#' @param factorGap \code{numeric(1)} defining the local weight for gap moves
#'     in the alignment.
#'
#' @param localAlignment \code{logical(1)} whether a local alignment should be
#'     performed instead of the default global alignment.
#'
#' @param initPenalty \code{numeric(1)} defining the penalty for initiating an
#'     alignment (for local alignment only).
#'
#' @inheritParams adjustRtime-peakGroups
#'
#' @family retention time correction methods
#'
#' @seealso \code{\link{retcor.obiwarp}} for the old user interface.
#'     \code{\link{plotAdjustedRtime}} for visualization of alignment results.
#'
#' @name adjustRtime-obiwarp
#'
#' @author Colin Smith, Johannes Rainer
#'
#' @references
#' John T. Prince and Edward M. Marcotte. "Chromatographic Alignment of
#' ESI-LC-MS Proteomics Data Sets by Ordered Bijective Interpolated Warping"
#' \emph{Anal. Chem.} 2006, 78(17):6140-6152.

NULL
#> NULL

#' @description The \code{ObiwarpParam} class allows to specify all
#'     settings for the retention time adjustment based on the \emph{obiwarp}
#'     method. Class Instances should be created using the
#'     \code{ObiwarpParam} constructor.
#'
#' @slot .__classVersion__,binSize,centerSample,response,distFun,gapInit,gapExtend,factorDiag,factorGap,localAlignment,initPenalty,subset,subsetAdjust See corresponding parameter above. \code{.__classVersion__} stores
#' the version from the class. Slots values should exclusively be accessed
#' \emph{via} the corresponding getter and setter methods listed above.
#'
#' @rdname adjustRtime-obiwarp
#'
#' @examples
#' library(faahKO)
#' library(MSnbase)
#' fls <- dir(system.file("cdf/KO", package = "faahKO"), recursive = TRUE,
#'            full.names = TRUE)
#'
#' ## Reading 2 of the KO samples
#' raw_data <- readMSData(fls[1:2], mode = "onDisk")
#'
#' ## Perform retention time correction on the OnDiskMSnExp:
#' res <- adjustRtime(raw_data, param = ObiwarpParam())
#'
#' ## As a result we get a numeric vector with the adjusted retention times for
#' ## all spectra.
#' head(res)
#'
#' ## We can split this by file to get the adjusted retention times for each
#' ## file
#' resL <- split(res, fromFile(raw_data))
#'
#' ##############################
#' ## Perform retention time correction on an XCMSnExp:
#' ##
#' ## Perform first the chromatographic peak detection using the matchedFilter
#' ## method.
#' mfp <- MatchedFilterParam(snthresh = 20, binSize = 1)
#' res <- findChromPeaks(raw_data, param = mfp)
#'
#' ## Performing the retention time adjustment using obiwarp.
#' res_2 <- adjustRtime(res, param = ObiwarpParam())
#'
#' head(rtime(res_2))
#' head(rtime(raw_data))
#'
#' ## Also the retention times of the detected peaks were adjusted.
#' tail(chromPeaks(res))
#' tail(chromPeaks(res_2))
setClass("ObiwarpParam",
         slots = c(binSize = "numeric",
                   centerSample = "integer",
                   response = "integer",
                   distFun = "character",
                   gapInit = "numeric",
                   gapExtend = "numeric",
                   factorDiag = "numeric",
                   factorGap = "numeric",
                   localAlignment = "logical",
                   initPenalty = "numeric",
                   subset = "integer",
                   subsetAdjust = "character"),
         contains = "Param",
         prototype = prototype(
             binSize = 1,
             centerSample = integer(),
             response = 1L,
             distFun = "cor_opt",
             gapInit = numeric(),
             gapExtend = numeric(),
             factorDiag = 2,
             factorGap = 1,
             localAlignment = FALSE,
             initPenalty = 0,
             subset = integer(),
             subsetAdjust = "average"),
         validity = function(object) {
             msg <- character()
             if (length(object@binSize) > 1 |
                 any(object@binSize < 0))
                 msg <- c(msg, paste0("'binSize' has to be a positive",
                                      " numeric of length 1!"))
             if (length(object@centerSample) > 1 |
                 any(object@centerSample < 0))
                 msg <- c(msg, paste0("'centerSample' has to be a positive",
                                      " numeric of length 1!"))
             if (length(object@response) > 1 |
                 any(object@response < 0) |
                 any(object@response > 100))
                 msg <- c(msg, paste0("'response' has to be a single ",
                                      " integer from 1 to 100!"))
             if (length(object@distFun) > 1 |
                 any(!(object@distFun %in% c("cor", "cor_opt", "cov", "euc",
                                             "prd"))))
                 msg <- c(msg, paste0("'distFun' has to be one of \"cor\"",
                                      ", \"cor_opt\", \"cov\", \"euc\"",
                                      " or \"prd\"!"))
             if (length(object@gapInit) > 1 | any(object@gapInit < 0))
                 msg <- c(msg, paste0("'gapInit' has to be a positive",
                                      " numeric of length 1!"))
             if (length(object@gapExtend) > 1 | any(object@gapExtend < 0))
                 msg <- c(msg, paste0("'gapExtend' has to be a positive",
                                      " numeric of length 1!"))
             if (length(object@factorDiag) > 1 | any(object@factorDiag < 0))
                 msg <- c(msg, paste0("'factorDiag' has to be a positive",
                                      " numeric of length 1!"))
             if (length(object@factorGap) > 1 | any(object@factorGap < 0))
                 msg <- c(msg, paste0("'factorGap' has to be a positive",
                                      " numeric of length 1!"))
             if (length(object@localAlignment) > 1)
                 msg <- c(msg, paste0("'localAlignment' has to be a ",
                                      "logical of length 1!"))
             if (length(object@initPenalty) > 1 | any(object@initPenalty < 0))
                 msg <- c(msg, paste0("'initPenalty' has to be a positive",
                                      " numeric of length 1!"))
             if (length(msg))
                 msg
             else TRUE
         })

#' @description The \code{FillChromPeaksParam} object encapsules all settings for
#' the signal integration for missing peaks.
#'
#' @slot .__classVersion__,expandMz,expandRt,ppm,fixedMz,fixedRt See corresponding parameter above. \code{.__classVersion__} stores the version of the class.
#'
#' @rdname fillChromPeaks
setClass("FillChromPeaksParam",
         slots = c(expandMz = "numeric",
                   expandRt = "numeric",
                   ppm = "numeric",
                   fixedMz = "numeric",
                   fixedRt = "numeric"),
         contains = "Param",
         prototype = prototype(
             expandMz = 0,
             expandRt = 0,
             ppm = 0,
             fixedMz = 0,
             fixedRt = 0
         ),
         validity = function(object) {
             msg <- character()
             if (length(object@expandMz) > 1 | any(object@expandMz < -1))
                 msg <- c(msg, "'expandMz' has to be > -1 and of length 1")
             if (length(object@expandRt) > 1 | any(object@expandRt < -1))
                 msg <- c(msg, "'expandRt' has to be > -1 and of length 1")
             if (length(object@ppm) > 1 | any(object@ppm < 0))
                 msg <- c(msg, paste0("'ppm' has to be a positive",
                                      " numeric of length 1!"))
             if (length(object@fixedMz) > 1)
                 msg <- c(msg, "'fixedMz' has to be a numeric of length 1")
             if (length(object@fixedRt) > 1)
                 msg <- c(msg, "'fixedRt' has to be a numeric of length 1")
             if (length(msg))
                 msg
             else TRUE
         }
         )

#' @aliases MsFeatureData
#'
#' @title Data container storing xcms preprocessing results
#'
#' @description The \code{MsFeatureData} class is designed to encapsule all
#'     data related to the preprocessing of metabolomics data using the
#'     \code{xcms} package, i.e. it contains a \code{matrix} with the
#'     chromatographic peaks identified by the peak detection, a
#'     \code{DataFrame} with the definition on grouped chromatographic peaks
#'     across samples and a \code{list} with the adjusted retention times per
#'     sample.
#'
#' @noRd
#'
#' @rdname XCMSnExp-class
setClass("MsFeatureData", contains = c("environment", "Versioned"),
         prototype = prototype(.xData = new.env(parent = emptyenv())))

.REQ_PEAKS_COLS <- c("mz", "mzmin", "mzmax", "rt", "rtmin",
                     "rtmax", "into", "sample")
.REQ_PEAKG_COLS <- c("mzmed", "mzmin", "mzmax", "rtmed", "rtmin", "rtmax",
                     "peakidx")

#' @aliases XCMSnExp
#'
#' @title Data container storing xcms preprocessing results
#'
#' @description
#'
#' The \code{XCMSnExp} object is a container for the results of a G/LC-MS
#' data preprocessing that comprises chromatographic peak detection, alignment
#' and correspondence. These results can be accessed with the \code{chromPeaks},
#' \code{adjustedRtime} and \code{featureDefinitions} functions; see below
#' (after the Usage, Arguments, Value and Slots sections) for more details).
#' Along with the results, the object contains the processing history that
#' allows to track each processing step along with the used settings. This
#' can be extracted with the \code{\link{processHistory}} method.
#' \code{XCMSnExp} objects, by directly extending the
#' \code{\link{OnDiskMSnExp}} object from the \code{MSnbase} package, inherit
#' all of its functionality and allows thus an easy access to the full raw
#' data at any stage of an analysis.
#' To support interaction with packages requiring the \emph{old} objects,
#' \code{XCMSnExp} objects can be coerced into \code{\linkS4class{xcmsSet}}
#' objects using the \code{as} method (see examples below). All
#' preprocessing results will be passed along to the resulting
#' \code{xcmsSet} object.
#'
#' General functions for \code{XCMSnExp} objects are:
#'
#' @section Chromatographic peak data:
#'
#' Chromatographic peak data is added to an \code{XCMSnExp} object by the
#' \code{\link{findChromPeaks}} function. Functions to access chromatographic
#' peak data are:
#'
#' \itemize{
#' \item \code{hasChromPeaks} whether chromatographic peak data is available,
#' see below for help of the function.
#'
#' \item \code{chromPeaks} access chromatographic peaks (see below for help).
#'
#' \item \code{dropChromPeaks} remove chromatographic peaks (see below for
#' help).
#'
#' \item \code{dropFilledChromPeaks} remove filled-in peaks (see below for
#' help).
#'
#' \item \code{\link{fillChromPeaks}} fill-in missing peaks (see respective
#' help page).
#'
#' \item \code{\link{plotChromPeaks}} plot identified peaks for a file (see
#' respective help page).
#'
#' \item \code{\link{plotChromPeakImage}} plot distribution of peaks along the
#' retention time axis (see respective help page).
#'
#' \item \code{\link{highlightChromPeaks}} add chromatographic peaks to an
#' existing plot of a \code{\link{Chromatogram}} (see respective help page).
#'
#' }
#'
#'
#' @section Adjusted retention times:
#'
#' Adjusted retention times are stored in an \code{XCMSnExp} object besides the
#' original, raw, retention times, allowing to switch between raw and adjusted
#' times. It is also possible to replace the raw retention times with the
#' adjusted ones with the \code{\link{applyAdjustedRtime}}. The adjusted
#' retention times are added to an \code{XCMSnExp} by the
#' \code{\link{adjustRtime}} function. All functions related to the access of
#'  adjusted retention times are:
#'
#' \itemize{
#'
#' \item \code{hasAdjustedRtime} whether adjusted retention times are available
#' (see below for help).
#'
#' \item \code{dropAdjustedRtime} remove adjusted retention times (see below
#' for help).
#'
#' \item \code{\link{applyAdjustedRtime}} replace the raw retention times with
#' the adjusted ones (see respective help page).
#'
#' \item \code{\link{plotAdjustedRtime}} plot differences between adjusted and
#' raw retention times (see respective help page).
#'
#' }
#'
#'
#' @section Correspondence results, features:
#'
#' The correspondence analysis (\code{\link{groupChromPeaks}}) adds the feature
#' definitions to an \code{XCMSnExp} object. All functions related to these are
#' listed below:
#'
#' \itemize{
#'
#' \item \code{hasFeatures} whether correspondence results are available (see
#' below for help).
#'
#' \item \code{featureDefinitions} access the definitions of the features (see
#' below for help).
#'
#' \item \code{dropFeatureDefinitions} remove correspondence results (see below
#' for help).
#'
#' \item \code{\link{featureValues}} access values for features (see respective
#' help page).
#'
#' \item \code{\link{featureSummary}} perform a simple summary of the defined
#' features (see respective help page).
#'
#' \item \code{link{overlappingFeatures}} identify features that are
#' overlapping or close in the m/z - rt space (see respective help page).
#'
#' }
#'
#' @note The \code{"chromPeaks"} element in the \code{msFeatureData} slot is
#'     equivalent to the \code{@peaks} slot of the \code{xcmsSet} object, the
#'     \code{"featureDefinitions"} contains information from the \code{@groups}
#'     and \code{@groupidx} slots from an \code{xcmsSet} object.
#'
#' @slot .processHistory \code{list} with \code{XProcessHistory} objects
#'     tracking all individual analysis steps that have been performed.
#'
#' @slot msFeatureData \code{MsFeatureData} class extending \code{environment}
#'     and containing the results from a chromatographic peak detection (element
#'     \code{"chromPeaks"}), peak grouping (element \code{"featureDefinitions"})
#'     and retention time correction (element \code{"adjustedRtime"}) steps.
#'     This object should not be manipulated directly.
#'
#' @param object For \code{adjustedRtime}, \code{featureDefinitions},
#'     \code{chromPeaks}, \code{hasAdjustedRtime}, \code{hasFeatures} and
#'     \code{hasChromPeaks} either a \code{MsFeatureData} or a \code{XCMSnExp}
#'     object, for all other methods a \code{XCMSnExp} object.
#'
#' @param value For \code{adjustedRtime<-}: a \code{list} (length equal to the
#'     number of samples) with numeric vectors representing the adjusted
#'     retention times per scan.
#'
#'     For \code{featureDefinitions<-}: a \code{DataFrame} with peak
#'     grouping information. See return value for the \code{featureDefinitions}
#'     method for the expected format.
#'
#'     For \code{chromPeaks<-}: a \code{matrix} with information on
#'     detected peaks. See return value for the \code{chromPeaks} method for the
#'     expected format.
#'
#' @author Johannes Rainer
#'
#' @seealso \code{\linkS4class{xcmsSet}} for the old implementation.
#'     \code{\link{OnDiskMSnExp}}, \code{\link{MSnExp}}
#'     and \code{\link{pSet}} for a complete list of inherited methods.
#'
#'     \code{\link{findChromPeaks}} for available peak detection methods
#'     returning a \code{XCMSnExp} object as a result.
#'
#'     \code{\link{groupChromPeaks}} for available peak grouping
#'     methods and \code{\link{featureDefinitions}} for the method to extract
#'     the feature definitions representing the peak grouping results.
#'     \code{\link{adjustRtime}} for retention time adjustment methods.
#'
#'     \code{\link{chromatogram}} to extract MS data as
#'     \code{\link{Chromatogram}} objects.
#'
#'     \code{\link{as}} (\code{as(x, "data.frame")}) in the \code{MSnbase}
#'     package for the method to extract MS data as \code{data.frame}s.
#'
#'     \code{\link{featureSummary}} to calculate basic feature summaries.
#'
#'     \code{\link{featureChromatograms}} to extract chromatograms for each
#'     feature.
#'
#'     \code{\link{chromPeakSpectra}} to extract MS2 spectra with the m/z of
#'     the precursor ion within the m/z range of a peak and a retention time
#'     within its retention time range.
#'
#'     \code{\link{featureSpectra}} to extract MS2 spectra associated with
#'     identified features.
#'
#' @rdname XCMSnExp-class
#'
#' @examples
#'
#' ## Loading the data from 2 files of the faahKO package.
#' library(faahKO)
#' od <- readMSData(c(system.file("cdf/KO/ko15.CDF", package = "faahKO"),
#'                    system.file("cdf/KO/ko16.CDF", package = "faahKO")),
#'                  mode = "onDisk")
#' ## Now we perform a chromatographic peak detection on this data set using the
#' ## matched filter method. We are tuning the settings such that it performs
#' ## faster.
#' mfp <- MatchedFilterParam(binSize = 6)
#' xod <- findChromPeaks(od, param = mfp)
#'
#' ## The results from the peak detection are now stored in the XCMSnExp
#' ## object
#' xod
#'
#' ## The detected peaks can be accessed with the chromPeaks method.
#' head(chromPeaks(xod))
#'
#' ## The settings of the chromatographic peak detection can be accessed with
#' ## the processHistory method
#' processHistory(xod)
#'
#' ## Also the parameter class for the peak detection can be accessed
#' processParam(processHistory(xod)[[1]])
#'
#' ## The XCMSnExp inherits all methods from the pSet and OnDiskMSnExp classes
#' ## defined in Bioconductor's MSnbase package. To access the (raw) retention
#' ## time for each spectrum we can use the rtime method. Setting bySample = TRUE
#' ## would cause the retention times to be grouped by sample
#' head(rtime(xod))
#'
#' ## Similarly it is possible to extract the mz values or the intensity values
#' ## using the mz and intensity method, respectively, also with the option to
#' ## return the results grouped by sample instead of the default, which is
#' ## grouped by spectrum. Finally, to extract all of the data we can use the
#' ## spectra method which returns Spectrum objects containing all raw data.
#' ## Note that all these methods read the information from the original input
#' ## files and subsequently apply eventual data processing steps to them.
#' mzs <- mz(xod, bySample = TRUE)
#' length(mzs)
#' lengths(mzs)
#'
#' ## The full data could also be read using the spectra data, which returns
#' ## a list of Spectrum object containing the mz, intensity and rt values.
#' ## spctr <- spectra(xod)
#' ## To get all spectra of the first file we can split them by file
#' ## head(split(spctr, fromFile(xod))[[1]])
#'
#' ############
#' ## Filtering
#' ##
#' ## XCMSnExp objects can be filtered by file, retention time, mz values or
#' ## MS level. For some of these filter preprocessing results (mostly
#' ## retention time correction and peak grouping results) will be dropped.
#' ## Below we filter the XCMSnExp object by file to extract the results for
#' ## only the second file.
#' xod_2 <- filterFile(xod, file = 2)
#' xod_2
#'
#' ## Now the objects contains only the idenfified peaks for the second file
#' head(chromPeaks(xod_2))
#'
#' head(chromPeaks(xod)[chromPeaks(xod)[, "sample"] == 2, ])
#'
#' ##########
#' ## Coercing to an xcmsSet object
#' ##
#' ## We can also coerce the XCMSnExp object into an xcmsSet object:
#' xs <- as(xod, "xcmsSet")
#' head(peaks(xs))
setClass("XCMSnExp",
         slots = c(
             .processHistory = "list",
             msFeatureData = "MsFeatureData"
         ),
         prototype = prototype(
             .processHistory = list(),
             msFeatureData = new("MsFeatureData")
         ),
         contains = c("OnDiskMSnExp"),
         validity = function(object) {
             msg <- character()
             if (length(object@.processHistory) > 0) {
                 isOK <- unlist(lapply(object@.processHistory, function(z) {
                     return(inherits(z, "ProcessHistory"))
                 }))
                 if (!all(isOK))
                     msg <- c(msg, paste0("Only 'ProcessHistory' ",
                                          "objects are allowed in slot ",
                                          ".processHistory!"))
             }
             ## TODO @jo add checks:
             ## 1) call validMsFeatureData
             msg <- c(msg, validateMsFeatureData(object@msFeatureData))
             if (length(msg)) return(msg)
             ## 2) peaks[, "sample"] is within 1:number of samples
             if (any(ls(object@msFeatureData) == "chromPeaks")) {
                 if (!all(object@msFeatureData$chromPeaks[, "sample"] %in%
                          1:length(fileNames(object))))
                     msg <- c(msg, paste0("The number of available ",
                                          "samples does not match with ",
                                          "the sample assignment of ",
                                          "peaks in the 'chromPeaks' ",
                                          "element of the msFeatureData ",
                                          "slot!"))
             }
             ## 3) Check that the length of the adjustedRtime matches!
             if (any(ls(object@msFeatureData) == "adjustedRtime")) {
                 rt <- rtime(object, bySample = TRUE, adjusted = FALSE)
                 if (length(rt) != length(object@msFeatureData$adjustedRtime)) {
                     msg <- c(msg, paste0("The number of numeric vectors",
                                          " in the 'adjustedRtime' element",
                                          " of the msFeatureData slot does",
                                          " not match the number of",
                                          " samples!"))
                 } else {
                     if (any(lengths(rt) !=
                             lengths(object@msFeatureData$adjustedRtime)))
                         msg <- c(msg,
                                  paste0("The lengths of the numeric ",
                                         "vectors in the 'adjustedRtime'",
                                         " element of the msFeatureData ",
                                         "slot does not match the number",
                                         " of scans per sample!"))
                 }
             }
             ## 3) If we've got peaks, check that we have also a related
             ##    processing history step.
             if (length(msg))
                 msg
             else TRUE
         }
)

.CHROMPEAKS_REQ_NAMES <- c("rt", "rtmin", "rtmax", "into", "maxo", "sn")
setClass("XChromatogram",
         slots = c(chromPeaks = "matrix"),
         prototype = prototype(
             chromPeaks = matrix(nrow = 0, ncol = length(.CHROMPEAKS_REQ_NAMES),
                                 dimnames = list(character(),
                                                 .CHROMPEAKS_REQ_NAMES))),
         contains = "Chromatogram",
         validity = .validXChromatogram)

setClass("XChromatograms",
         slots = c(.processHistory = "list",
                   featureDefinitions = "DataFrame"),
         prototype = prototype(.processHistory = list(),
                               featureDefinitions = DataFrame()),
         contains = "Chromatograms",
         validity = .validXChromatograms)

#' @aliases mz,CalibrantMassParam
#'
#' @title Calibrant mass based calibration of chromatgraphic peaks
#'
#' @description Calibrate peaks using mz values of known masses/calibrants.
#'     mz values of identified peaks are adjusted based on peaks that are close
#'     to the provided mz values. See details below for more information.
#'
#' @param mz a `numeric` or `list` of `numeric` vectors with reference mz
#'     values. If a `numeric` vector is provided, this is used for each sample
#'     in the `XCMSnExp` object. If a `list` is provided, it's length has to be
#'     equal to the number of samples in the experiment.
#'
#' @param mzabs `numeric(1)` the absolute error/deviation for matching peaks to
#'     calibrants (in Da).
#'
#' @param mzppm `numeric(1)` the relative error for matching peaks to calibrants
#'     in ppm (parts per million).
#'
#' @param neighbors `integer(1)` with the maximal number of peaks within the
#'     permitted distance to the calibrants that are considered. Among these the
#'     mz value of the peak with the largest intensity is used in the
#'     calibration function estimation.
#'
#' @param method `character(1)` defining the method that should be used to
#'     estimate the calibration function. Can be `"shift"`, `"linear"` (default)
#'     or `"edgeshift"`.
#'
#' @details The method does first identify peaks that are close to the provided
#'     mz values and, given that there difference to the calibrants is smaller
#'     than the user provided cut off (based on arguments `mzabs` and `mzppm`),
#'     their mz values are replaced with the provided mz values. The mz values
#'     of all other peaks are either globally shifted (for `method = "shift"`
#'     or estimated by a linear model through all calibrants.
#'     Peaks are considered close to a calibrant mz if the difference between
#'     the calibrant and its mz is `<= mzabs + mz * mzppm /1e6`.
#'
#' **Adjustment methods**: adjustment function/factor is estimated using
#' the difference between calibrant and peak mz values only for peaks
#' that are close enough to the calibrants. The availabel methods are:
#' * `shift`: shifts the m/z of each peak by a global factor which
#'   corresponds to the average difference between peak mz and calibrant mz.
#' * `linear`: fits a linear model throught the differences between
#'   calibrant and peak mz values and adjusts the mz values of all peaks
#'   using this.
#' * `edgeshift`: performs same adjustment as `linear` for peaks that are
#'   within the mz range of the calibrants and shift outside of it.
#'
#' For more information, details and examples refer to the
#' *xcms-direct-injection* vignette.
#'
#' @note `CalibrantMassParam` classes don't have exported getter or setter
#'     methods.
#'
#' @return For `CalibrantMassParam`: a `CalibrantMassParam` instance.
#'     For `calibrate`: an [XCMSnExp] object with chromatographic peaks being
#'     calibrated. **Be aware** that the actual raw mz values are not (yet)
#'     calibrated, but **only** the identified chromatographic peaks.
#'
#' @author Joachim Bargsten, Johannes Rainer
#'
#' @md
#'
#' @rdname calibrate-calibrant-mass
setClass("CalibrantMassParam",
         slots = c(
             mz = "list",
             mzabs = "numeric",
             mzppm = "numeric",
             neighbors = "integer",
             method = "character"
         ),
         contains = c("Param"),
         prototype = prototype(
             mz = list(),
             mzabs = 0.0001,
             mzppm = 5,
             neighbors = 3L,
             method = "linear"
         ),
         validity = function(object) {
             msg <- character()
             if (length(object@mz)) {
                 is_num <- vapply(object@mz, FUN = is.numeric,
                                  FUN.VALUE = logical(1), USE.NAMES = FALSE)
                 if (any(!is_num))
                     msg <- c(msg, paste0("'mz' has to be a list of numeric",
                                          " vectors"))
                 is_unsorted <- vapply(object@mz, FUN = is.unsorted,
                                       FUN.VALUE = logical(1),
                                       USE.NAMES = FALSE)
                 if (any(is_unsorted))
                     msg <- c(msg, paste0("the mz values in 'mz' have to be ",
                                          "increasingly ordered"))
             }
             if (length(object@mzppm) != 1 | any(object@mzppm < 0))
                 msg <- c(msg, paste0("'mzppm' has to be positive numeric",
                                      " of length 1."))
             if (length(object@mzabs) != 1 | any(object@mzabs < 0))
                 msg <- c(msg, paste0("'mzabs' has to be positive numeric",
                                      " of length 1."))
             if (length(object@neighbors) != 1 | any(object@neighbors <= 0))
                 msg <- c(msg, paste0("'neighbors' has to be positive integer",
                                      " of length 1."))
             if (length(object@method) != 1)
                 msg <- c(msg, paste0("'method' has to be of length 1."))
             if (!all(object@method %in% c("linear", "shift", "edgeshift")))
                 msg <- c(msg, paste0("'method' should be one of 'linear'",
                                      ", 'shift' or 'edgeshift'."))
             if (length(msg))
                 msg
             else
                 TRUE
         })
anupbharade09/xcms_test documentation built on May 14, 2019, 4:07 a.m.