R/extract_feature_intensity.R

Defines functions load_spectrum extract_feature_intensity

Documented in extract_feature_intensity load_spectrum

#' Extract intensities of specific features from a single spectrum
#'
#' For each of the given features local maxima will be identified in a region
#' of 2ppm around the theoretical mz value using the algorithm implemented in
#' the function \code{\link[FTICRMS]{locate.peaks}}. The intensity value at the
#' local maximum with maximal intenstity within 1ppm will be extracted.
#' If no peak could be detected, the intensity at the closest measured mz value
#' is used.
#'
#' @param spectrum.file Character. Name of mzML file to read.
#' @param scanrange Numeric vector of length 2. Scan range to read
#' (see \code{\link[xcms]{xcmsRaw}}).
#' @param info.features data.frame with information about features (e.g. as
#' generated by the function \code{\link{chem_formula_2_adducts}}).
#' @param ppm Numeric. Resolution of mass spectrometer.
#' @param num.pts Numeric. Minimum number of points needed to search for a peak
#' (see \code{\link[FTICRMS]{locate.peaks}}).
#' @param oneside.min Numeric. Minimum number of points needed on each side of
#' the local maximum (see \code{\link[FTICRMS]{locate.peaks}}).
#' @param verbose Logical. Print out additional information.
#' @param res.dir Character. Name of directory where result is saved as RDS
#' object (using the basename of the spectrum file). If NULL, a data.frame with
#' the results will be returned.
#'
#' @return
#' if res.dir = NULL, data frame with information for each feature:
#' \itemize{
#'   \item id: feature identifier (corresponds to chemical.form.isotope in
#'   info.features)
#'   \item mz.th: theoretical mz value
#'   \item intensity: extracted intensity
#'   \item mz.meas: mz value at center of peak (or closest measured mz value)
#'   \item peak.detected: 0 = no peak detected, 1 = peak detected, NA = no or
#'   not enough mz values available for feature
#'   }
#'
#' @import FTICRMS
#' @export
#'
#' @examples
#' data("info.features")
#' res.dir = tempdir()
#' mzml.file = system.file("extdata",
#'                         "20121015b__002__07__ME.mzML",
#'                         package = "preprocessHighResMS")
#'
#' extract_feature_intensity(spectrum.file = mzml.file,
#'                           scanrange = c(1, 2),
#'                           info.features = info.features,
#'                           ppm = 20,
#'                           res.dir = res.dir)

extract_feature_intensity <- function(spectrum.file,
                                      scanrange = c(1, 1),
                                      info.features,
                                      ppm = 10,
                                      num.pts = 5,
                                      oneside.min = 1,
                                      verbose = TRUE,
                                      res.dir = NULL) {

    ## extract isotope info
    info.iso = unique(info.features[, c("chemical.form.isotope", "m.z")])
    ## check that id is unique
    if (length(unique(info.iso$chemical.form.isotope)) != nrow(info.iso)) {
        stop("at least one feature with different mz values!")
    }
    if (verbose) {
        print(paste(nrow(info.iso), "unique features"))
    }

    ## define window around each mz value
    info.iso$m.z.2ppm.lower = info.iso$m.z -
        2 * ppm * info.iso$m.z / 1e06
    info.iso$m.z.2ppm.upper = info.iso$m.z +
        2 * ppm * info.iso$m.z / 1e06
    info.iso$m.z.lower = info.iso$m.z -
        ppm * info.iso$m.z / 1e06
    info.iso$m.z.upper = info.iso$m.z +
        ppm * info.iso$m.z / 1e06

    ## load spectrum
    spectrum = load_spectrum(spectrum.file = spectrum.file,
                             scanrange = scanrange)

    ## extract feature intensities
    range.mz = range(spectrum$mz)
    info.iso = info.iso[which(info.iso$m.z.2ppm.lower >= range.mz[1] &
                                  info.iso$m.z.2ppm.upper <= range.mz[2]),]

    if (verbose) {
        print(paste(nrow(info.iso), "isotopes covered by spectrum"))
    }

    intensity = center = width = peak.detected =
        rep(NA, length = nrow(info.iso))

    for (i in seq_len(nrow(info.iso))) {
        if (i %% 500 == 0)
            print(i)
        x = info.iso[i,]
        ind = which(spectrum$mz >= x$m.z.2ppm.lower &
                        spectrum$mz <= x$m.z.2ppm.upper)

        if (length(ind) > 0) {
            temp = spectrum[ind,]
            ## note: needed since locate.peaks() function in FTICRMS package
            ## does not check if at least num.pts mz values are available
            ## no peak information is stored if not enough rows are available
            if (nrow(temp) >= num.pts) {
                #                print("in first if")

                ## peak detection using local maxima
                res.p = locate.peaks(
                    peak.base = temp,
                    num.pts = num.pts,
                    oneside.min = oneside.min,
                    peak.method = "locmaxes",
                    thresh = -Inf
                )

                ## restrict to peaks within ppm
                res.p = res.p[which(res.p$Center_hat >= x$m.z.lower &
                                        res.p$Center_hat <= x$m.z.upper),]

                if (nrow(res.p) > 0) {
                    ## more than 1 peak detected
                    ## select maximal peak
                    if (nrow(res.p) > 1) {
                        ord = order(res.p$Max_hat,
                                    decreasing = TRUE)
                        ratio = res.p$Max_hat[ord[2]] / res.p$Max_hat[ord[1]]
                        if (ratio < 0.5) {
                            peak.detected[i] = 1
                        } else {
                            peak.detected[i] = 0
                        }
                        res.p = res.p[ord[1], , drop = FALSE]
                    } else {
                        peak.detected[i] = 1
                    }

                    intensity[i] = res.p$Max_hat
                    center[i] = res.p$Center_hat
                    width[i] = res.p$Width_hat
                }
            }
        }

        if (length(ind) < num.pts || nrow(res.p) == 0) {
            ## no peak found (e.g. constant intensity (0) or
            ## peak at border of interval
            ## select mz value closest to feature
            if (length(ind) > 0) {
                diff = abs(temp$mz - x$m.z)
                ind.min = which.min(diff)
                intensity[i] = temp$intensity[ind.min]
                center[i] = temp$mz[ind.min]
                peak.detected[i] = 0
            }

        }
    }

    res.features.all = data.frame(
        id = info.iso$chemical.form.isotope,
        mz.th = info.iso$m.z,
        intensity,
        mz.meas = center,
        peak.detected,
        stringsAsFactors = FALSE
    )
    rownames(res.features.all) = res.features.all$id
    #    res.features = na.omit(res.features.all)
    if (verbose)
        print(paste(sum(!is.na(
            res.features.all$intensity
        )),
        "features detected"))

    if (!is.null(res.dir)) {
        feature.file = file.path(res.dir,
                                 gsub(".mzML", ".rds",
                                      basename(spectrum.file)))
        saveRDS(res.features.all,
                file = feature.file)

    } else {
        return(res.features.all)
    }
}

#' Load spectrum
#'
#' Based on functions in Bioconductor package xcms.
#'
#' @param spectrum.file Character. Name of mzML file to read.
#' @param scanrange Numeric vector of length 2. Scan range to read
#' (see \code{\link[xcms]{xcmsRaw}}).
#'
#' @return
#' data frame with spectrum information:
#' \itemize{
#'   \item mz: mz value
#'   \item intensity: intensity
#' }
#'
#' @import xcms
#' @keywords internal

load_spectrum <- function(spectrum.file,
                          scanrange = c(1, 1)) {
    if (!file.exists(spectrum.file)) {
        stop(paste("file", spectrum.file, "does not exist!"))
    }
    xcms.raw.obj = xcmsRaw(filename = spectrum.file,
                           scanrange = scanrange,
                           profstep = 0)

    no.scans = scanrange(xcms.raw.obj)[2]
    if (no.scans == 1) {
        spectrum = getScan(xcms.raw.obj, scan = 1)
    } else {
        spectrum = getSpec(xcms.raw.obj)
    }
    return(data.frame(spectrum))

}
szymczak-lab/preprocessHighResMS documentation built on Oct. 6, 2020, 12:50 a.m.