R/dynamicNoiseFilter.R

#' Dynamic Noise filtration
#' @param spectrum.df a dataframe or matrix with two columns:
#' 1. Mass/ Mass-to-charge ratio
#' 2. Intensity
#' @param DNF dynamic noise filter minimum signal to noise threshold
#' (default = 2), calculated as the ratio between the linear model predicted
#' intensity value and the actual intensity.
#' @param minPeaks minimum number of signal peaks following dynamic
#' noise filtration (default = 5).
#' @param minInt integer minimum dynamic noise filter
#' @return a list containing 3 objects:
#' 1. Above.noise The dynamic noise filtered matrix/ dataframe
#' 2. metaData a dataframe with the following column names:
#' 1. Noise.level the noise level determined by the dynamic noise filter
#' function.
#' 2. IntCompSpec Total intensity composite spectrum.
#' 3. TotalIntSNR Sparse ion signal to noise ratio
#' (mean intensity/ stdev intensity)
#' 4. nPeaks number of peaks in composite spectrum
#' 3. aboveMinPeaks Logical are the number of signals above the minimum level
#' @details  Dynamic noise filter adapted from the method described in Xu H.
#' and Frietas M. 'A Dynamic Noise Level Algorithm for Spectral Screening of
#' Peptide MS/MS Spectra' 2010 BMC Bioinformatics.
#' The function iteratively calculates linear models starting from
#' the median value of the lower half of all intensities in the spectrum.df.
#' The linear model is used to predict the next peak intensity and ratio is
#' calculated between the predicted and actual intensity value.
#' Assuming that all preceeding intensities included in the linear model
#' are noise, the signal to noise ratio between the predicted and actual
#' values
#' should exceed the minimum signal to noise ratio (default DNF = 2).
#' The function continues until either the DNF value minimum has been exceeded
#' and is also below the maxPeaks or maximum number of peaks value. As the
#' function must necessarily calculate potentially hundreds of linear models
#' the RcppEigen package is used to increase the speed of computation.
#' @usage dynamicNoiseFilter(spectrum.df = NULL, DNF = 2, minPeaks = 5,
#' minInt = 100)
dynamicNoiseFilter <-
    function(spectrum.df = NULL,
            DNF = 2,
            minPeaks = 5,
            minInt = 100) {
        # error handling
        if (is.null(spectrum.df)) {
            stop("No spectrum matrix/dataframe supplied")
        }
        if (nrow(spectrum.df) > minPeaks) {
            # rank matrix/ dataframe by intensity
            intOrder <- order(spectrum.df[, 2])
            spectrum.df <- spectrum.df[intOrder, , drop = FALSE]
            minIntIndx <- which(spectrum.df[, 2] >= minInt)[1]
            peakIndx <- seq(1, nrow(spectrum.df), 1)
            minIntIndx <-
                ifelse(is.na(minIntIndx), nrow(spectrum.df), minIntIndx)
            minIntIndx <- ifelse(minIntIndx == 1, 3, minIntIndx)
            if (minIntIndx < (nrow(spectrum.df) - 1)) {
                for (k in minIntIndx:(nrow(spectrum.df) - 1)) {
                    # calc linear model rcppeigen
                    fit <-
                        coef(RcppEigen::fastLm(as.numeric(spectrum.df[
                            seq_len(k),2]) ~ peakIndx[seq_len(k)]))
                    # predicted intensity model from intercept
                    PredInt <- fit[1] + (fit[2]) * (k + 1)
                    # calc Signal to noise ratio predicted vs. actual
                    SNR <- spectrum.df[k + 1, 2] / PredInt
                    # if SNR reached and below max number of peaks break loop
                    if (SNR >= DNF) {
                        Noise.level <- as.numeric(spectrum.df[k + 1, 2])
                        break
                    } else {
                        Noise.level <- as.numeric(spectrum.df[k + 1, 2])
                    }
                }
            } else {
                Noise.level <- spectrum.df[minIntIndx, 2]
            }
            # filter by DNF noise filter level
            Noise.indx <- which(spectrum.df[, 2] >= Noise.level)
            spectrum.df <- spectrum.df[Noise.indx, , drop = FALSE]
            # sort by m/z
            spectrum.df <-
                spectrum.df[order(spectrum.df[, 1]), , drop = FALSE]
            # number of peaks higher than minimum
            aboveMinPeaks <- nrow(spectrum.df) >= minPeaks
            # Total intensity composite spectrum
            IntCompSpec <- sum(spectrum.df[, 2])
            # Sparse ion signal to noise ratio (mean intensity/
            #  stdev intensity)
            if (nrow(spectrum.df) <= 1) {
                TotalIntSNR <- 0
            } else {
                TotalIntSNR <- mean(spectrum.df[, 2], sd(spectrum.df[, 2]))
            }
            DNF.tmp <- list(
                Above.noise = spectrum.df,
                metaData =
                    data.frame(
                        Noise.level = Noise.level,
                        IntCompSpec = IntCompSpec,
                        TotalIntSNR = TotalIntSNR,
                        nPeaks = nrow(spectrum.df),
                        stringsAsFactors = FALSE),
                        aboveMinPeaks = aboveMinPeaks
            )
        } else {
            DNF.tmp <- list(
                Above.noise = spectrum.df,
                metaData =
                    data.frame(
                        Noise.level = 0,
                        IntCompSpec = 0,
                        TotalIntSNR = 0,
                        nPeaks = nrow(spectrum.df),
                        stringsAsFactors = FALSE),
                aboveMinPeaks = FALSE
            )
        }
        return(DNF.tmp)
    }
JosieLHayes/adductomicsR documentation built on May 5, 2019, 9:43 p.m.