Nothing
### $Id: baseline.peakDetection.R 170 2011-01-03 20:38:25Z bhm $
#' @title Simultaneous Peak Detection and Baseline Correction
#'
#' @description A translation from Kevin R. Coombes et al.'s MATLAB code for detecting peaks
#' and removing baselines
#'
#' @details Peak detection is done in several steps sorting out real peaks through
#' different criteria. Peaks are removed from spectra and minimums and medians
#' are used to smooth the remaining parts of the spectra. If \code{snminimum}
#' is omitted, y3, midspec, y and y2 are not returned (faster)
#'
#' @aliases baseline.peakDetection peakDetection
#' @param spectra Matrix with spectra in rows
#' @param left Smallest window size for peak widths
#' @param right Largest window size for peak widths
#' @param lwin Smallest window size for minimums and medians in peak removed
#' spectra
#' @param rwin Largest window size for minimums and medians in peak removed
#' spectra
#' @param snminimum Minimum signal to noise ratio for accepting peaks
#' @param mono Monotonically decreasing baseline if \code{mono}>0
#' @param multiplier Internal window size multiplier
#' @param left.right Sets eflt and right to value of \code{left.right}
#' @param lwin.rwin Sets lwin and rwin to value of \code{lwin.rwin}
#' @return \item{baseline }{Matrix of baselines corresponding to spectra
#' \code{spectra}} \item{corrected }{Matrix of baseline corrected spectra}
#' \item{peaks}{Final list of selected peaks} \item{sn}{List signal to noise
#' ratios for peaks} \item{y3}{List of peaks prior to singal to noise
#' selection} \item{midspec}{Mid-way baseline estimation} \item{y}{First
#' estimate of peaks} \item{y2}{Second estimate of peaks}
#' @author Kristian Hovde Liland and Bjørn-Helge Mevik
#' @references KEVIN R. COOMBES et al.: Quality control and peak finding for
#' proteomics data collected from nipple aspirate fluid by surface-enhanced
#' laser desorption and ionization.
#' @keywords baseline spectra
#' @examples
#'
#' data(milk)
#' bc.peakDetection <- baseline(milk$spectra[1,, drop=FALSE], method='peakDetection',
#' left=300, right=300, lwin=50, rwin=50)
#'
#' \donttest{
#' # To obtain the peak list, specify S/N threshold and call the baseline function directly:
#' bc.peakDetection2 <- baseline.peakDetection(milk$spectra[1,, drop=FALSE],
#' left=300, right=300, lwin=50, rwin=50, snminimum = 5)
#' }
#'
#' \dontrun{
#' plot(bc.peakDetection)
#' }
#' @export
baseline.peakDetection <- function(spectra, left, right, lwin, rwin, snminimum, mono=0, multiplier=5, left.right, lwin.rwin){
# peakDetection(spectra, L, R, LB, RB, sn, mono, multiplier)
#
# Given a spectrum, we want to remove the baseline and identify the
# location of peaks. The main return from this function is a list,
# PEAKS, of the peak locations in ticks. The second return value is
# the modified spectrum with the baseline removed.
#
# The second argument, snminimum, is the minimum acceptable value for
# the signal to noise ratio. The next pair of values are the left and
# right window sizes for peak resolution; see .mixedup for details. The
# next pair of values are the left and right window sizes for peak
# removal in preparation for baseline correction; see .localMinPet for
# details. The next value specifies whether the baseline should be made
# monotonically decreasing. The final value is a multiplier specifying
# the number of peak widths used to calculate the noise; this defaults to 5.
#
# Described as algorithm SPDBC in Clinical Chemistry 2003; 49:1615-1623.
#
# Original version by Kevin R. Coombes
# $Revision: 2.2 $
# Last modified by $Author: krc $ on $Date: 2003/03/31 19:01:58 $.
np <- dim(spectra)
Midspec <- corrected <- matrix(0,np[1],np[2])
Sn <- Peaks <- Y1 <- Y2 <- Y3 <- list()
if(!missing(left.right)){
left <- left.right
right <- left.right
}
if(!missing(lwin.rwin)){
lwin <- lwin.rwin
rwin <- lwin.rwin
}
for(a in 1:np[1]){
spectrum <- spectra[a,]
# step 1: Use our basic peak finder on the raw spectrum.
mixr <- .mixedup(spectrum, left, right)
y <- mixr$topper; yleft <- mixr$lotter; yright <- mixr$rotter
# step 2: Remove the peaks and fill in with straight lines
# across the base.
slip <- .peakRemoval(spectrum, y, yleft, yright, mono)
# step 3: Compute a local minimum in a window of width 256. (The
# argument to the function is 128, which extends on each side from
# the center.)
mini1 <- .localMinPet(slip, lwin, rwin, mono)
# step 4: Remove the local minimum to estimate the base line.
midspec <- spectrum - mini1
# step 5: Repeat steps one through four. The first pass typically
# leaves a slight slant to the baseline in the exponentially decaying
# region, and a second pass cleans up most of this.
mixr <- .mixedup(midspec, left, right)
y2 <- mixr$topper; yleft2 <- mixr$lotter; yright2 <- mixr$rotter
slip2 <- .peakRemoval(midspec, y2, yleft2, yright2, mono)
mini <- .localMinPet(slip2, lwin, rwin, mono)
newspec <- midspec - mini
corrected[a,] <- newspec
if(!missing(snminimum)){
# step 6: Make a final peak finding pass on the baseline-corrected
# spectrum.
mixr <- .mixedup(newspec, left, right)
y3 <- mixr$topper; yleft3 <- mixr$lotter; yright3 <- mixr$rotter
# step 7: In general, the peak finder is too inclusive, since it does
# not take noise into consideration. As described in that file, the
# general process found about 450 peaks in a spectrum of 18000 points.
# Visual inspection revealed that many of them were still in the noise
# region. So, we compute a signal to noise ratio using the MAD in a
# local window, and only keep peaks above a threshold. Using S/N > 3,
# we get down to about 200 peaks, most of which seem believeable.
width <- multiplier*pmax(yright3-y3, y3-yleft3)
noise <- 0*y3
middle <- 0*y3
for(i in 1:length(y3)){
lend <- max(1, (y3[i]-width[i]))
rend <- min(length(newspec), (y3[i]+width[i]))
slice <- c(newspec[lend:yleft3[i]], newspec[yright3[i]:rend])
middle[i] <- median(slice)
noise[i] <- median(abs(slice-middle[i]))
noise[i] <- median(abs(diff(newspec[lend:rend])))
}
signal <- newspec[y3]-middle
sn <- pmax(0, signal/noise)
peaks <- y3[sn > snminimum]
Peaks[[a]] <- peaks
Sn[[a]] <- sn
Y1[[a]] <- y
Y2[[a]] <- y2
Y3[[a]] <- y3
Midspec[a,] <- midspec
}
}
# return
if(missing(snminimum)){
list(baseline=spectra-corrected, corrected=corrected)
} else {
list(baseline=spectra-corrected, corrected=corrected, peaks=Peaks, sn=Sn, y3=Y3, midspec=Midspec, y=Y1, y2=Y2)
}
}
#########################################################################################################
.peakRemoval <- function(w, y, yleft, yright, mono){
# .peakRemoval(spectrum, y, left, right, mono)
#
# Inputs are a spectrum, and a list of peaks (y) with their
# left (yleft) and right (yright) boundaries. There is another
# optional argument (mono). if(mono is greater than zero, then
# we believe the ultimate baseline should be monotonically
# decreasing. The default value of mono = 0, which does not
# enfore monotonicity. The output is a new spectrum of the
# same length with the peaks removed and replaced by straight
# lines across the base.
#
# Part of the SPF, SPDBC package described in Clinical Chemistry 2003; 49:1615-1623.
#
# Original version by Kevin Coombes
# $Revision: 2.2 $
# Last modified by $Author: krc $ on $Date: 2003/03/31 19:01:58 $.
if(missing(mono))
mono <- 0
# The problem is what happens if(you have several real peaks
# close together. if(one peak doesn't come all the way down to
# baseline, you get big jumps in the "peak-removed" spectrum.
# We can start chopping those out, but have to adjust the
# left and right boundaries.
magic <- 3 # magic number of multiples of the median slope
slopes <- (w[yright]-w[yleft])/(yright-yleft)
m <- magic*median(abs(slopes))
counters <- 1:length(slopes)
if(mono > 0){
startup <- min(counters[abs(slopes)<m]) # initial slide
} else {
temp <- 1:length(w)
screwy <- max(temp[w==max(w)])
startup <- min(counters[y > screwy]) # end of saturation
}
m <- magic*median(abs(slopes[counters>startup])) # ignore beginning
selector <- !(counters>startup & abs(slopes) > m)
# only apply this to the peaks with slopes at most 3 times the median
# however, we can have large slopes at the beginning of the spectrum
# in the matrix noise region.
# now we move the left and right boundaries accordingly
z <- y
zr <- yright
zl <- yleft
for(i in 2:(length(y)-1)){
if(selector[i] == 0){ # forget this peak
if(w[yright[i-1]] > w[yright[i]])
zr[i-1] <- zr[i]
if(w[yleft[i+1]] > w[yleft[i]])
zl[i+1] <- zl[i]
}
}
# use the selector
z <- z[selector]
zl <- zl[selector]
zr <- zr[selector]
# finally, remove the peaks
newspec <- w
for(i in 1:length(z)){
yr <- zr[i]
yl <- zl[i]
slope <- (w[yr] - w[yl])/(yr-yl)
newspec[yl:yr] <- slope*((yl:yr) - yl) + w[yl]
}
# still not done... have to make sure we don't get negative values.
# return
pmin(newspec, w)
}
#########################################################################################################
.mixedup <- function(w, left, right){
# .mixedup(spectrum, smallWindow, largeWindow)
#
# Input consists of a spectrum and two window sizes. The smallWindow is
# used to collapse potential peaks together at the left end of the
# spectrum; ther default size is 5 ticks. The largeWindow is used to
# collapse peaks together at the right end of the spectrum; the default
# size is 30. The window sizes are smoothly interpolated along a
# quadratic curve, which makes them a fixed percentage of the mass on
# that scale.
#
# The return values are:
# PEAKS: a list of peak locations, in ticks
# LEFTMIN: a list of the closest minimum to the left of each peak
# RIGHTMIN: a list of the closest minimum to the right of each peak
#
# Described as algorithm SPF in Clinical Chemistry 2003; 49:1615-1623.
#
# Original version by Kevin R. Coombes
# set up default values for optional arguments
n <- length(w)-1
if (missing(right)) right <- 30
if (missing(left)) left <- 5
# perform a coherency check in case someone gave us nonsensical values
if(left <= 0)
left <- 5
if(right < left){
temp <- right
right <- left
left <- temp
}
## Step 1: Find all local maxima and associated local minima.
x <- diff(w) # first difference
xl <- x[1:(n-1)] # left side
xr <- x[2:n] # right side
neartop <- (xl > 0 & xr <= 0) | (xl == 0 & xr < 0)
nearbot <- (xl < 0 & xr >= 0) | (xl == 0 & xr > 0)
xx <- 1:length(x)
nb <- c(1,(xx[nearbot]+1)) # tentative minimum
nt <- xx[neartop]+1 # tentative maximum
## Step 2: Coalesce rising plateaus to find the right-most one.
curt <- 1
curb <- 1
tops <- 0
bots <- 1
numtop <- 1
numbot <- 1
while(curt < length(nt) && curb < length(nb)){
while(nb[curb]<nt[curt] && curb < length(nb)){
curb <- curb+1
}
bots[numbot] <- nb[curb]
numbot <- numbot+1
while(nt[curt] < nb[curb] && curt < length(nt)){
curt <- curt + 1
}
tops[numtop] <- nt[curt];
numtop <- numtop + 1
}
# At this point, "tops" and "bots" contain local maxima and minima
# including those that go up for one tick and then immediately go
# back down. The arrays should be the same size, with each minimum
# occuring strictly to the left of each maximum. In a test example,
# there were about 3500 of these local maxima in a spectrum that
# contained 18000 points.
## Step 3: Cull some obviously inadequate candidates. Anything where the
# distance from minimum to maximum is less than the median single step
# size gets removed. In our example, this step got us down to about
# 1500 potential peaks.
fuzz <- median(abs(x)) # typical first difference; a noise estimate
flux <- w[tops] - w[bots] # size of the step up from bottom to top
tops <- tops[flux > 2*fuzz]
## Step 4: Further culling. We combine maxima that are obviously too
# close together to be resolved. The resolution is determined by
# the arguments specifying window sizes. In our test example, this
# step got us down from about 1500 peaks to about 500.
dt <- diff(tops) # distance in ticks between successive tops
a <- (right-left)/length(x)^2 # define "close" using y = a*x^2 + b
resolve <- left + (a*tops)*tops # using parameters left and right.
temtops <- tops
topper <- 1 # list of resolvable maxima.
i <- 1
while(i < length(temtops)-1){
if(dt[i] < resolve[i]){
if(w[temtops[i]] < w[temtops[i+1]])
temtops[i] <- temtops[i+1]
else
temtops[i+1] <- temtops[i]
dt[i+1] <- temtops[i+2]-temtops[i]
}
else
topper[length(topper)+1] <- temtops[i]
i <- i + 1
}
# After combining maxima, we then figure out where the closest minima
# are to both left and right. These might not agree for adjacent peaks
# because of the presence of plateaus.
lotter <- rep(1,length(topper)-1) # list of resolvable minima to left of peak.
rotter <- rep(1,length(topper)-1) # similar list to right of peak.
for(i in 2:length(topper)){
temp <- topper[i-1]:topper[i]
lotter[i] <- max(temp[w[temp]==min(w[temp])])
rotter[i] <- min(temp[w[temp]==min(w[temp])])
}
rotter <- c(rotter[2:length(rotter)], length(x))
# Step 5: The primary goal of this step is to strip out candidate
# peaks that lie on the slope up to a true peak. We insist that every
# peak include a minimum number of "up ticks" and "down ticks" from
# the maximum to the neighboring minima.
uptick <- topper-lotter
upper <- median(uptick)-median(abs(uptick-median(uptick)))
dntick <- rotter-topper
downer <- median(dntick)-median(abs(dntick-median(dntick)))
magic <- min(upper, downer) + 1
keeper <- !(uptick < magic | dntick < magic)
topper <- topper[keeper]
# rotter <- rotter[keeper]
lotter <- lotter[keeper]
## Step 6: Refine the peak location. We may have culled the wrong
# peak when two peaks were close together. Thus, we again locate the
# local maximum between two successive minima.
bottle <- c(lotter, length(x));
for(i in 1:length(topper)){
temp <- bottle[i]:bottle[i+1];
topper[i] <- max(temp[w[temp]==max(w[temp])])
}
# Of course, if the peaks move, we may need to adjust the bases as well.
lotter <- rep(1,length(topper)-1) # list of resolvable minima to left of peak.
rotter <- rep(1,length(topper)-1) # similar list to right of peak.
for(i in 2:length(topper)){
temp <- topper[i-1]:topper[i]
lotter[i] <- max(temp[w[temp]==min(w[temp])])
rotter[i] <- min(temp[w[temp]==min(w[temp])])
}
maxright <- min(topper[length(topper)]+100, length(w))
rotter <- c(rotter[2:length(rotter)], maxright)
# return
list(topper=topper, lotter=lotter, rotter=rotter)
}
#########################################################################################################
.localMinPet <- function(spectrum, window, rightWindow, mono){
# .localMinPet(spectrum, leftWindow, rightWindow, mono)
#
# Arguments are a spectrum, optional window sizes that scale
# quadratically from the left window to the right window, and
# an optional flag to tell us if the baseline should be
# monotonically decreasing. Default window sizes both equal 100.
# Default value of mono = 0, meaning
#
# Part of the SPF, SPDBC package described in Clinical Chemistry 2003; 49:1615-1623.
#
# Original version by Kevin Coombes
if(missing(window))
window <- 100
if(missing(rightWindow))
rightWindow <- window
if(missing(mono))
mono <- 0
len <- length(spectrum)
tops <- 1:len
a <- (rightWindow-window)/len^2 # define "close" using y = a*x^2 + b
resolve <- floor(window + (a*tops)*tops) # using window parameters.
# compute the local minima in the windows. Edge effects to avoid running
# off the ends of the array.
mini <- numeric(len)
lend <- pmax(1,(tops-resolve))
rend <- pmin(len, (tops+resolve))
for(i in 1:len){
s <- spectrum[lend[i]:rend[i]]
mini[i] <- min(s)
}
# monotonize
if(mono > 0){
mono <- numeric(len)
mono[1] <- mini[1]
for(i in 2:len){
mono[i] <- mono[i-1]
if(mini[i] < mono[i])
mono[i] <- mini[i]
}
mini <- mono
}
# return
mini
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.