################################################
## S+Proteome Peak alignment functions
##
## msAlign
## msAlignCluster
## msAlignGap
## msAlignMRD
## msAlignVote
##
################################################
###
# msAlign
###
"msAlign" <- function(x, FUN="cluster", mz.precision=0.003,
snr.thresh=10, ...)
{
# extract peak.list
peak.list <- x$peak.list
if (is.null(peak.list)) {
if (is.null(x$peak.class)) {
stop("The input ", deparse(substitute(x)), " has no peak.list attribute,",
"which can be generated by running msPeak() with use.mean=FALSE.")
}
else { # if peak.class has been obtained from the mean spectrum
warning("The input ", deparse(substitute(x)), " has peak.class attribute,",
"no peak alignment is needed any more. Returning the original object.")
return(x)
}
}
# check the number of spectra with peak detected
n.peak.list <- length(peak.list)
nopeak <- (sapply(peak.list, NROW) == 0)
n.nopeak <- sum(nopeak)
if (n.nopeak==n.peak.list) {
stop("no peak to align")
} else if (n.nopeak != 0) {
peak.list <- peak.list[!nopeak]
}
# extract locations of peaks with large snr from the peak.list
peak.all <- unlist(lapply(peak.list,
function(x, snr.thresh) x[x$snr>=snr.thresh, "tick.loc"],
snr.thresh=snr.thresh))
if (length(peak.all)==0) stop("no strong peak to align, reduce snr.thresh")
peak.count <- table(peak.all)
peak.unique <- as.integer(names(peak.count))
# compute mz precision threshold in log scale
# mass(1+mz.precision) -> log(mass) + log(1+mz.precision)
mz <- x$mz
logmz <- log(mz)
logmz.precision <- log(1 + mz.precision)
# group peaks based on their locations
peak.group <- switch(match.arg(FUN, choices=c("cluster", "gap", "vote", "mrd")),
cluster = msAlignCluster(logmz, logmz.precision, peak.unique),
gap = msAlignGap(logmz, logmz.precision, peak.unique),
vote = msAlignVote(logmz, logmz.precision, peak.unique, peak.count),
mrd = msAlignMRD(peak.unique, peak.count,
level=ifelse1(is.null(x$mrd), NULL, max(x$mrd$levels)), ...))
# compute some summary statistics for each peak class
tick.loc = tapply(peak.unique, peak.group, median)
if (is.R()) {
tick.range = cbind(tapply(peak.unique, peak.group, min), tapply(peak.unique, peak.group, max))
} else {
tick.range = t(data.frame(tapply(peak.unique, peak.group, range)))
}
peak.class <- cbind(
tick.loc = tick.loc,
tick.left = tick.range[,1],
tick.right = tick.range[,2],
tick.span = tick.range[,2] - tick.range[,1] + 1,
mass.loc = mz[tick.loc],
mass.left = mz[tick.range[,1]],
mass.right = mz[tick.range[,2]],
mass.span = mz[tick.range[,2]] - mz[tick.range[,1]])
ord <- order(peak.class[, "tick.loc"])
peak.class <- peak.class[ord, ]
dimnames(peak.class)[[1]] <- seq(nrow(peak.class))
# add peak.class to the msSet object
x <- msSet(x, peak.class=peak.class)
# append history event
eventHistory(x, "Peak Alignment"=
list(process=FUN, mz.precision=mz.precision, snr.thresh=snr.thresh))
}
###
# msAlignCluster
###
# align peaks using 1-D clustering
"msAlignCluster" <- function(logmz, logmz.precision, peak.unique)
{
if (is.R()) {
clust.tree <- hclust(dist(logmz[peak.unique], method="euclidean"),
method="complete")
} else {
clust.tree <- hclust(dist(logmz[peak.unique], method="euclidean"),
method="compact")
}
# peak.group
cutree(clust.tree, h=2*logmz.precision) # +/- logmz.precision
}
###
# msAlignGap
###
# align peaks using between-peak distance
"msAlignGap" <- function(logmz, logmz.precision, peak.unique)
{
diff.logmz <- diff(logmz[peak.unique])
1 + c(0, cumsum(diff.logmz > logmz.precision))
}
###
# msAlignMRD
###
"msAlignMRD" <- function(peak.unique, peak.count, level=NULL,
wavelet="s8", reflect=TRUE, ...)
{
# check input arguments
max.level <- floor(logb(diff(range(peak.unique)),2))
if (is.null(level))
level <- 3
checkRange(level, c(3,max.level))
checkScalarType(wavelet,"character")
checkVectorType(peak.unique,"integer")
checkVectorType(peak.count,"integer")
# form histogram of peak locations
tick.max <- max(peak.unique)
peak.hist <- rep(0, tick.max)
peak.hist[peak.unique] <- peak.count
# smooth the histogram ala an MRD
peak.hist.smoothed <- wavMRDSum(peak.hist, wavelet=wavelet, levels=level-1,
keep.smooth=TRUE, keep.details=FALSE, xform="modwt", reflect=reflect)
# calculate first derivative approximation of smoothed histogram
crystal <- paste("d", level-2, sep="")
dpeak.hist.smoothed <- wavShift(wavMODWT(peak.hist.smoothed,
wavelet="haar", n.levels=level-2))[[crystal]]
# find local minima
inadirs <- zeroCross(dpeak.hist.smoothed, "positive")
# form binary bin boundary vector: 1 indicates the
# index location of the peak histogram bin boundary
bin.marks <- rep(0, tick.max)
bin.marks[inadirs] <- 1
# return peak grouping
cumsum(bin.marks)[peak.unique]
}
###
# msAlignVote
###
# align peaks using voting
"msAlignVote" <- function(logmz, logmz.precision, peak.unique, peak.count)
{
n.peak <- length(peak.unique)
logmzp = logmz[peak.unique]
# find the left and right bounds of each peak in peak.unique
left <- right <- rep(NA, n.peak)
for (i in 1:n.peak) {
logmz.current <- logmzp[i]
index <- 1:i
left[i] <- index[logmz.current-logmzp[index]<=2*logmz.precision][1]
index <- i:n.peak
right[i] <- rev(index[logmzp[index]-logmz.current<=2*logmz.precision])[1]
}
# # NOTE: could get rid of the above for loop using outer() but
# # 1. it could get too big if there are more than five thousand unique peaks
# # 2. the computation is not necessarily faster
# diff.matrix <- outer(logmzp, logmzp, "-")
# left <- apply(diff.matrix<=2*logmz.precision, 1,
# function(x) which(x)[1])
# right <- apply(diff.matrix>=-2*logmz.precision, 1,
# function(x) rev(which(x))[1])
# identify peak groups iteratively by voting
peak.group <- rep(NA, n.peak)
peak.remain <- 1:n.peak
count.remain <- sum(peak.count)
while (count.remain>0){
my.count <- apply(matrix(peak.remain), 1,
function(i, peak.count, left, right) sum(peak.count[left[i]:right[i]]),
peak.count=peak.count, left=left, right=right)
my.count.max <- max(my.count)
index <- peak.remain[my.count==my.count.max][1] # one at a time
peak.group[left[index]:right[index]] <- index
peak.count[left[index]:right[index]] <- 0
count.remain <- count.remain - my.count.max
peak.remain <- setdiff(peak.remain, left[index]:right[index])
}
peak.group
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.