getAtoms = function(formula) {
# parse chemical formula and return the atomic composition
output = vector()
numOfAtoms = length(strsplit(formula, split="([A-Z][a-z]*\\d*)")[[1]])
for ( i in 1:(numOfAtoms) ) {
elem = gsub(pattern=".*([A-Z][a-z]*).*", replacement="\\1", formula)
formula = strsplit(formula, split=elem)[[1]][1]
output = c(output,elem)
}
return(output)
}
convertResults = function(peakData,mol,polarity,stdInfo) {
lowE = 1
highE = 2
intVec = grep("0[1|2]$",names(peakData[[1]]))
peaks.out = data.frame() # final output structure
mainPeak = peakData[[1]]
mainPeakTag = names(peakData)[1]
peaks.out = rbind(peaks.out, cbind(
"ID" = stdInfo["ID"],
"name" = stdInfo["compound"],
"peak.tag" = mainPeakTag,
"formula" = stdInfo["formula"],
"M" = peakData[["MM"]],
"rt.win" = stdInfo["rtWin"],
"XCMS" = "$$$", # data separator
"row.idx" = as.numeric(rownames(mainPeak)),
"mz" = mainPeak$mz,
"rt.med" = round(mainPeak$rt/60,2),
"rt.min" = round(mainPeak$rtmin/60,2),
"rt.max" = round(mainPeak$rtmax/60,2),
"lowE" = floor(as.numeric(mainPeak[intVec[lowE]])),
"highE" = floor(as.numeric(mainPeak[intVec[highE]])),
"isotopes" = mainPeak$isotopes,
mainPeak["pcgroup"],
"peak.table" = stdInfo$stdFile)
)
isotopes = peakData$Isotopes
if ( !is.null(isotopes) ) {
for( isoNum in 1:length(isotopes) ) {
peakTag = names(isotopes)[isoNum]
data = data.frame( rep(as.data.frame(""), length.out=ncol(peaks.out)) )
names( data ) = names(peaks.out)
data$ID = stdInfo$ID
data$peak.tag = peakTag
data$XCMS = "$$$"
data$rt.med = round(isotopes[[isoNum]]$rt/60, digits=2)
data$rt.min = round(isotopes[[isoNum]]$rtmin/60,2)
data$rt.max = round(isotopes[[isoNum]]$rtmax/60,2)
data$mz = isotopes[[isoNum]]$mz
data$row.idx = as.numeric(rownames(peakData$Isotopes[[isoNum]]))
data$pcgroup = as.numeric(peakData$Isotopes[[isoNum]]$pcgroup)
data$peak.table = stdInfo$stdFile
data$lowE = floor(as.numeric(isotopes[[isoNum]][intVec[lowE]]))
data$highE = floor(as.numeric(isotopes[[isoNum]][intVec[highE]]))
data$isotopes = isotopes[[isoNum]]$isotopes
# bind this data to the output table
peaks.out = rbind(peaks.out, data)
}
}
adducts = peakData$Adducts
if( length(adducts) ) {
for( adductIdx in 1:length(adducts) )
{
adductName = names(peakData[["Adducts"]])[adductIdx]
# get initial data headers from the table of standards
data = data.frame( rep(as.data.frame(""), length.out=ncol(peaks.out)) )
names( data ) = names(peaks.out)
data$ID = stdInfo$ID
data$peak.tag = adductName # peak class tag
data$XCMS = "$$$"
data$mz = as.numeric(adducts[[adductIdx]]$mz)
data$rt.med = round(adducts[[adductIdx]]$rt/60, 2) # xcms RT
data$rt.min = round(adducts[[adductIdx]]$rtmin/60, 2) # xcms RT
data$rt.max = round(adducts[[adductIdx]]$rtmax/60, 2) # xcms RT
data$row.idx = as.numeric(rownames(adducts[[adductIdx]]))
data$pcgroup = as.numeric(adducts[[adductIdx]]$pcgroup)
data$peak.table = stdInfo$stdFile
data$lowE = floor(as.numeric(adducts[[adductIdx]][intVec[lowE]]))
data$highE = floor(as.numeric(adducts[[adductIdx]][intVec[highE]]))
data$isotopes = adducts[[adductIdx]]$isotopes
peaks.out = rbind(peaks.out, cbind(data))
}
}
ionCluster = peakData$pcgrp
if (length(ionCluster)) {
for( ionIdx in 1:nrow(ionCluster) )
{
# add ion cluster group data
data = data.frame( rep(as.data.frame(""), length.out=ncol(peaks.out)) )
names( data ) = names(peaks.out)
data$ID = stdInfo$ID
data$peak.tag = "pcgrp"
data$XCMS = "$$$"
data$rt.med = round(ionCluster$rt[ionIdx]/60,2)
data$rt.min = round(ionCluster$rtmin[ionIdx]/60,2)
data$rt.max = round(ionCluster$rtmax[ionIdx]/60,2)
data$mz = ionCluster$mz[ionIdx]
data$row.idx = as.numeric(rownames(ionCluster[ionIdx,]))
data$pcgroup = as.numeric(ionCluster$pcgroup[ionIdx])
data$peak.table = stdInfo$stdFile
data$lowE = floor(as.numeric(ionCluster[ionIdx,intVec[lowE]]))
data$highE = floor(as.numeric(ionCluster[ionIdx,intVec[highE]]))
data$isotopes = ionCluster[ionIdx,"isotopes"]
# bind this data to the output table
peaks.out = rbind(peaks.out, data)
}
}
rownames(peaks.out) = NULL
return(peaks.out)
}
findCompound = function(poolData,pl,polarity)
{
##############################################################
# An intermediate function to extract data of pooled samples #
# Pass data to a generic peak-picking function 'peak detect' #
##############################################################
##
## Estimate RT range by RT window number
##
if (is.na(poolData["RT"])) {
rtWin = as.integer(poolData["rtWin"])
rtBins = seq(1,39,by=2)
estimatedRtRange = c(rtBins[max(1,rtWin-2)], rtBins[min(20,rtWin+3)]) * 60
print ("Estimating RT range by RT bins")
refRT = (rtWin*2-1)*60
} else {
refRT = poolData["RT"]*60 # manually observed RT in minutes
estimatedRtRange = c(refRT-30,refRT+30)
print ("Estimating RT range by observed RT")
}
##
## Calculate exact mass using the 'Rdisop' package:
## check for the cases where the compound is a natural cation in solution
## in these cases for positive mode: mass = [M], and for negative mode: M = [M-2H]
##
chemFormula = poolData["formula"]
if (length(grep("\\+$",chemFormula))) {
# remove trailing plus sign
# poolData["chemFormula"] = substring(poolData["chemFormula"], 1,nchar(poolData["chemFormula"])-1)
# set mode
naturalCation = 1
} else {
naturalCation = 0
}
elem = c(Rdisop::initializeCHNOPS(),Rdisop::initializeElements("Cl"),Rdisop::initializeCharges())
mol = Rdisop::getMolecule(chemFormula, elements=elem)
Mneutral = mol$exactmass # get exact neutral mass
##
## NOTE: generalize these function
if (polarity=="negative") {
if (naturalCation) {
# get number of Hydrogens in original formula, then substract two (double deprotonation)
Hnum = as.numeric( sub( ".*H(\\d*).*","\\1",chemFormula) ) - 2
} else {
# get number of Hydrogens in original formula, then substract one (deprotonation)
Hnum = as.numeric( sub( ".*H(\\d*).*","\\1",chemFormula) ) - 1
}
} else if (polarity=="positive") {
if (naturalCation) {
# no need to protonate
Hnum = 0
} else {
# get number of Hydrogens in original formula, then add one (protonation)
Hnum = as.numeric( sub( ".*H(\\d*).*","\\1",chemFormula) ) + 1
print(Hnum)
}
}
print(paste("Chem Formula:",chemFormula))
refFormula = sub("H(\\d*)",paste("H",Hnum,sep=""),chemFormula) # adjust chemical formula
print(paste("Ref Formula:",refFormula))
output = peakDetect_2ch(
pl=pl,
Mneutral=Mneutral,
refRT=refRT,
lowBound=estimatedRtRange[1],
highBound=estimatedRtRange[2],
polarity=polarity,
chemFormula=chemFormula,
refFormula = refFormula,
naturalCation=naturalCation
)
output = c(output,"MM"=Mneutral)
return (output)
}
peakDetect_2ch = function(pl,
Mneutral,
refRT,
lowBound,
highBound,
polarity,
chemFormula,
refFormula,
naturalCation,
refErr=20,
spec=NULL)
{
#########################################################################
# Detect peak row in xcms peak table, based on following procedures: #
# 1) Mass accuracy range (M/z Da.) #
# 2) Estimated RT and RT boundaries (time set in seconds) #
# 3) Peak signal to noise ratio (XCMS calculated S/N) #
# 4) Camera peaks group - minimum is two members #
# 5) Main peak isotopes - minimum is one extra (M+1) #
# If none or multiple peaks are detected - return NULL #
# * Add a module which returns wild Mz matches across all chromatogram #
#########################################################################
print(paste(refRT,lowBound,highBound))
#browser()
Mp = NA
intCol = grep("01$",names(pl)) # get the low energy intensity column idx
##
## Set ion mode dependant parameters, including reference mass
##
if (polarity=="positive") {
if (naturalCation) {
Mtag="[M]+"
refMz = Mneutral
} else {
Mtag = "[M+H]+"
refMz = Mneutral + protonMass
}
ions = ruleset.pos
} else if (polarity=="negative") {
if (naturalCation) {
Mtag="[M-2H]-"
refMz = Mneutral - 2*protonMass
} else {
Mtag="[M-H]-"
refMz = Mneutral - protonMass
}
ions = ruleset.neg
}
massIons = Mneutral + ions$massdiff
names(massIons) = ions$name
##
## Mass2mass matching
##
massDelta = round(refMz*refErr/1e+06,digits=4) # convert PPM window to mass units (Da)
mzMatch = which( (refMz > pl[,"mz"]-massDelta) & (refMz < pl[,"mz"]+massDelta) ) # Mz match by mass accuracy
if (!length(mzMatch)) {
# look for another
massMatches = lapply(massIons, function(refMz) {
mzMatch = which((refMz > pl[,"mz"]-massDelta) & (refMz < pl[,"mz"]+massDelta) )
if (length(mzMatch)) {
mzrtMatch = which ((pl[mzMatch,"rt"] < highBound) & (pl[mzMatch,"rt"] > lowBound))
if (length(mzrtMatch)) {
intValid = pl[mzMatch[mzrtMatch],intCol] > pl[mzMatch[mzrtMatch],(intCol+1)]
mzrtMatch = mzrtMatch[intValid]
mainIon = mzMatch[mzrtMatch[which.max(pl[mzMatch[mzrtMatch],intCol])]]
if (length(mainIon)) {
pc = pl$pcgroup[mainIon]
pcgrp = pl[pl$pcgroup==pc,]
if (nrow(pcgrp)>1) {
if (pl[mainIon,intCol] < max(pcgrp[,intCol])) {
return (NULL)
} else {
return (mainIon)
}
} else {
return (NULL)
}
} else {
return (NULL)
}
}
}
})
massMatches = do.call ("rbind",massMatches)
if (is.null(massMatches)) { return (NULL) }
if (nrow(massMatches)>1) { massMatches = as.matrix(massMatches[which.max(pl[massMatches,intCol]),]) }
mainIon = pl[massMatches,]
Mtag = rownames(massMatches)
# get the ion rule
rule = ions[ions$name==Mtag,]
# adjust formula
refFormula = parseCameraAdducts (rule, chemFormula)
#browser()
} else {
## main parent MZ match:
mzrtMatch = which ((pl[mzMatch,"rt"] < highBound) & (pl[mzMatch,"rt"] > lowBound))
if (length(mzrtMatch)) {
intValid = pl[mzMatch[mzrtMatch],intCol] > pl[mzMatch[mzrtMatch],(intCol+1)]
mzrtMatch = mzrtMatch[intValid]
if (length(mzrtMatch)) {
mainIon = pl[mzMatch[mzrtMatch[which.max(pl[mzMatch[mzrtMatch],intCol])]],]
} else { return(NULL) }
} else {
return (NULL)
}
}
pc = mainIon$pcgroup
pcgrp = pl[pl$pcgroup==pc,]
maxPeak = max(pcgrp[,intCol])
print(mainIon)
isoNum = gsub("^\\[(\\d+).*$","\\1",mainIon$isotopes)
isotopes = grep(paste("\\[",isoNum,"\\]",sep=""), pl[,"isotopes"])
##
## Check Isotopes
##
if (length(isotopes)) {
iso = chkIsotopes(mainIon, pl[isotopes,], polarity, refFormula, intCol, Mneutral)
} else {
iso = NULL
}
##
## Get Camera adducts and grouping
##
# if pcgroup is small and there are no isotopes - return NULL
if ((nrow(pcgrp)==1) & (is.null(iso))) {
print("Warning: pcgroup too small. No Isotopes Found... Returning NULL")
#>>> might add here an accuracy + Mtag filter to allow single peak enteries - as we saw some FNs
# (but add only when ppm<=5 and Mtag=[parent ion])
return(NULL)
} else if ( nrow(pcgrp) > 1 ) {
if (!is.null(iso)) {
# remove found isotopes from peak groups
exclude = which(row.names(pcgrp) %in% as.numeric(lapply(iso,rownames)))
if (length(exclude)) {
pcgrp = pcgrp[-exclude,]
}
}
# try to detect some main ion adducts and attach the pcgrp peaks
adducts = attachAdducts(mainIon, pcgrp, Mneutral)
# redefine pcgrp, excluding annotated adducts
pcgrp = adducts[["pcgrp"]]
# define separatly the annotated adducts, otherwise as NULL
if ( length(which(names(adducts)!="pcgrp")) ) {
adducts = adducts[which(names(adducts)!="pcgrp")]
} else {
adducts = NULL
}
} else {
# there are isotopes, but no adducts
adducts = NULL
}
##
## Remove main ion peak from pcgrp
##
#print(pcgrp) length(pcgrp)
if (!is.null(pcgrp)) {
# remove the main ion from the pcgrp
if ( length(which(row.names(pcgrp) == mainIon)) ) {
pcgrp = pcgrp[-which(row.names(pcgrp) == mainIon),]
}
# apply relative intensity filter
intVec = pmax(pcgrp[,intCol],pcgrp[,(intCol+1)])
lowAdducts = which (intVec < 0.05*maxPeak & pcgrp$mz > Mneutral)
lowFrags = which (intVec < 0.01*maxPeak & pcgrp$mz < (Mneutral-8))
toRemove = c(lowFrags,lowAdducts)
if (length(toRemove)) {
if (length(toRemove)) { pcgrp = pcgrp[-toRemove,] }
}
if (!nrow(pcgrp)) { pcgrp=NULL }
}
output = list(mainIon,
iso,
adducts,
pcgrp
)
names (output) = c(Mtag,"Isotopes","Adducts","pcgrp")
return (output)
# no proper peak matching found
print("No xcms peak match or multiple matches or main ion not found - returning NULL!")
return (NULL)
}
chkIsotopes = function(mainIonPeak,peaks,polarity,chemFormula=NA,intCol=NA,M,refErr=30) {
# use the Rdisop package
# get the calculates theoretical isotopes
# find the corresponding peaks in xcms peak list
# decompose found peaks
# compare highest scoring (most probable) chemical furmula to the given formula.
# return a list of annotated isotope peaks.
if (is.na(chemFormula)) { return(NULL) }
print("==========================")
print("Cheking possible isotopes:")
print("==========================")
isotopes = list()
minIso = 2
maxIso = 4
rtBounds = 4
## use Rdisop to extract isotopes
# remove protonation sign, if exists
if (length(grep("\\+$",chemFormula))) {
chemFormula = substring(chemFormula, 1,nchar(chemFormula)-1)
}
# define the elements for isotope decomposition
t = getAtoms(as.character(chemFormula))
essentialElements = Rdisop::initializeElements (rev(t))
# print( sapply(essentialElements, function(x) { x$name }) )
# adjust molecular formula
if (polarity == "positive") { z = 1 } else if (polarity == "negative") { z = -1 }
print(paste("Using:",chemFormula))
# create Rdisop object
mol = Rdisop::getMolecule(chemFormula, elements=essentialElements, z=z)
# theoretical mass values calculated by formula
isotopicMasses = mol$isotopes[[1]][1,1:maxIso]
if (isotopicMasses[1] > 900) { decompPPM = 5 } else { decompPPM = 30 }
# set the first iso peak as main ion
isotopeMatch = data.frame (
"mz" = peaks$mz,
"int" = as.numeric(peaks[,intCol]),
"peakIdx" = as.numeric(row.names(peaks))
)
# prepare vectors of detected masses, intensities and corresponding peak indices
masses = isotopeMatch[,1]
intensities = isotopeMatch[,2]
peakIds = isotopeMatch[,3]
# decompose detected signals and check that chemical formula is possible
iso = Rdisop::decomposeIsotopes(masses, intensities, ppm=decompPPM, maxisotopes=4, elements=essentialElements)
if( length(grep(chemFormula,getFormula(iso))) ) {
for (i in 2:length(masses)) {
M_Annot = paste("M+",i-1,sep="")
isotopes[[M_Annot]] = peaks[i,]
}
print("**** Isotopes Check - OK ****")
return(isotopes)
}
return(NULL)
}
attachAdducts = function(mainIon, peaks, Mneutral, adduct=NA) {
print("==========================")
print("Checking possible adducts:")
print("==========================")
adducts = list()
# exclude peak of the main ion from the ion cluster
peaks = peaks[-which(row.names(peaks)==row.names(mainIon)),]
# find the manually detected adduct or principle ion, if exists
if (!is.na(adduct) ) {
mzMatch = which( round(peaks$mz,1) == round(detectedAdduct,1) )
if (length(mzMatch)) {
# add this adduct as manually detected feature
adducts[["DA"]] = peaks[mzMatch,]
# remove the adduct from the ion cluster
peaks = peaks[-mzMatch,]
}
}
refMass = floor(Mneutral)
mzMatch = grep(refMass, peaks$adduct)
# get the Camera adducts that match the ref mass
if (length(mzMatch) ) {
print("**** Added adducts ****")
adductNames = unlist( lapply( peaks$adduct[mzMatch], function(x) {
adductName = strsplit(as.character(x), split=" ")[[1]][1]
}) )
for (i in 1:length(mzMatch)) {
adducts[[adductNames[i]]] = peaks[mzMatch[i],]
}
peaks = peaks[-mzMatch,]
} else {
print("**** Not found ****")
}
if (nrow(peaks)>0) {
# annotated all remaining peaks as "pcgrp" members
adducts[["pcgrp"]] = peaks
}
return (adducts)
}
buildLibobject = function(DBpeaks, polarity, massTol=0.01) {
print ("Building library object from table of peaks")
Libobj = list(peaks=DBpeaks, polarity=polarity)
names(Libobj$peaks)[names(Libobj$peaks)=="rt.med"] = "rt.db"
names(Libobj$peaks)[names(Libobj$peaks)=="mz"] = "mz.db"
maxInt.vec = unlist( apply(Libobj$peaks[,c("lowE","highE")], 1, function(x) { max(x) } ) )
maxInt.vec[maxInt.vec==0] = NA
## get mass error tolerance by automatic calculation using mass and intensity - TO ADD
### Libobj$peaks$massTol = calcMassErr(Libobj$peaks$mz, maxInt.vec, massErrorSurface)
## set mass tolerance by the given threshold
Libobj$peaks$massTol = massTol
## Calculate mass error tolerance for the main ions
print ("Calculating mass error tolerances...")
annotatedPeaks = which( !is.na( as.numeric(Libobj$peaks$M) ) )
Libobj$peaks$massTol[annotatedPeaks] = apply( Libobj$peaks[annotatedPeaks,], 1, function(peak) {
M = as.numeric(peak["M"])
mz = as.numeric(peak["mz.db"])
# calc mass diff for the neutral specie
if (floor(M) == floor(mz)) { abs(M - mz) }
# calc mass diff for the M-H specie
else if (floor(M-protonMass) == floor(mz)) { abs(M-protonMass-mz) }
# calc mass diff for the M+H specie
else if (floor(M+protonMass) == floor(mz)) { abs(M+protonMass-mz) }
else { massTol }
})
## rank peaks high energy ramp channel (DB putative fragments)
print ("Ranking fragments...")
dbIds = unique(Libobj$peaks$ID)
Libobj$ranked = lapply(dbIds, function(id) {
lib = Libobj$peaks[Libobj$peaks$ID==id,]
frgs = lib[lib$mz < (as.numeric(lib$M[1])-4*protonMass),]
# remove any CAMERA annotated isotopes
whichIso = grep("M\\+",frgs$isotopes[1:nrow(lib)])
frgs = frgs[-whichIso,c("mz.db","lowE","highE")]
names(frgs) = c("mz","lowE","highE")
# add maxInt column (for int. threshold filtering and order peaks by the highE col.
if (nrow(frgs)) {
frgs$max = pmax(frgs[,2],frgs[,3])
frgs[order(frgs$highE, decreasing=T),][1:min(MAX_RANK,nrow(frgs)),]
}
})
names(Libobj$ranked) = dbIds
rownames(Libobj$peaks) = NULL
print ("Done: library object")
return ( Libobj )
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.