Nothing
#'@title PlausibleFormula.
#'
#'@description
#'\code{PlausibleFormula} will count the chemical elements within a provided formula and apply a rule set to evaluate if this formula is plausible.
#'It is currently exported to allow the user to modify/substitute the result but may become an internal function in the future.
#'
#'@details
#'The rules are developed based on comparison with ~1000 compounds found in the Golm Metabolome Database (GMD) for silylated metabolites detectable in GC-MS.
#'The following elements will be counted and tested: c("C","H","N","O","P","S","Si").
#'
#'@param x A chemical formula. No validity checks are performed.
#'@param ruleset Currently either "GCMS" or "LCMS" as a keyword. In future potentially a link to a text file containing the rule set.
#'
#'@return
#'Either TRUE or FALSE. Use sapply(vector, PlausibleFormula) if necessary.
#'
#'@examples
#'PlausibleFormula(x="C6H12O6")
#'PlausibleFormula(x="C5H13O14P3")
#'
#'@export
#'
#'@keywords internal
PlausibleFormula <- function(x, ruleset=c("APCI","ESI","none")) {
ruleset <- match.arg(ruleset)
if (ruleset=="APCI") {
if (length(x)==1 && is.character(x)) {
out_name <- x
# check if a certain SumFormula is likely to appear in GCMS-APCI data
x <- CountChemicalElements(x, ele=c("C","H","N","O","P","S","Si"))
out <- TRUE
# test maximal number of elemental occurences (empirical cutoff evaluation based on GMD entries)
ulim <- list("C"=60, "H"=100, "N"=10, "O"=27, "P"=4, "S"=3, "Si"=12)
out <- out & all(sapply(names(ulim), function(y) { x[y]<=ulim[[y]] }), na.rm=T)
# test minimal number of elemental occurences (empirical cutoff evaluation based on GMD entries)
llim <- list("C"=1, "H"=4)
out <- out & all(sapply(names(llim), function(y) { x[y]>=llim[[y]] }), na.rm=T)
# test individual element ratios
out <- out & x["H"]<=(1+4*x["C"])
out <- out & x["N"]<=x["C"]
out <- out & x["O"]<=x["C"]
out <- out & 6*x["P"]<=x["C"]
out <- out & 4*x["S"]<=x["C"]
out <- out & sum(x[c("N","O","P","S")])<=x["C"]
# test Si to O+N+S ratio (assumption: O, N and S Groups will most likely be Silylated, which may not be always True)
# allow aditionally non Silylated Compounds as some of those exist in GMD
#out <- out & ifelse(x["Si"]==0, TRUE, !(6*x["Si"] < x["O"]+x["N"]+x["S"]+x["P"]))
out <- out & ifelse(x["Si"]==0, TRUE, !(8*x["Si"] < x["O"]+x["N"]+x["S"]+x["P"]))
out <- out & !(x["Si"] > 2*(x["O"]+x["N"]+x["S"]+x["P"]))
#out <- out & !(x["Si"] >= x["O"]+x["N"]+x["S"])
# test Si to C ratio (assumption: Si is still present as SiC3H8 Group, which may not be always True)
# lowest value allowed for C is 3*Si-2 to account for e.g. CH4 Loss of Phosphate-3TMS or TMS-Dimer (mz=147, F=C5H15OSi2)
out <- out & (3*x["Si"]-2 <= x["C"])
# test Si to H ratio (assumption: Si is still present as SiC3H8 Group, which may not be always True)
out <- out & (8*x["Si"]-3 <= x["H"])
# S and P are rare to occur together (though they sometimes may)
out <- out & x["S"]*x["P"]==0
# if S=2 than Si>2 (...)
out <- out & ifelse(x["S"]==2, x["S"]<x["Si"], TRUE)
# if P>1 than Si>2P (...)
out <- out & ifelse(x["P"]>0, 2*x["P"]<=x["Si"], TRUE)
# if P>1 than O>=4 (...)
out <- out & ifelse(x["P"]>0, x["O"]>=4, TRUE)
names(out) <- out_name
} else {
out <- NA
names(out) <- paste(x, collapse="_", sep="_")
}
}
if (ruleset=="ESI") {
if (length(x)==1 && is.character(x)) {
out_name <- x
# check if a certain SumFormula is likely to appear in LCMS-ESI data
# rules for the following counted elements will be checked
x <- CountChemicalElements(x, ele=c("C","H","N","O","P","S","Cl","K","Na"))
# the molecular mass is therefore
m <- sum(x*c(12.0, 1.007825, 14.003074, 15.994915, 30.973762, 31.972071, 34.96885, 38.96371, 22.98977))
out <- TRUE
# test maximal number of elemental occurences (empirical cutoff evaluation based on GMD entries)
ulim <- list("C"=2+0.07*m, "H"=6+0.14*m, "N"=5+0.01*m, "O"=3+0.03*m, "P"=min(c(8, 1+0.01*m)), "S"=4)
out <- out & all(sapply(names(ulim), function(y) { x[y]<=ulim[[y]] }), na.rm=T)
llim <- list("C"=-15+0.04*m, "H"=-10+0.03*m, "N"=0, "O"=-25+0.03*m, "P"=0, "S"=0)
#out <- out & all(sapply(names(llim), function(y) { x[y]>=llim[[y]] }), na.rm=T)
# test individual element ratios
out <- out & x["N"]<=(3+x["C"])
out <- out & (x["O"]-4*x["P"])<=(2+x["C"])
out <- out & x["P"]<=(1+x["C"])
out <- out & x["S"]<=(1+x["C"])
out <- out & (x["N"]+x["P"]+x["O"]-4*x["P"]+x["S"])<=(4+x["C"])
# S and P are rare to occur together (though they sometimes may)
out <- out & x["S"]*x["P"]==0
# Na and K (forming adducts) should not occur together
out <- out & x["Na"]*x["K"]==0
# if P>1 than O>=2 (...)
out <- out & ifelse(x["P"]>0, x["O"]>=2, TRUE)
names(out) <- out_name
} else {
out <- NA
names(out) <- paste(x, collapse="_", sep="_")
}
}
return(out)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.